mirror of
https://github.com/bootandy/dust.git
synced 2026-01-15 06:13:53 -08:00
The -e and -v flags allow multiple values. Previously, values after the first were ignored. This change allows the user to specify multiple regexes and have them all applied.
163 lines
5.2 KiB
Rust
163 lines
5.2 KiB
Rust
use platform::get_metadata;
|
|
use std::collections::HashSet;
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use crate::platform;
|
|
use regex::Regex;
|
|
|
|
pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
|
|
let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
|
|
let mut to_remove: Vec<PathBuf> = Vec::with_capacity(filenames.len());
|
|
|
|
for t in filenames {
|
|
let top_level_name = normalize_path(t);
|
|
let mut can_add = true;
|
|
|
|
for tt in top_level_names.iter() {
|
|
if is_a_parent_of(&top_level_name, tt) {
|
|
to_remove.push(tt.to_path_buf());
|
|
} else if is_a_parent_of(tt, &top_level_name) {
|
|
can_add = false;
|
|
}
|
|
}
|
|
to_remove.sort_unstable();
|
|
top_level_names.retain(|tr| to_remove.binary_search(tr).is_err());
|
|
to_remove.clear();
|
|
if can_add {
|
|
top_level_names.insert(top_level_name);
|
|
}
|
|
}
|
|
|
|
top_level_names
|
|
}
|
|
|
|
pub fn get_filesystem_devices<'a, P: IntoIterator<Item = &'a PathBuf>>(paths: P) -> HashSet<u64> {
|
|
// Gets the device ids for the filesystems which are used by the argument paths
|
|
paths
|
|
.into_iter()
|
|
.filter_map(|p| {
|
|
let meta = get_metadata(p, false);
|
|
|
|
if let Some((_size, Some((_id, dev)))) = meta {
|
|
Some(dev)
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
/// Returns a cleaned-up copy of `path`.
///
/// The path is split with [`Path::components`] and reassembled, which:
/// 1. collapses repeated separators,
/// 2. drops interior `.` ("current directory") segments,
/// 3. drops trailing separators and trailing `.` segments,
/// 4. joins the pieces with the OS-preferred separator.
///
/// Reference: <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>
pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    let mut normalized = PathBuf::new();
    for component in path.as_ref().components() {
        normalized.push(component);
    }
    normalized
}
|
|
|
|
pub fn is_filtered_out_due_to_regex(filter_regex: &[Regex], dir: &Path) -> bool {
|
|
if filter_regex.is_empty() {
|
|
false
|
|
} else {
|
|
filter_regex
|
|
.iter()
|
|
.all(|f| !f.is_match(&dir.as_os_str().to_string_lossy()))
|
|
}
|
|
}
|
|
|
|
pub fn is_filtered_out_due_to_invert_regex(filter_regex: &[Regex], dir: &Path) -> bool {
|
|
filter_regex
|
|
.iter()
|
|
.any(|f| f.is_match(&dir.as_os_str().to_string_lossy()))
|
|
}
|
|
|
|
/// Reports whether `parent` is a strict ancestor of `child`.
///
/// The check uses [`Path::starts_with`] in both directions, so `/usr/folder` is not an
/// ancestor of `/usr/folder_not_a_child`, and a path is never its own ancestor.
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
    let ancestor = parent.as_ref();
    let descendant = child.as_ref();

    if !descendant.starts_with(ancestor) {
        return false;
    }
    // Two paths that are prefixes of each other are the same location, not parent and child.
    !ancestor.starts_with(descendant)
}
|
|
|
|
// Unit tests for the path helpers in this file. The module is gated on `cfg(test)` so it is
// only compiled for test builds, which also makes the glob import below always used.
#[cfg(test)]
mod tests {
    use super::*;

    // A single name is returned unchanged.
    #[test]
    fn test_simplify_dir() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("a"));
        assert_eq!(simplify_dir_names(vec!["a"]), correct);
    }

    // Sub-directories of an already listed directory are dropped.
    #[test]
    fn test_simplify_dir_rm_subdir() {
        let mut correct = HashSet::new();
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
    }

    // Different spellings of the same directory collapse into one entry.
    #[test]
    fn test_simplify_dir_duplicates() {
        let mut correct = HashSet::new();
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        correct.insert(PathBuf::from("c"));
        assert_eq!(
            simplify_dir_names(vec![
                "a/b",
                "a/b//",
                "a/././b///",
                "c",
                "c/",
                "c/.",
                "c/././",
                "c/././."
            ]),
            correct
        );
    }

    // Paths that merely share component names are unrelated and all survive.
    #[test]
    fn test_simplify_dir_rm_subdir_and_not_substrings() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("b"));
        correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
    }

    // A trailing "." segment is removed.
    #[test]
    fn test_simplify_dir_dots() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("src"));
        assert_eq!(simplify_dir_names(vec!["src/."]), correct);
    }

    // A name that is a string prefix of another is not treated as its parent.
    #[test]
    fn test_simplify_dir_substring_names() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("src"));
        correct.insert(PathBuf::from("src_v2"));
        assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
    }

    // Only strict ancestors count as parents.
    #[test]
    fn test_is_a_parent_of() {
        assert!(is_a_parent_of("/usr", "/usr/andy"));
        assert!(is_a_parent_of("/usr", "/usr/andy/i/am/descendant"));
        assert!(!is_a_parent_of("/usr", "/usr/."));
        assert!(!is_a_parent_of("/usr", "/usr/"));
        assert!(!is_a_parent_of("/usr", "/usr"));
        assert!(!is_a_parent_of("/usr/", "/usr"));
        assert!(!is_a_parent_of("/usr/andy", "/usr"));
        assert!(!is_a_parent_of("/usr/andy", "/usr/sibling"));
        assert!(!is_a_parent_of("/usr/folder", "/usr/folder_not_a_child"));
    }

    // The root directory is a parent of everything except itself.
    #[test]
    fn test_is_a_parent_of_root() {
        assert!(is_a_parent_of("/", "/usr/andy"));
        assert!(is_a_parent_of("/", "/usr"));
        assert!(!is_a_parent_of("/", "/"));
    }
}
|