feat: Handle duplicate dir names better

If we run `dust /usr/*/Trash`
We see several 'Trash' directories in the output but do not know which
user they belong to.

This fix means if we see duplicate names in a directory we will display
the parent directory name as well
This commit is contained in:
andy.boot
2025-02-05 20:28:17 +00:00
parent a962b80eec
commit 759658ee96
6 changed files with 83 additions and 2 deletions

View File

@@ -273,7 +273,7 @@ fn clean_indentation_string(s: &str) -> String {
is
}
fn get_printable_name<P: AsRef<Path>>(dir_name: &P, short_paths: bool) -> String {
pub fn get_printable_name<P: AsRef<Path>>(dir_name: &P, short_paths: bool) -> String {
let dir_name = dir_name.as_ref();
let printable_name = {
if short_paths {

View File

@@ -1,3 +1,6 @@
use stfu8::encode_u8;
use crate::display::get_printable_name;
use crate::display_node::DisplayNode;
use crate::node::FileTime;
use crate::node::Node;
@@ -14,6 +17,7 @@ pub struct AggregateData {
pub number_of_lines: usize,
pub depth: usize,
pub using_a_filter: bool,
pub short_paths: bool,
}
pub fn get_biggest(
@@ -40,13 +44,17 @@ pub fn get_biggest(
} else {
top_level_nodes.iter().map(|node| node.size).sum()
};
let nodes = handle_duplicate_top_level_names(top_level_nodes, display_data.short_paths);
root = Node {
name: PathBuf::from("(total)"),
size,
children: top_level_nodes,
children: nodes,
inode_device: None,
depth: 0,
};
// Always include the base nodes if we add a 'parent' (total) node
heap = always_add_children(&display_data, &root, heap);
} else {
@@ -74,6 +82,8 @@ pub fn fill_remaining_lines<'a>(
let line = heap.pop();
match line {
Some(line) => {
// If we are not doing only_file OR if we are doing
// only_file and it has no children (ie is a file not a dir)
if !display_data.only_file || line.children.is_empty() {
allowed_nodes.insert(line.name.as_path(), line);
}
@@ -161,3 +171,57 @@ fn build_display_node(mut new_children: Vec<DisplayNode>, current: &Node) -> Dis
children: new_children,
}
}
fn names_have_dup(top_level_nodes: &Vec<Node>) -> bool {
let mut stored = HashSet::new();
for node in top_level_nodes {
let name = get_printable_name(&node.name, true);
if stored.contains(&name) {
return true;
}
stored.insert(name);
}
false
}
fn handle_duplicate_top_level_names(top_level_nodes: Vec<Node>, short_paths: bool) -> Vec<Node> {
// If we have top level names that are the same - we need to tweak them:
if short_paths && names_have_dup(&top_level_nodes) {
let mut new_top_nodes = top_level_nodes.clone();
let mut dir_walk_up_count = 0;
while names_have_dup(&new_top_nodes) && dir_walk_up_count < 10 {
dir_walk_up_count += 1;
let mut newer = vec![];
for node in new_top_nodes.iter() {
let mut folders = node.name.iter().rev();
// Get parent folder (if second time round get grandparent and so on)
for _ in 0..dir_walk_up_count {
folders.next();
}
match folders.next() {
// Add (parent_name) to path of Node
Some(data) => {
let parent = encode_u8(data.as_encoded_bytes());
let current_node = node.name.display();
let n = Node {
name: PathBuf::from(format!("{current_node}({parent})")),
size: node.size,
children: node.children.clone(),
inode_device: node.inode_device,
depth: node.depth,
};
newer.push(n)
}
// Node does not have a parent
None => newer.push(node.clone()),
}
}
new_top_nodes = newer;
}
new_top_nodes
} else {
top_level_nodes
}
}

View File

@@ -293,6 +293,7 @@ fn main() {
number_of_lines,
depth,
using_a_filter: !filter_regexs.is_empty() || !invert_filter_regexs.is_empty(),
short_paths: !config.get_full_paths(&options),
};
get_biggest(top_level_nodes, agg_data, &by_filetime, keep_collapsed)
}

View File

@@ -261,3 +261,19 @@ pub fn test_collapse() {
assert!(output.contains("many"));
assert!(!output.contains("hello_file"));
}
#[test]
pub fn test_handle_duplicate_names() {
// Check that even if we run on a multiple directories with the same name
// we still show the distinct parent dir in the output
let output = build_command(vec![
"tests/test_dir_matching/dave/dup_name",
"tests/test_dir_matching/andy/dup_name",
"ci",
]);
assert!(output.contains("andy"));
assert!(output.contains("dave"));
assert!(output.contains("ci"));
assert!(output.contains("dup_name"));
assert!(!output.contains("test_dir_matching"));
}