Feature: Adding file types filter & F flag changed

-t = Show summary of types

-e = Filter by regex
	allows you to specify a file type like -e "\.txt$"

Change behaviour of '-f' flag - it now counts only files. Before it
counted files & directories. This was needed for compatibility with
the new '-e' filter flag
This commit is contained in:
andy.boot
2021-08-12 12:21:35 +01:00
parent d8a334df3b
commit 124c19b5c9
10 changed files with 221 additions and 45 deletions

33
Cargo.lock generated
View File

@@ -31,9 +31,9 @@ dependencies = [
[[package]]
name = "assert_cmd"
version = "1.0.7"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d20831bd004dda4c7c372c19cdabff369f794a95e955b3f13fe460e3e1ae95f"
checksum = "c98233c6673d8601ab23e77eb38f999c51100d46c5703b17288c57fddf3a1ffe"
dependencies = [
"bstr",
"doc-comment",
@@ -62,9 +62,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.2.1"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bstr"
@@ -111,9 +111,9 @@ dependencies = [
[[package]]
name = "crossbeam-deque"
version = "0.8.0"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
dependencies = [
"cfg-if",
"crossbeam-epoch",
@@ -164,6 +164,7 @@ dependencies = [
"clap",
"lscolors",
"rayon",
"regex",
"stfu8",
"tempfile",
"terminal_size",
@@ -215,9 +216,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.98"
version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21"
[[package]]
name = "lscolors"
@@ -230,9 +231,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.4.0"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memoffset"
@@ -261,9 +262,9 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
[[package]]
name = "predicates"
version = "2.0.0"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6e46ca79eb4e21e2ec14430340c71250ab69332abf85521c95d3a8bc336aa76"
checksum = "c143348f141cc87aab5b950021bac6145d0e5ae754b0591de23244cee42c9308"
dependencies = [
"difflib",
"itertools",
@@ -278,9 +279,9 @@ checksum = "57e35a3326b75e49aa85f5dc6ec15b41108cf5aee58eabb1f274dd18b73c2451"
[[package]]
name = "predicates-tree"
version = "1.0.2"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f553275e5721409451eb85e15fd9a860a6e5ab4496eb215987502b5f5391f2"
checksum = "d7dd0fd014130206c9352efbdc92be592751b2b9274dff685348341082c6ea3d"
dependencies = [
"predicates-core",
"treeline",
@@ -353,9 +354,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.2.9"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]

View File

@@ -30,6 +30,7 @@ unicode-width = "0.1"
rayon="1"
thousands = "0.2"
stfu8 = "0.2"
regex = "1"
[target.'cfg(windows)'.dependencies]
winapi-util = "0.1"

View File

@@ -57,6 +57,8 @@ Usage: dust -b (do not show percentages or draw ASCII bars)
Usage: dust -i (do not show hidden files)
Usage: dust -c (No colors [monochrome])
Usage: dust -f (Count files instead of diskspace)
Usage: dust -t Group by filetype
Usage: dust -e regex Only include files matching this regex (eg dust -e "\.png$" would match png files)
```

View File

@@ -1,8 +1,10 @@
use std::fs;
use crate::node::Node;
use crate::utils::is_filtered_out_due_to_regex;
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use regex::Regex;
use std::path::PathBuf;
use std::sync::atomic;
@@ -17,6 +19,7 @@ use crate::platform::get_metadata;
pub struct WalkData {
pub ignore_directories: HashSet<PathBuf>,
pub filter_regex: Option<Regex>,
pub allowed_filesystems: HashSet<u64>,
pub use_apparent_size: bool,
pub by_filecount: bool,
@@ -84,6 +87,15 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
}
}
}
// Keeping `walk_data.filter_regex.is_some()` is important for performance reasons, it stops unnecessary work
if walk_data.filter_regex.is_some()
&& entry.path().is_file()
&& is_filtered_out_due_to_regex(&walk_data.filter_regex, &entry.path())
{
return true;
}
(is_dot_file && walk_data.ignore_hidden) || is_ignored_path
}
@@ -110,8 +122,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
return build_node(
entry.path(),
vec![],
&walk_data.filter_regex,
walk_data.use_apparent_size,
data.is_symlink(),
data.is_file(),
walk_data.by_filecount,
);
}
@@ -128,8 +142,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
build_node(
dir,
children,
&walk_data.filter_regex,
walk_data.use_apparent_size,
false,
false,
walk_data.by_filecount,
)
}

View File

@@ -107,7 +107,6 @@ impl DrawData<'_> {
#[allow(clippy::too_many_arguments)]
pub fn draw_it(
permission_error: bool,
use_full_path: bool,
is_reversed: bool,
no_colors: bool,
@@ -116,9 +115,6 @@ pub fn draw_it(
by_filecount: bool,
option_root_node: Option<DisplayNode>,
) {
if permission_error {
eprintln!("Did not have permissions for all directories");
}
if option_root_node.is_none() {
return;
}

View File

@@ -1,6 +1,7 @@
use crate::display_node::DisplayNode;
use crate::node::Node;
use std::collections::BinaryHeap;
use std::collections::HashMap;
use std::collections::HashSet;
use std::path::PathBuf;
@@ -13,7 +14,11 @@ pub fn get_by_depth(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
Some(build_by_depth(&root, n - 1))
}
pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
pub fn get_biggest(
top_level_nodes: Vec<Node>,
n: usize,
using_file_type_filter: bool,
) -> Option<DisplayNode> {
if top_level_nodes.is_empty() {
// perhaps change this, bring back Error object?
return None;
@@ -22,18 +27,17 @@ pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
let mut heap = BinaryHeap::new();
let number_top_level_nodes = top_level_nodes.len();
let root = get_new_root(top_level_nodes);
root.children.iter().for_each(|c| heap.push(c));
let mut allowed_nodes = HashSet::new();
allowed_nodes.insert(&root.name);
heap = add_children(using_file_type_filter, &root, heap);
for _ in number_top_level_nodes..n {
let line = heap.pop();
match line {
Some(line) => {
line.children.iter().for_each(|c| heap.push(c));
allowed_nodes.insert(&line.name);
heap = add_children(using_file_type_filter, line, heap);
}
None => break,
}
@@ -41,6 +45,72 @@ pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
recursive_rebuilder(&allowed_nodes, &root)
}
pub fn get_all_file_types(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
let mut map: HashMap<String, DisplayNode> = HashMap::new();
build_by_all_file_types(top_level_nodes, &mut map);
let mut by_types: Vec<DisplayNode> = map.into_iter().map(|(_k, v)| v).collect();
by_types.sort();
by_types.reverse();
let displayed = if by_types.len() <= n {
by_types
} else {
let (displayed, rest) = by_types.split_at(if n > 1 { n - 1 } else { 1 });
let remaining = DisplayNode {
name: PathBuf::from("(others)"),
size: rest.iter().map(|a| a.size).sum(),
children: vec![],
};
let mut displayed = displayed.to_vec();
displayed.push(remaining);
displayed
};
let result = DisplayNode {
name: PathBuf::from("(total)"),
size: displayed.iter().map(|a| a.size).sum(),
children: displayed,
};
Some(result)
}
fn add_children<'a>(
using_file_type_filter: bool,
line: &'a Node,
mut heap: BinaryHeap<&'a Node>,
) -> BinaryHeap<&'a Node> {
if using_file_type_filter {
line.children.iter().for_each(|c| {
if c.name.is_file() || c.size > 0 {
heap.push(c)
}
});
} else {
line.children.iter().for_each(|c| heap.push(c));
}
heap
}
fn build_by_all_file_types(top_level_nodes: Vec<Node>, counter: &mut HashMap<String, DisplayNode>) {
for node in top_level_nodes {
if node.name.is_file() {
let ext = node.name.extension();
let key: String = match ext {
Some(e) => ".".to_string() + &e.to_string_lossy(),
None => "(no extension)".into(),
};
let mut display_node = counter.entry(key.clone()).or_insert(DisplayNode {
name: PathBuf::from(key),
size: 0,
children: vec![],
});
display_node.size += node.size;
}
build_by_all_file_types(node.children, counter)
}
}
fn build_by_depth(node: &Node, depth: usize) -> DisplayNode {
let new_children = {
if depth == 0 {

View File

@@ -1,15 +1,18 @@
#[macro_use]
extern crate clap;
extern crate rayon;
extern crate regex;
extern crate unicode_width;
use std::collections::HashSet;
use std::process;
use self::display::draw_it;
use clap::{App, AppSettings, Arg};
use dir_walker::walk_it;
use dir_walker::WalkData;
use filter::{get_biggest, get_by_depth};
use filter::{get_all_file_types, get_biggest, get_by_depth};
use regex::Regex;
use std::cmp::max;
use std::path::PathBuf;
use terminal_size::{terminal_size, Height, Width};
@@ -151,6 +154,23 @@ fn main() {
.long("ignore_hidden")
.help("Do not display hidden files"),
)
.arg(
Arg::with_name("filter")
.short("e")
.long("filter")
.takes_value(true)
.number_of_values(1)
.multiple(true)
.conflicts_with("types")
.help("Only include files matching this regex. For png files type: -e \"\\.png$\" "),
)
.arg(
Arg::with_name("types")
.short("t")
.long("file_types")
.conflicts_with("depth")
.help("show only these file types"),
)
.arg(
Arg::with_name("width")
.short("w")
@@ -169,6 +189,20 @@ fn main() {
}
};
let summarize_file_types = options.is_present("types");
let maybe_filter = if options.is_present("filter") {
match Regex::new(options.value_of("filter").unwrap()) {
Ok(r) => Some(r),
Err(e) => {
eprintln!("Ignoring bad value for filter {:?}", e);
process::exit(1);
}
}
} else {
None
};
let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) {
Ok(v) => v,
Err(_) => {
@@ -217,23 +251,34 @@ fn main() {
let walk_data = WalkData {
ignore_directories: ignored_full_path,
filter_regex: maybe_filter,
allowed_filesystems,
use_apparent_size,
by_filecount,
ignore_hidden,
};
let (nodes, errors) = walk_it(simplified_dirs, walk_data);
let (top_level_nodes, has_errors) = walk_it(simplified_dirs, walk_data);
let tree = {
match depth {
None => get_biggest(nodes, number_of_lines),
Some(depth) => get_by_depth(nodes, depth),
match (depth, summarize_file_types) {
(_, true) => get_all_file_types(top_level_nodes, number_of_lines),
(Some(depth), _) => get_by_depth(top_level_nodes, depth),
(_, _) => get_biggest(
top_level_nodes,
number_of_lines,
options.values_of("filter").is_some(),
),
}
};
if options.is_present("filter") {
println!("Filtering by: {}", options.value_of("filter").unwrap());
}
if has_errors {
eprintln!("Did not have permissions for all directories");
}
draw_it(
errors,
options.is_present("display_full_paths"),
!options.is_present("reverse"),
no_colors,

View File

@@ -1,5 +1,7 @@
use crate::platform::get_metadata;
use crate::utils::is_filtered_out_due_to_regex;
use regex::Regex;
use std::cmp::Ordering;
use std::path::PathBuf;
@@ -14,18 +16,29 @@ pub struct Node {
pub fn build_node(
dir: PathBuf,
children: Vec<Node>,
filter_regex: &Option<Regex>,
use_apparent_size: bool,
is_symlink: bool,
is_file: bool,
by_filecount: bool,
) -> Option<Node> {
match get_metadata(&dir, use_apparent_size) {
Some(data) => {
let (size, inode_device) = if by_filecount {
(1, data.1)
} else if is_symlink && !use_apparent_size {
(0, None)
let inode_device = if is_symlink && !use_apparent_size {
None
} else {
data
data.1
};
let size = if is_filtered_out_due_to_regex(filter_regex, &dir)
|| (is_symlink && !use_apparent_size)
|| by_filecount && !is_file
{
0
} else if by_filecount {
1
} else {
data.0
};
Some(Node {

View File

@@ -3,12 +3,7 @@ use std::collections::HashSet;
use std::path::{Path, PathBuf};
use crate::platform;
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
let parent = parent.as_ref();
let child = child.as_ref();
child.starts_with(parent) && !parent.starts_with(child)
}
use regex::Regex;
pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
@@ -62,6 +57,19 @@ pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
path.as_ref().components().collect::<PathBuf>()
}
pub fn is_filtered_out_due_to_regex(filter_regex: &Option<Regex>, dir: &Path) -> bool {
match filter_regex {
Some(fr) => !fr.is_match(&dir.as_os_str().to_string_lossy()),
None => false,
}
}
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
let parent = parent.as_ref();
let child = child.as_ref();
child.starts_with(parent) && !parent.starts_with(child)
}
mod tests {
#[allow(unused_imports)]
use super::*;

View File

@@ -60,7 +60,7 @@ pub fn test_d_flag_works_and_still_recurses_down() {
// We had a bug where running with '-d 1' would stop at the first directory and the code
// would fail to recurse down
let output = build_command(vec!["-d", "1", "-f", "-c", "tests/test_dir2/"]);
assert!(output.contains("7 ┌─┴ test_dir2"));
assert!(output.contains("4 ┌─┴ test_dir2"));
}
// Check against directories and files whos names are substrings of each other
@@ -97,8 +97,8 @@ pub fn test_number_of_files() {
let output = build_command(vec!["-c", "-f", "tests/test_dir"]);
assert!(output.contains("1 ┌── a_file "));
assert!(output.contains("1 ├── hello_file"));
assert!(output.contains("3 ┌─┴ many"));
assert!(output.contains("4 ┌─┴ test_dir"));
assert!(output.contains("2 ┌─┴ many"));
assert!(output.contains("2 ┌─┴ test_dir"));
}
#[cfg_attr(target_os = "windows", ignore)]
@@ -116,3 +116,27 @@ pub fn test_apparent_size() {
let incorrect_apparent_size = "4.0K ├── hello_file";
assert!(!output.contains(incorrect_apparent_size));
}
#[test]
pub fn test_show_files_by_type() {
// Check we can list files by type
let output = build_command(vec!["-c", "-t", "tests"]);
assert!(output.contains(" .unicode"));
assert!(output.contains(" .japan"));
assert!(output.contains(" .rs"));
assert!(output.contains(" (no extension)"));
assert!(output.contains("┌─┴ (total)"));
}
#[test]
pub fn test_show_files_by_specific_type() {
// Check we can see '.rs' files in the tests directory
let output = build_command(vec!["-c", "-e", "\\.rs$", "tests"]);
assert!(output.contains(" ┌─┴ tests"));
assert!(!output.contains("0B ┌── tests"));
assert!(!output.contains("0B ┌─┴ tests"));
// Check there are no '.bad_type' files in the tests directory
let output = build_command(vec!["-c", "-e", "bad_regex", "tests"]);
assert!(output.contains("0B ┌── tests"));
}