feat: support --files-from/--files0-from

Read NUL- or newline-terminated paths from FILE or stdin.

Wire options through `config` and `main`. Add tests and update the
man page, README, and completion scripts.
This commit is contained in:
Junghyeon Park
2025-08-30 10:38:17 +00:00
committed by andy.boot
parent 14efddfd05
commit 7974e2eaf0
15 changed files with 137 additions and 39 deletions

View File

@@ -102,7 +102,8 @@ Usage: dust -S (Custom Stack size - Use if you see: 'fatal runtime error: stack
Usage: dust --skip-total (No total row will be displayed)
Usage: dust -z 40000/30MB/20kib (Exclude output files/directories below size 40000 bytes / 30MB / 20KiB)
Usage: dust -j (Prints JSON representation of directories, try: dust -j | jq)
Usage: dust --files0-from=FILE (Reads null-terminated file paths from FILE); If FILE is - then read from stdin
Usage: dust --files0-from=FILE (Read NUL-terminated file paths from FILE; if FILE is '-', read from stdin)
Usage: dust --files-from=FILE (Read newline-terminated file paths from FILE; if FILE is '-', read from stdin)
Usage: dust --collapse=node-modules will keep the node-modules folder collapsed in display instead of recursively opening it
```

View File

@@ -62,7 +62,8 @@ tb\:"terabyte (TB)"))' \
'--atime=[just like -mtime, but based on file access time]:ATIME:_default' \
'-y+[just like -mtime, but based on file change time]:CTIME:_default' \
'--ctime=[just like -mtime, but based on file change time]:CTIME:_default' \
'--files0-from=[run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input]:FILES0_FROM:_files' \
'(--files-from)--files0-from=[Read NUL-terminated paths from FILE (use \`-\` for stdin)]:FILES0_FROM:_files' \
'(--files0-from)--files-from=[Read newline-terminated paths from FILE (use \`-\` for stdin)]:FILES_FROM:_files' \
'*--collapse=[Keep these directories collapsed]:COLLAPSE:_files' \
'-m+[Directory '\''size'\'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time]:FILETIME:((a\:"last accessed time"
c\:"last changed time"

View File

@@ -50,7 +50,8 @@ Register-ArgumentCompleter -Native -CommandName 'dust' -ScriptBlock {
[CompletionResult]::new('--atime', '--atime', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file access time')
[CompletionResult]::new('-y', '-y', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file change time')
[CompletionResult]::new('--ctime', '--ctime', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file change time')
[CompletionResult]::new('--files0-from', '--files0-from', [CompletionResultType]::ParameterName, 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input')
[CompletionResult]::new('--files0-from', '--files0-from', [CompletionResultType]::ParameterName, 'Read NUL-terminated paths from FILE (use `-` for stdin)')
[CompletionResult]::new('--files-from', '--files-from', [CompletionResultType]::ParameterName, 'Read newline-terminated paths from FILE (use `-` for stdin)')
[CompletionResult]::new('--collapse', '--collapse', [CompletionResultType]::ParameterName, 'Keep these directories collapsed')
[CompletionResult]::new('-m', '-m', [CompletionResultType]::ParameterName, 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time')
[CompletionResult]::new('--filetime', '--filetime', [CompletionResultType]::ParameterName, 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time')

View File

@@ -23,7 +23,7 @@ _dust() {
case "${cmd}" in
dust)
opts="-d -T -n -p -X -I -L -x -s -r -c -C -b -B -z -R -f -i -v -e -t -w -P -D -F -o -S -j -M -A -y -m -h -V --depth --threads --config --number-of-lines --full-paths --ignore-directory --ignore-all-in-file --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --force-colors --no-percent-bars --bars-on-right --min-size --screen-reader --skip-total --filecount --ignore-hidden --invert-filter --filter --file-types --terminal-width --no-progress --print-errors --only-dir --only-file --output-format --stack-size --output-json --mtime --atime --ctime --files0-from --collapse --filetime --help --version [PATH]..."
opts="-d -T -n -p -X -I -L -x -s -r -c -C -b -B -z -R -f -i -v -e -t -w -P -D -F -o -S -j -M -A -y -m -h -V --depth --threads --config --number-of-lines --full-paths --ignore-directory --ignore-all-in-file --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --force-colors --no-percent-bars --bars-on-right --min-size --screen-reader --skip-total --filecount --ignore-hidden --invert-filter --filter --file-types --terminal-width --no-progress --print-errors --only-dir --only-file --output-format --stack-size --output-json --mtime --atime --ctime --files0-from --files-from --collapse --filetime --help --version [PATH]..."
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
return 0
@@ -182,6 +182,10 @@ _dust() {
COMPREPLY=($(compgen -f "${cur}"))
return 0
;;
--files-from)
COMPREPLY=($(compgen -f "${cur}"))
return 0
;;
--collapse)
COMPREPLY=($(compgen -f "${cur}"))
return 0

View File

@@ -47,7 +47,8 @@ set edit:completion:arg-completer[dust] = {|@words|
cand --atime 'just like -mtime, but based on file access time'
cand -y 'just like -mtime, but based on file change time'
cand --ctime 'just like -mtime, but based on file change time'
cand --files0-from 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input'
cand --files0-from 'Read NUL-terminated paths from FILE (use `-` for stdin)'
cand --files-from 'Read newline-terminated paths from FILE (use `-` for stdin)'
cand --collapse 'Keep these directories collapsed'
cand -m 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time'
cand --filetime 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time'

View File

@@ -22,7 +22,8 @@ complete -c dust -s S -l stack-size -d 'Specify memory to use as stack size - us
complete -c dust -s M -l mtime -d '+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr(n+1)), n => [curr(n+1), currn), and -n => (𝑐𝑢𝑟𝑟𝑛, +∞)' -r
complete -c dust -s A -l atime -d 'just like -mtime, but based on file access time' -r
complete -c dust -s y -l ctime -d 'just like -mtime, but based on file change time' -r
complete -c dust -l files0-from -d 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input' -r -F
complete -c dust -l files0-from -d 'Read NUL-terminated paths from FILE (use `-` for stdin)' -r -F
complete -c dust -l files-from -d 'Read newline-terminated paths from FILE (use `-` for stdin)' -r -F
complete -c dust -l collapse -d 'Keep these directories collapsed' -r -F
complete -c dust -s m -l filetime -d 'Directory \'size\' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time' -r -f -a "a\t'last accessed time'
c\t'last changed time'

View File

@@ -4,7 +4,7 @@
.SH NAME
Dust \- Like du but more intuitive
.SH SYNOPSIS
\fBdust\fR [\fB\-d\fR|\fB\-\-depth\fR] [\fB\-T\fR|\fB\-\-threads\fR] [\fB\-\-config\fR] [\fB\-n\fR|\fB\-\-number\-of\-lines\fR] [\fB\-p\fR|\fB\-\-full\-paths\fR] [\fB\-X\fR|\fB\-\-ignore\-directory\fR] [\fB\-I\fR|\fB\-\-ignore\-all\-in\-file\fR] [\fB\-L\fR|\fB\-\-dereference\-links\fR] [\fB\-x\fR|\fB\-\-limit\-filesystem\fR] [\fB\-s\fR|\fB\-\-apparent\-size\fR] [\fB\-r\fR|\fB\-\-reverse\fR] [\fB\-c\fR|\fB\-\-no\-colors\fR] [\fB\-C\fR|\fB\-\-force\-colors\fR] [\fB\-b\fR|\fB\-\-no\-percent\-bars\fR] [\fB\-B\fR|\fB\-\-bars\-on\-right\fR] [\fB\-z\fR|\fB\-\-min\-size\fR] [\fB\-R\fR|\fB\-\-screen\-reader\fR] [\fB\-\-skip\-total\fR] [\fB\-f\fR|\fB\-\-filecount\fR] [\fB\-i\fR|\fB\-\-ignore\-hidden\fR] [\fB\-v\fR|\fB\-\-invert\-filter\fR] [\fB\-e\fR|\fB\-\-filter\fR] [\fB\-t\fR|\fB\-\-file\-types\fR] [\fB\-w\fR|\fB\-\-terminal\-width\fR] [\fB\-P\fR|\fB\-\-no\-progress\fR] [\fB\-\-print\-errors\fR] [\fB\-D\fR|\fB\-\-only\-dir\fR] [\fB\-F\fR|\fB\-\-only\-file\fR] [\fB\-o\fR|\fB\-\-output\-format\fR] [\fB\-S\fR|\fB\-\-stack\-size\fR] [\fB\-j\fR|\fB\-\-output\-json\fR] [\fB\-M\fR|\fB\-\-mtime\fR] [\fB\-A\fR|\fB\-\-atime\fR] [\fB\-y\fR|\fB\-\-ctime\fR] [\fB\-\-files0\-from\fR] [\fB\-\-collapse\fR] [\fB\-m\fR|\fB\-\-filetime\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fIPATH\fR]
\fBdust\fR [\fB\-d\fR|\fB\-\-depth\fR] [\fB\-T\fR|\fB\-\-threads\fR] [\fB\-\-config\fR] [\fB\-n\fR|\fB\-\-number\-of\-lines\fR] [\fB\-p\fR|\fB\-\-full\-paths\fR] [\fB\-X\fR|\fB\-\-ignore\-directory\fR] [\fB\-I\fR|\fB\-\-ignore\-all\-in\-file\fR] [\fB\-L\fR|\fB\-\-dereference\-links\fR] [\fB\-x\fR|\fB\-\-limit\-filesystem\fR] [\fB\-s\fR|\fB\-\-apparent\-size\fR] [\fB\-r\fR|\fB\-\-reverse\fR] [\fB\-c\fR|\fB\-\-no\-colors\fR] [\fB\-C\fR|\fB\-\-force\-colors\fR] [\fB\-b\fR|\fB\-\-no\-percent\-bars\fR] [\fB\-B\fR|\fB\-\-bars\-on\-right\fR] [\fB\-z\fR|\fB\-\-min\-size\fR] [\fB\-R\fR|\fB\-\-screen\-reader\fR] [\fB\-\-skip\-total\fR] [\fB\-f\fR|\fB\-\-filecount\fR] [\fB\-i\fR|\fB\-\-ignore\-hidden\fR] [\fB\-v\fR|\fB\-\-invert\-filter\fR] [\fB\-e\fR|\fB\-\-filter\fR] [\fB\-t\fR|\fB\-\-file\-types\fR] [\fB\-w\fR|\fB\-\-terminal\-width\fR] [\fB\-P\fR|\fB\-\-no\-progress\fR] [\fB\-\-print\-errors\fR] [\fB\-D\fR|\fB\-\-only\-dir\fR] [\fB\-F\fR|\fB\-\-only\-file\fR] [\fB\-o\fR|\fB\-\-output\-format\fR] [\fB\-S\fR|\fB\-\-stack\-size\fR] [\fB\-j\fR|\fB\-\-output\-json\fR] [\fB\-M\fR|\fB\-\-mtime\fR] [\fB\-A\fR|\fB\-\-atime\fR] [\fB\-y\fR|\fB\-\-ctime\fR] [\fB\-\-files0\-from\fR] [\fB\-\-files\-from\fR] [\fB\-\-collapse\fR] [\fB\-m\fR|\fB\-\-filetime\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fIPATH\fR]
.SH DESCRIPTION
Like du but more intuitive
.SH OPTIONS
@@ -138,7 +138,10 @@ just like \-mtime, but based on file access time
just like \-mtime, but based on file change time
.TP
\fB\-\-files0\-from\fR=\fIFILES0_FROM\fR
run dust on NUL\-terminated file names specified in file; if argument is \-, then read names from standard input
Read NUL\-terminated paths from FILE (use `\-` for stdin)
.TP
\fB\-\-files\-from\fR=\fIFILES_FROM\fR
Read newline\-terminated paths from FILE (use `\-` for stdin)
.TP
\fB\-\-collapse\fR=\fICOLLAPSE\fR
Keep these directories collapsed

View File

@@ -172,11 +172,14 @@ pub struct Cli {
#[arg(short('y'), long, allow_hyphen_values(true))]
pub ctime: Option<String>,
/// run dust on NUL-terminated file names specified in file; if argument is
/// -, then read names from standard input
#[arg(long, value_hint(ValueHint::AnyPath))]
/// Read NUL-terminated paths from FILE (use `-` for stdin).
#[arg(long, value_hint(ValueHint::AnyPath), conflicts_with("files_from"))]
pub files0_from: Option<String>,
/// Read newline-terminated paths from FILE (use `-` for stdin).
#[arg(long, value_hint(ValueHint::AnyPath), conflicts_with("files0_from"))]
pub files_from: Option<String>,
/// Keep these directories collapsed
#[arg(long, value_hint(ValueHint::AnyPath))]
pub collapse: Option<Vec<String>>,

View File

@@ -36,16 +36,25 @@ pub struct Config {
pub output_json: Option<bool>,
pub print_errors: Option<bool>,
pub files0_from: Option<String>,
pub files_from: Option<String>,
}
impl Config {
pub fn get_files_from(&self, options: &Cli) -> Option<String> {
pub fn get_files0_from(&self, options: &Cli) -> Option<String> {
let from_file = &options.files0_from;
match from_file {
None => self.files0_from.as_ref().map(|x| x.to_string()),
Some(x) => Some(x.to_string()),
}
}
pub fn get_files_from(&self, options: &Cli) -> Option<String> {
let from_file = &options.files_from;
match from_file {
None => self.files_from.as_ref().map(|x| x.to_string()),
Some(x) => Some(x.to_string()),
}
}
pub fn get_no_colors(&self, options: &Cli) -> bool {
Some(true) == self.no_colors || options.no_colors
}

View File

@@ -22,8 +22,9 @@ use progress::PIndicator;
use regex::Error;
use std::collections::HashSet;
use std::env;
use std::fs::read_to_string;
use std::fs::{read, read_to_string};
use std::io;
use std::io::Read;
use std::panic;
use std::process;
use std::sync::Arc;
@@ -127,34 +128,15 @@ fn main() {
})
.expect("Error setting Ctrl-C handler");
let target_dirs = match config.get_files_from(&options) {
Some(path) => {
if path == "-" {
let mut targets_to_add = io::stdin()
.lines()
.map_while(Result::ok)
.collect::<Vec<String>>();
if targets_to_add.is_empty() {
eprintln!("No input provided, defaulting to current directory");
targets_to_add.push(".".to_owned());
}
targets_to_add
} else {
// read file
match read_to_string(path) {
Ok(file_content) => file_content.lines().map(|x| x.to_string()).collect(),
Err(e) => {
eprintln!("Error reading file: {e}");
vec![".".to_owned()]
}
}
}
}
None => match options.params {
let target_dirs = if let Some(path) = config.get_files0_from(&options) {
read_paths_from_source(&path, true)
} else if let Some(path) = config.get_files_from(&options) {
read_paths_from_source(&path, false)
} else {
match options.params {
Some(ref values) => values.clone(),
None => vec![".".to_owned()],
},
}
};
let summarize_file_types = options.file_types;
@@ -398,6 +380,53 @@ fn print_any_errors(print_errors: bool, final_errors: &RuntimeErrors) {
}
}
fn read_paths_from_source(path: &str, null_terminated: bool) -> Vec<String> {
let from_stdin = path == "-";
let result: Result<Vec<String>, Option<String>> = (|| {
// 1) read bytes
let bytes = if from_stdin {
let mut b = Vec::new();
io::stdin().lock().read_to_end(&mut b).map_err(|_| None)?;
b
} else {
read(path).map_err(|e| Some(e.to_string()))?
};
let text = std::str::from_utf8(&bytes).map_err(|e| {
if from_stdin {
None
} else {
Some(e.to_string())
}
})?;
let items: Vec<String> = if null_terminated {
text.split('\0')
.filter(|s| !s.is_empty())
.map(str::to_owned)
.collect()
} else {
text.lines().map(str::to_owned).collect()
};
if from_stdin && items.is_empty() {
return Err(None);
}
Ok(items)
})();
match result {
Ok(v) => v,
Err(None) => {
eprintln!("No files provided, defaulting to current directory");
vec![".".to_owned()]
}
Err(Some(msg)) => {
eprintln!("Failed to read file: {msg}");
vec![".".to_owned()]
}
}
}
fn init_rayon(stack: &Option<usize>, threads: &Option<usize>) -> rayon::ThreadPool {
let stack_size = match stack {
Some(s) => Some(*s),

View File

Binary file not shown.

View File

@@ -0,0 +1,2 @@
tests/test_dir_files_from/a_file
tests/test_dir_files_from/hello_file

View File

@@ -0,0 +1 @@
hello

View File

@@ -104,6 +104,48 @@ pub fn test_ignore_all_in_file() {
assert!(!output.contains(".secret"));
}
#[test]
pub fn test_files_from_flag_file() {
let output = build_command(vec!["--files-from", "tests/test_dir_files_from/files_from.txt"]);
assert!(output.contains("a_file"));
assert!(output.contains("hello_file"));
}
#[test]
pub fn test_files0_from_flag_file() {
let output = build_command(vec!["--files0-from", "tests/test_dir_files_from/files0_from.txt"]);
assert!(output.contains("a_file"));
assert!(output.contains("hello_file"));
}
#[test]
pub fn test_files_from_flag_stdin() {
let mut cmd = Command::cargo_bin("dust").unwrap();
cmd.arg("-P").arg("--files-from").arg("-");
let input = b"tests/test_dir_files_from/a_file\ntests/test_dir_files_from/hello_file\n";
cmd.write_stdin(input.as_ref());
let finished = &cmd.unwrap();
let stderr = std::str::from_utf8(&finished.stderr).unwrap();
assert_eq!(stderr, "");
let output = std::str::from_utf8(&finished.stdout).unwrap();
assert!(output.contains("a_file"));
assert!(output.contains("hello_file"));
}
#[test]
pub fn test_files0_from_flag_stdin() {
let mut cmd = Command::cargo_bin("dust").unwrap();
cmd.arg("-P").arg("--files0-from").arg("-");
let input = b"tests/test_dir_files_from/a_file\0tests/test_dir_files_from/hello_file\0";
cmd.write_stdin(input.as_ref());
let finished = &cmd.unwrap();
let stderr = std::str::from_utf8(&finished.stderr).unwrap();
assert_eq!(stderr, "");
let output = std::str::from_utf8(&finished.stdout).unwrap();
assert!(output.contains("a_file"));
assert!(output.contains("hello_file"));
}
#[test]
pub fn test_with_bad_param() {
let mut cmd = Command::cargo_bin("dust").unwrap();