Implemented a progress indicator (#275)

* v1.0

* renamed operations to be more clear

* put info later because there are still operations that are part of the preparing process

* updated the last line clearing

* changed name of module and structs to ones that make more sense

* Disable size computation when file_count option is set

* added sleep during the thread waiting

* use powers of 1024 instead of 10 to compute the displayed number

* include DS_Store

* added files directories skipped information

* small format update

* implement the -H option

* put wait back

* remove PAtomicInfo since it's not used

* cargo fmt

* wrapped atomic operations to reduce overhead

* updated comments

* Use AtomicU64Wrapper instead of AtomicU64 in TotalSize

* update size suffix

* stop dividing size when larger than terabytes

* Fix use_iso flag not being set properly

* update properties display

* some reformatting

* use stdout instead of print

* Moved config instance into main because it's easier to read

* merge base formatting into macro

* update name to be more intuitive and separated math operations for more flexibility

* print currently indexed path

* cargo fmt

* reset size between each target dirs

* Access TotalSize rather than its inner value

* small comment change

* Update sysinfo version to 0.26.7

* fix: update use of sysinfo.system

System is now much quicker to start but requires an explicit call
to refresh memory, else it defaults to 0 (oops)

* clippy: Fix new clippy

* fix: bug where hard links could be double counted

When running:
dust dir_a dir_b

if a file was hard linked in both dir_a and dir_b it would be double
counted.

This fix resolves this by keeping the shared hashmap around between runs
for the second and subsequent arguments.
https://github.com/bootandy/dust/issues/282

* Fix: depth=0 bug for multiple arguments

https://github.com/bootandy/dust/issues/282

* refactor filter.rs

* refactor filter.rs

* refactor create AggregateData for filter.rs

* feature: Support for dereference links -L follow

du has -L flag which allows it to dereference or follow
symlinks. Clone this feature into dust.
https://github.com/bootandy/dust/issues/276

* refactor dir_walker

I find this layout cleaner

* v1.0

* changed name of module and structs to ones that make more sense

* Disable size computation when file_count option is set

* added files directories skipped information

* implement the -H option

* wrapped atomic operations to reduce overhead

* used human_readable_number function in display module rather than our own

* implemented progress disabling

* cargo fmt & cargo clippy

Co-authored-by: Guillaume Gomez <guillaume1.gomez@gmail.com>
Co-authored-by: andy.boot <bootandy@gmail.com>
This commit is contained in:
NovaliX
2023-01-14 11:43:22 +00:00
committed by GitHub
parent ea3cc537ea
commit f3c074759d
13 changed files with 442 additions and 14 deletions

5
.gitignore vendored
View File

@@ -6,4 +6,7 @@
**/*.rs.bk
*.swp
.vscode/*
*.idea/*
*.idea/*
#ignore macos files
.DS_Store

View File

@@ -56,6 +56,8 @@ _dust() {
'(-d --depth)--file_types[show only these file types]' \
'-H[print sizes in powers of 1000 (e.g., 1.1G)]' \
'--si[print sizes in powers of 1000 (e.g., 1.1G)]' \
'-P[Disable the progress indication.]' \
'--no-progress[Disable the progress indication.]' \
'-D[Only directories will be displayed.]' \
'--only-dir[Only directories will be displayed.]' \
'*::inputs:' \

View File

@@ -62,6 +62,8 @@ Register-ArgumentCompleter -Native -CommandName 'dust' -ScriptBlock {
[CompletionResult]::new('--file_types', 'file_types', [CompletionResultType]::ParameterName, 'show only these file types')
[CompletionResult]::new('-H', 'H', [CompletionResultType]::ParameterName, 'print sizes in powers of 1000 (e.g., 1.1G)')
[CompletionResult]::new('--si', 'si', [CompletionResultType]::ParameterName, 'print sizes in powers of 1000 (e.g., 1.1G)')
[CompletionResult]::new('-P', 'P', [CompletionResultType]::ParameterName, 'Disable the progress indication.')
[CompletionResult]::new('--no-progress', 'no-progress', [CompletionResultType]::ParameterName, 'Disable the progress indication.')
[CompletionResult]::new('-D', 'D', [CompletionResultType]::ParameterName, 'Only directories will be displayed.')
[CompletionResult]::new('--only-dir', 'only-dir', [CompletionResultType]::ParameterName, 'Only directories will be displayed.')
break

View File

@@ -19,7 +19,8 @@ _dust() {
case "${cmd}" in
dust)
opts="-h -V -d -n -p -X -L -x -s -r -c -b -z -f -i -v -e -t -w -H -D --help --version --depth --number-of-lines --full-paths --ignore-directory --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --no-percent-bars --min-size --skip-total --filecount --ignore_hidden --invert-filter --filter --file_types --terminal_width --si --only-dir <inputs>..."
opts="-h -V -d -n -p -X -L -x -s -r -c -b -z -f -i -v -e -t -w -H -P -D --help --version --depth --number-of-lines --full-paths --ignore-directory --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --no-percent-bars --min-size --skip-total --filecount --ignore_hidden --invert-filter --filter --file_types --terminal_width --si --no-progress --only-dir <inputs>..."
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
return 0

View File

@@ -59,6 +59,8 @@ set edit:completion:arg-completer[dust] = {|@words|
cand --file_types 'show only these file types'
cand -H 'print sizes in powers of 1000 (e.g., 1.1G)'
cand --si 'print sizes in powers of 1000 (e.g., 1.1G)'
cand -P 'Disable the progress indication.'
cand --no-progress 'Disable the progress indication.'
cand -D 'Only directories will be displayed.'
cand --only-dir 'Only directories will be displayed.'
}

View File

@@ -19,4 +19,5 @@ complete -c dust -s f -l filecount -d 'Directory \'size\' is number of child fil
complete -c dust -s i -l ignore_hidden -d 'Do not display hidden files'
complete -c dust -s t -l file_types -d 'show only these file types'
complete -c dust -s H -l si -d 'print sizes in powers of 1000 (e.g., 1.1G)'
complete -c dust -s P -l no-progress -d 'Disable the progress indication.'
complete -c dust -s D -l only-dir -d 'Only directories will be displayed.'

View File

@@ -140,6 +140,12 @@ pub fn build_cli() -> Command<'static> {
.long("si")
.help("print sizes in powers of 1000 (e.g., 1.1G)")
)
.arg(
Arg::new("disable_progress")
.short('P')
.long("no-progress")
.help("Disable the progress indication."),
)
.arg(Arg::new("inputs").multiple_occurrences(true))
.arg(
Arg::new("only_dir")

View File

@@ -20,12 +20,16 @@ pub struct Config {
pub iso: Option<bool>,
pub min_size: Option<String>,
pub only_dir: Option<bool>,
pub disable_progress: Option<bool>,
}
impl Config {
/// Returns `true` when the `no_colors` field is `Some(true)` or the
/// `no_colors` argument is present in `options`.
pub fn get_no_colors(&self, options: &ArgMatches) -> bool {
    if self.no_colors == Some(true) {
        return true;
    }
    options.is_present("no_colors")
}
/// Returns `true` when the `disable_progress` field is `Some(true)` or the
/// `disable_progress` argument is present in `options`.
pub fn get_disable_progress(&self, options: &ArgMatches) -> bool {
    let disabled_by_field = matches!(self.disable_progress, Some(true));
    disabled_by_field || options.is_present("disable_progress")
}
/// Returns `true` when the `display_apparent_size` field is `Some(true)` or
/// the `display_apparent_size` argument is present in `options`.
pub fn get_apparent_size(&self, options: &ArgMatches) -> bool {
    match self.display_apparent_size {
        Some(true) => true,
        _ => options.is_present("display_apparent_size"),
    }
}

View File

@@ -1,6 +1,12 @@
use std::fs;
use std::sync::Arc;
use crate::node::Node;
use crate::progress;
use crate::progress::PAtomicInfo;
use crate::progress::PConfig;
use crate::progress::ThreadSyncMathTrait;
use crate::progress::ThreadSyncTrait;
use crate::utils::is_filtered_out_due_to_invert_regex;
use crate::utils::is_filtered_out_due_to_regex;
use rayon::iter::ParallelBridge;
@@ -17,7 +23,6 @@ use crate::node::build_node;
use std::fs::DirEntry;
use crate::platform::get_metadata;
pub struct WalkData<'a> {
pub ignore_directories: HashSet<PathBuf>,
pub filter_regex: &'a [Regex],
@@ -27,6 +32,8 @@ pub struct WalkData<'a> {
pub by_filecount: bool,
pub ignore_hidden: bool,
pub follow_links: bool,
pub progress_config: Option<&'a Arc<PConfig>>,
pub progress_data: Option<&'a Arc<PAtomicInfo>>,
}
pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: WalkData) -> (Vec<Node>, bool) {
@@ -39,6 +46,7 @@ pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: WalkData) -> (Vec<Node>, bool)
clean_inodes(
walk(d, &permissions_flag, &walk_data, 0)?,
&mut inodes,
walk_data.progress_data,
walk_data.use_apparent_size,
)
})
@@ -50,8 +58,13 @@ pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: WalkData) -> (Vec<Node>, bool)
fn clean_inodes(
x: Node,
inodes: &mut HashSet<(u64, u64)>,
info_data: Option<&Arc<PAtomicInfo>>,
use_apparent_size: bool,
) -> Option<Node> {
if let Some(data) = info_data {
data.state.set(progress::Operation::PREPARING);
}
if !use_apparent_size {
if let Some(id) = x.inode_device {
if !inodes.insert(id) {
@@ -65,7 +78,7 @@ fn clean_inodes(
tmp.sort_by(sort_by_inode);
let new_children: Vec<_> = tmp
.into_iter()
.filter_map(|c| clean_inodes(c, inodes, use_apparent_size))
.filter_map(|c| clean_inodes(c, inodes, info_data, use_apparent_size))
.collect();
Some(Node {
@@ -129,6 +142,22 @@ fn walk(
walk_data: &WalkData,
depth: usize,
) -> Option<Node> {
let info_data = &walk_data.progress_data;
let info_conf = &walk_data.progress_config;
if let Some(data) = info_data {
data.state.set(progress::Operation::INDEXING);
if depth == 0 {
data.current_path.set(dir.to_string_lossy().to_string());
// reset the value between each target dirs
data.files_skipped.set(0);
data.directories_skipped.set(0);
data.total_file_size.set(0);
data.file_number.set(0);
}
}
let mut children = vec![];
if let Ok(entries) = fs::read_dir(&dir) {
@@ -148,7 +177,8 @@ fn walk(
if data.is_dir() || (walk_data.follow_links && data.is_symlink()) {
return walk(entry.path(), permissions_flag, walk_data, depth + 1);
}
return build_node(
let n = build_node(
entry.path(),
vec![],
walk_data.filter_regex,
@@ -159,18 +189,58 @@ fn walk(
walk_data.by_filecount,
depth,
);
if !ignore_file(entry, walk_data) {
if let Some(ref node) = n {
if let Some(data) = info_data {
data.file_number.add(1);
}
// Use `is_some_and` when stabilized
if let Some(conf) = info_conf {
if !conf.file_count_only {
if let Some(data) = info_data {
data.total_file_size.add(node.size);
}
}
}
}
} else if let Some(data) = info_data {
data.files_skipped.add(1);
}
n
} else {
None
}
} else {
if let Some(data) = info_data {
data.files_skipped.add(1);
}
None
}
} else {
permissions_flag.store(true, atomic::Ordering::Relaxed);
if let Some(data) = info_data {
data.directories_skipped.add(1);
}
None
}
None
})
.collect();
} else {
// Handle edge case where dust is called with a file instead of a directory
if !dir.exists() {
permissions_flag.store(true, atomic::Ordering::Relaxed);
if let Some(data) = info_data {
data.files_skipped.add(1);
}
} else if let Some(data) = info_data {
data.directories_skipped.add(1);
}
}
build_node(
@@ -208,10 +278,13 @@ mod tests {
let n = create_node();
// First time we insert the node
assert_eq!(clean_inodes(n.clone(), &mut inodes, false), Some(n.clone()));
assert_eq!(
clean_inodes(n.clone(), &mut inodes, None, false),
Some(n.clone())
);
// Second time is a duplicate - we ignore it
assert_eq!(clean_inodes(n.clone(), &mut inodes, false), None);
assert_eq!(clean_inodes(n.clone(), &mut inodes, None, false), None);
}
#[test]
@@ -221,7 +294,13 @@ mod tests {
let n = create_node();
// If using apparent size we include Nodes, even if duplicate inodes
assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
assert_eq!(
clean_inodes(n.clone(), &mut inodes, None, true),
Some(n.clone())
);
assert_eq!(
clean_inodes(n.clone(), &mut inodes, None, true),
Some(n.clone())
);
}
}

View File

@@ -377,7 +377,7 @@ fn get_pretty_name(
}
}
fn human_readable_number(size: u64, iso: bool) -> String {
pub fn human_readable_number(size: u64, iso: bool) -> String {
for (i, u) in UNITS.iter().enumerate() {
let num: u64 = if iso { 1000 } else { 1024 };
let marker = num.pow((UNITS.len() - i) as u32);

View File

@@ -7,11 +7,14 @@ mod filter;
mod filter_type;
mod node;
mod platform;
mod progress;
mod utils;
use crate::cli::build_cli;
use dir_walker::WalkData;
use filter::AggregateData;
use progress::PConfig;
use progress::PIndicator;
use std::collections::HashSet;
use std::io::BufRead;
use std::process;
@@ -165,6 +168,31 @@ fn main() {
.flat_map(|x| simplified_dirs.iter().map(move |d| d.join(&x)))
.collect();
let iso = config.get_iso(&options);
let ignore_hidden = config.get_ignore_hidden(&options);
let disable_progress = config.get_disable_progress(&options);
let info_opt = if disable_progress {
None
} else {
let conf = PConfig {
file_count_only: by_filecount,
use_iso: config.get_iso(&options),
ignore_hidden,
};
let info = PIndicator::spawn(conf);
Some(info)
};
let (info_conf, info_data) = if let Some(ref info) = info_opt {
(Some(&info.config), Some(&info.data))
} else {
(None, None)
};
let walk_data = WalkData {
ignore_directories: ignored_full_path,
filter_regex: &filter_regexs,
@@ -172,13 +200,16 @@ fn main() {
allowed_filesystems,
use_apparent_size: config.get_apparent_size(&options),
by_filecount,
ignore_hidden: config.get_ignore_hidden(&options),
ignore_hidden,
follow_links,
progress_config: info_conf,
progress_data: info_data,
};
let _rayon = init_rayon();
let iso = config.get_iso(&options);
let (top_level_nodes, has_errors) = walk_it(simplified_dirs, walk_data);
let tree = match summarize_file_types {
true => get_all_file_types(&top_level_nodes, number_of_lines),
false => {
@@ -194,9 +225,14 @@ fn main() {
}
};
if let Some(info) = info_opt {
info.stop();
}
if has_errors {
eprintln!("Did not have permissions for all directories");
}
if let Some(root_node) = tree {
draw_it(
config.get_full_paths(&options),

292
src/progress.rs Normal file
View File

@@ -0,0 +1,292 @@
use std::{
fmt::Display,
io::Write,
sync::{
atomic::{AtomicBool, AtomicU64, AtomicU8, Ordering},
Arc, RwLock,
},
thread::JoinHandle,
time::{Duration, Instant},
};
use crate::display;
/* -------------------------------------------------------------------------- */
/// Memory ordering passed to the atomic loads and stores performed in this module.
pub const ATOMIC_ORDERING: Ordering = Ordering::Relaxed;
// small wrappers for atomic number to reduce overhead
/// A shared value that can be replaced and read back through a `&self` reference.
pub trait ThreadSyncTrait<T> {
/// Replaces the stored value with `val`.
fn set(&self, val: T);
/// Returns the currently stored value.
fn get(&self) -> T;
}
/// Arithmetic on a shared value through a `&self` reference.
pub trait ThreadSyncMathTrait<T> {
/// Adds `val` to the stored value.
fn add(&self, val: T);
}
// Generates a wrapper struct around an atomic integer type that implements
// `ThreadSyncTrait` (and, with the `+ add` form, `ThreadSyncMathTrait`),
// always using the memory ordering named by `$ordering`.
macro_rules! create_atomic_wrapper {
// Base form: the struct itself plus `set` / `get`.
($ident: ident, $atomic_type: ty, $type: ty, $ordering: ident) => {
#[derive(Default)]
pub struct $ident {
inner: $atomic_type,
}
impl ThreadSyncTrait<$type> for $ident {
fn set(&self, val: $type) {
self.inner.store(val, $ordering)
}
fn get(&self) -> $type {
self.inner.load($ordering)
}
}
};
// `+ add` form: everything from the base form plus `add`, built on `fetch_add`.
($ident: ident, $atomic_type: ty, $type: ty, $ordering: ident + add) => {
create_atomic_wrapper!($ident, $atomic_type, $type, $ordering);
impl ThreadSyncMathTrait<$type> for $ident {
fn add(&self, val: $type) {
// The previous value returned by `fetch_add` is discarded.
self.inner.fetch_add(val, $ordering);
}
}
};
}
// `u64` counter wrapper; used by `PAtomicInfo` and `TotalSize`.
create_atomic_wrapper!(AtomicU64Wrapper, AtomicU64, u64, ATOMIC_ORDERING + add);
// `u8` wrapper; `PAtomicInfo::state` stores an `Operation` constant in it.
create_atomic_wrapper!(AtomicU8Wrapper, AtomicU8, u8, ATOMIC_ORDERING + add);
/// A `String` guarded by an `RwLock` so it can be replaced and read through `&self`.
#[derive(Default)]
pub struct ThreadStringWrapper {
    inner: RwLock<String>,
}

impl ThreadSyncTrait<String> for ThreadStringWrapper {
    /// Overwrites the stored string with `val`.
    ///
    /// Panics if the lock is poisoned.
    fn set(&self, val: String) {
        let mut slot = self.inner.write().unwrap();
        *slot = val;
    }

    /// Returns a copy of the stored string.
    ///
    /// Panics if the lock is poisoned.
    fn get(&self) -> String {
        let current = self.inner.read().unwrap();
        current.clone()
    }
}
/* -------------------------------------------------------------------------- */
// The progress states are plain `u8` constants grouped in a module instead of
// an enum: this gives simpler syntax than guarding an enum with a Mutex or a
// RwLock, because the current state fits in an `AtomicU8Wrapper`.
#[allow(non_snake_case)]
pub mod Operation {
// State rendered as `Indexing "<path>"...` by the progress thread.
pub const INDEXING: u8 = 0;
// State rendered as `Preparing "<path>"...` by the progress thread.
pub const PREPARING: u8 = 1;
}
/// Counters and state that the progress thread reads when rendering a line.
#[derive(Default)]
pub struct PAtomicInfo {
    /// Number of files counted so far.
    pub file_number: AtomicU64Wrapper,
    /// Number of files reported as skipped.
    pub files_skipped: AtomicU64Wrapper,
    /// Number of directories reported as skipped.
    pub directories_skipped: AtomicU64Wrapper,
    /// Accumulated size, rendered in human-readable form.
    pub total_file_size: TotalSize,
    /// Current state; holds one of the `Operation` constants.
    pub state: AtomicU8Wrapper,
    /// Path shown in the progress line.
    pub current_path: ThreadStringWrapper,
}

impl PAtomicInfo {
    /// Creates the shared data with every field at its default, except the
    /// size total, which is built from `c`.
    fn new(c: &PConfig) -> Self {
        let total_file_size = TotalSize::new(c);
        Self {
            total_file_size,
            ..Self::default()
        }
    }
}
/* -------------------------------------------------------------------------- */
#[derive(Default)]
pub struct TotalSize {
use_iso: bool,
inner: AtomicU64Wrapper,
}
impl TotalSize {
fn new(c: &PConfig) -> Self {
Self {
use_iso: c.use_iso,
..Default::default()
}
}
}
impl Display for TotalSize {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&display::human_readable_number(
self.inner.get(),
self.use_iso,
))
}
}
impl ThreadSyncTrait<u64> for TotalSize {
fn set(&self, val: u64) {
self.inner.set(val)
}
fn get(&self) -> u64 {
self.inner.get()
}
}
impl ThreadSyncMathTrait<u64> for TotalSize {
fn add(&self, val: u64) {
self.inner.add(val)
}
}
/* -------------------------------------------------------------------------- */
/// Settings the progress indicator is created with.
#[derive(Default)]
pub struct PConfig {
/// When `true`, the progress line shows only the file count and omits the size total.
pub file_count_only: bool,
/// NOTE(review): stored but not read anywhere in this module — confirm whether it is still needed.
pub ignore_hidden: bool,
/// Forwarded as the `iso` argument of `display::human_readable_number` when the size total is printed.
pub use_iso: bool,
}
pub struct PIndicator {
thread_run: Arc<AtomicBool>,
thread: JoinHandle<()>,
pub data: Arc<PAtomicInfo>,
pub config: Arc<PConfig>,
}
impl PIndicator {
pub fn spawn(config: PConfig) -> Self {
macro_rules! init_shared_data {
(let $ident: ident, $ident2: ident = $value: expr) => {
let $ident = Arc::new($value);
let $ident2 = $ident.clone();
};
}
init_shared_data!(let instant, instant2 = Instant::now());
init_shared_data!(let time_thread_run, time_thread_run2 = AtomicBool::new(true));
init_shared_data!(let config, config2 = config);
init_shared_data!(let data, data2 = PAtomicInfo::new(&config));
let time_info_thread = std::thread::spawn(move || {
const SHOW_WALKING_AFTER: u64 = 0;
const PROGRESS_CHARS_DELTA: u64 = 100;
const PROGRESS_CHARS: [char; 4] = ['-', '\\', '|', '/'];
const PROGRESS_CHARS_LEN: usize = PROGRESS_CHARS.len();
let mut progress_char_i: usize = 0;
let mut stdout = std::io::stdout();
let mut last_msg_len = 0;
while time_thread_run2.load(ATOMIC_ORDERING) {
if instant2.elapsed() > Duration::from_secs(SHOW_WALKING_AFTER) {
// print!("{:?}", *state2.read().unwrap());
// clear the line
print!("\r{:width$}", " ", width = last_msg_len);
macro_rules! format_base {
($state: expr) => {
format!(
"\r{} \"{}\"... {}",
$state,
data2.current_path.get(),
PROGRESS_CHARS[progress_char_i],
)
};
}
let msg = match data2.state.get() {
Operation::INDEXING => {
const PROPS_SEPARATOR: &str = ", ";
let base = format_base!("Indexing");
macro_rules! format_property {
($value: ident, $singular: expr, $plural: expr) => {
format!(
"{} {}",
$value,
if $value > 1 { $plural } else { $singular }
)
};
}
let mut main_props = Vec::new();
let fn_ = data2.file_number.get();
if config2.file_count_only {
main_props.push(format_property!(fn_, "file", "files"));
} else {
main_props.push(format!("{}", data2.total_file_size));
main_props.push(format_property!(fn_, "file", "files"));
};
let main_props_str = main_props.join(PROPS_SEPARATOR);
let base = format!("{} - {}", base, main_props_str);
let ds = data2.directories_skipped.get();
let fs = data2.files_skipped.get();
if ds + fs != 0 {
let mut strs = Vec::new();
if fs != 0 {
strs.push(format_property!(fs, "file", "files"))
}
if ds != 0 {
strs.push(format_property!(ds, "directory", "directories"))
}
format!("{} ({} skipped)", base, strs.join(", "))
} else {
base
}
}
Operation::PREPARING => {
format_base!("Preparing")
}
_ => panic!("Unknown State"),
};
last_msg_len = msg.len();
write!(stdout, "{}", msg).unwrap();
stdout.flush().unwrap();
progress_char_i += 1;
progress_char_i %= PROGRESS_CHARS_LEN;
std::thread::sleep(Duration::from_millis(PROGRESS_CHARS_DELTA));
} else {
// wait duration is in seconds so we need only to check each second
std::thread::sleep(Duration::from_secs(1));
}
}
// clear the line for the last time
print!("\r{:width$}", " ", width = last_msg_len);
// Return at the start of the line so the output can be printed correctly
print!("\r");
stdout.flush().unwrap();
});
Self {
thread_run: time_thread_run,
thread: time_info_thread,
data,
config,
}
}
pub fn stop(self) {
self.thread_run.store(false, ATOMIC_ORDERING);
self.thread.join().unwrap();
}
}

View File

@@ -26,7 +26,7 @@ fn link_it(link_path: PathBuf, file_path_s: &str, is_soft: bool) -> String {
c.arg(file_path_s);
c.arg(link_name_s);
assert!(c.output().is_ok());
return link_name_s.into();
link_name_s.into()
}
#[cfg_attr(target_os = "windows", ignore)]