Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

df: fix rounding behavior in humanreadable mode #3554

Merged
merged 1 commit into from
May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion src/uu/df/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ path = "src/df.rs"

[dependencies]
clap = { version = "3.1", features = ["wrap_help", "cargo"] }
number_prefix = "0.4"
uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["libc", "fsext"] }
unicode-width = "0.1.9"

Expand Down
221 changes: 107 additions & 114 deletions src/uu/df/src/blocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ const IEC_BASES: [u128; 10] = [
1_237_940_039_285_380_274_899_124_224,
];

/// Suffixes for the first nine multi-byte unit suffixes.
const SUFFIXES: [char; 9] = ['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];

/// The first ten powers of 1000.
const SI_BASES: [u128; 10] = [
1,
1_000,
Expand All @@ -42,96 +40,71 @@ const SI_BASES: [u128; 10] = [
1_000_000_000_000_000_000_000_000_000,
];

// we use "kB" instead of "KB" because of GNU df
const SI_SUFFIXES: [&str; 9] = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
/// A SuffixType determines whether the suffixes are 1000 or 1024 based, and whether they are
/// intended for HumanReadable mode or not.
#[derive(Clone, Copy)]
pub(crate) enum SuffixType {
Iec,
Si,
HumanReadable(HumanReadable),
}

/// Convert a multiple of 1024 into a string like "12K" or "34M".
///
/// # Examples
///
/// Powers of 1024 become "1K", "1M", "1G", etc.
///
/// ```rust,ignore
/// assert_eq!(to_magnitude_and_suffix_1024(1024).unwrap(), "1K");
/// assert_eq!(to_magnitude_and_suffix_1024(1024 * 1024).unwrap(), "1M");
/// assert_eq!(to_magnitude_and_suffix_1024(1024 * 1024 * 1024).unwrap(), "1G");
/// ```
///
/// Multiples of those powers affect the magnitude part of the
/// returned string:
///
/// ```rust,ignore
/// assert_eq!(to_magnitude_and_suffix_1024(123 * 1024).unwrap(), "123K");
/// assert_eq!(to_magnitude_and_suffix_1024(456 * 1024 * 1024).unwrap(), "456M");
/// assert_eq!(to_magnitude_and_suffix_1024(789 * 1024 * 1024 * 1024).unwrap(), "789G");
/// ```
fn to_magnitude_and_suffix_1024(n: u128) -> Result<String, ()> {
// Find the smallest power of 1024 that is larger than `n`. That
// number indicates which units and suffix to use.
for i in 0..IEC_BASES.len() - 1 {
if n < IEC_BASES[i + 1] {
return Ok(format!("{}{}", n / IEC_BASES[i], SUFFIXES[i]));
impl SuffixType {
/// The first ten powers of 1024 and 1000, respectively.
fn bases(&self) -> [u128; 10] {
match self {
Self::Iec | Self::HumanReadable(HumanReadable::Binary) => IEC_BASES,
Self::Si | Self::HumanReadable(HumanReadable::Decimal) => SI_BASES,
}
}

/// Suffixes for the first nine multi-byte unit suffixes.
fn suffixes(&self) -> [&'static str; 9] {
match self {
// we use "kB" instead of "KB", same as GNU df
Self::Si => ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"],
Self::Iec => ["B", "K", "M", "G", "T", "P", "E", "Z", "Y"],
Self::HumanReadable(HumanReadable::Binary) => {
["", "K", "M", "G", "T", "P", "E", "Z", "Y"]
}
Self::HumanReadable(HumanReadable::Decimal) => {
["", "k", "M", "G", "T", "P", "E", "Z", "Y"]
}
}
}
Err(())
}

/// Convert a number into a string like "12kB" or "34MB".
///
/// Powers of 1000 become "1kB", "1MB", "1GB", etc.
/// Convert a number into a magnitude and a multi-byte unit suffix.
///
/// The returned string has a maximum length of 5 chars, for example: "1.1kB", "999kB", "1MB".
fn to_magnitude_and_suffix_not_powers_of_1024(n: u128) -> Result<String, ()> {
pub(crate) fn to_magnitude_and_suffix(n: u128, suffix_type: SuffixType) -> String {
let bases = suffix_type.bases();
let suffixes = suffix_type.suffixes();
let mut i = 0;

while SI_BASES[i + 1] - SI_BASES[i] < n && i < SI_SUFFIXES.len() {
while bases[i + 1] - bases[i] < n && i < suffixes.len() {
i += 1;
}

let quot = n / SI_BASES[i];
let rem = n % SI_BASES[i];
let suffix = SI_SUFFIXES[i];
let quot = n / bases[i];
let rem = n % bases[i];
let suffix = suffixes[i];

if rem == 0 {
Ok(format!("{}{}", quot, suffix))
format!("{}{}", quot, suffix)
} else {
let tenths_place = rem / (SI_BASES[i] / 10);
let tenths_place = rem / (bases[i] / 10);

if rem % (SI_BASES[i] / 10) == 0 {
Ok(format!("{}.{}{}", quot, tenths_place, suffix))
if rem % (bases[i] / 10) == 0 {
format!("{}.{}{}", quot, tenths_place, suffix)
} else if tenths_place + 1 == 10 || quot >= 10 {
Ok(format!("{}{}", quot + 1, suffix))
format!("{}{}", quot + 1, suffix)
} else {
Ok(format!("{}.{}{}", quot, tenths_place + 1, suffix))
format!("{}.{}{}", quot, tenths_place + 1, suffix)
}
}
}

/// Convert a number into a magnitude and a multi-byte unit suffix.
///
/// # Errors
///
/// If the number is too large to represent.
fn to_magnitude_and_suffix(n: u128) -> Result<String, ()> {
if n % 1024 == 0 && n % 1000 != 0 {
to_magnitude_and_suffix_1024(n)
} else {
to_magnitude_and_suffix_not_powers_of_1024(n)
}
}

/// A mode to use in condensing the display of a large number of bytes.
pub(crate) enum SizeFormat {
HumanReadable(HumanReadable),
StaticBlockSize,
}

impl Default for SizeFormat {
fn default() -> Self {
Self::StaticBlockSize
}
}

/// A mode to use in condensing the human readable display of a large number
/// of bytes.
///
Expand Down Expand Up @@ -207,10 +180,15 @@ pub(crate) fn block_size_from_matches(matches: &ArgMatches) -> Result<BlockSize,
impl fmt::Display for BlockSize {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Bytes(n) => match to_magnitude_and_suffix(*n as u128) {
Ok(s) => write!(f, "{}", s),
Err(_) => Err(fmt::Error),
},
Self::Bytes(n) => {
let s = if n % 1024 == 0 && n % 1000 != 0 {
to_magnitude_and_suffix(*n as u128, SuffixType::Iec)
} else {
to_magnitude_and_suffix(*n as u128, SuffixType::Si)
};

write!(f, "{}", s)
}
}
}
}
Expand All @@ -220,56 +198,64 @@ mod tests {

use std::env;

use crate::blocks::{to_magnitude_and_suffix, BlockSize};
use crate::blocks::{to_magnitude_and_suffix, BlockSize, SuffixType};

#[test]
fn test_to_magnitude_and_suffix_powers_of_1024() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have to admit, it's pretty difficult to follow the logic of HumanReadable, SizeFormat, BlockSize, SuffixType, and more, but if I'm reading this pull request correctly, we are just decoupling the value of the number (1024, 2048, 4096, etc.) from the decision of what suffix to apply (SuffixType::IEC, SuffixType:SI, etc.). So we could now call to_magnitude_and_suffix(1024, SuffixType::SI) if we wanted to. Is that right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's correct.

Your comment made me rethink the enums you mentioned and I realized that SizeFormat is not necessary and I turned it into an Option<HumanReadable> with the latest push. I hope this reduces some of the complexity.

assert_eq!(to_magnitude_and_suffix(1024).unwrap(), "1K");
assert_eq!(to_magnitude_and_suffix(2048).unwrap(), "2K");
assert_eq!(to_magnitude_and_suffix(4096).unwrap(), "4K");
assert_eq!(to_magnitude_and_suffix(1024 * 1024).unwrap(), "1M");
assert_eq!(to_magnitude_and_suffix(2 * 1024 * 1024).unwrap(), "2M");
assert_eq!(to_magnitude_and_suffix(1024 * 1024 * 1024).unwrap(), "1G");
assert_eq!(to_magnitude_and_suffix(1024, SuffixType::Iec), "1K");
assert_eq!(to_magnitude_and_suffix(2048, SuffixType::Iec), "2K");
assert_eq!(to_magnitude_and_suffix(4096, SuffixType::Iec), "4K");
assert_eq!(to_magnitude_and_suffix(1024 * 1024, SuffixType::Iec), "1M");
assert_eq!(
to_magnitude_and_suffix(34 * 1024 * 1024 * 1024).unwrap(),
to_magnitude_and_suffix(2 * 1024 * 1024, SuffixType::Iec),
"2M"
);
assert_eq!(
to_magnitude_and_suffix(1024 * 1024 * 1024, SuffixType::Iec),
"1G"
);
assert_eq!(
to_magnitude_and_suffix(34 * 1024 * 1024 * 1024, SuffixType::Iec),
"34G"
);
}

#[test]
fn test_to_magnitude_and_suffix_not_powers_of_1024() {
assert_eq!(to_magnitude_and_suffix(1).unwrap(), "1B");
assert_eq!(to_magnitude_and_suffix(999).unwrap(), "999B");

assert_eq!(to_magnitude_and_suffix(1000).unwrap(), "1kB");
assert_eq!(to_magnitude_and_suffix(1001).unwrap(), "1.1kB");
assert_eq!(to_magnitude_and_suffix(1023).unwrap(), "1.1kB");
assert_eq!(to_magnitude_and_suffix(1025).unwrap(), "1.1kB");
assert_eq!(to_magnitude_and_suffix(10_001).unwrap(), "11kB");
assert_eq!(to_magnitude_and_suffix(999_000).unwrap(), "999kB");

assert_eq!(to_magnitude_and_suffix(999_001).unwrap(), "1MB");
assert_eq!(to_magnitude_and_suffix(999_999).unwrap(), "1MB");
assert_eq!(to_magnitude_and_suffix(1_000_000).unwrap(), "1MB");
assert_eq!(to_magnitude_and_suffix(1_000_001).unwrap(), "1.1MB");
assert_eq!(to_magnitude_and_suffix(1_100_000).unwrap(), "1.1MB");
assert_eq!(to_magnitude_and_suffix(1_100_001).unwrap(), "1.2MB");
assert_eq!(to_magnitude_and_suffix(1_900_000).unwrap(), "1.9MB");
assert_eq!(to_magnitude_and_suffix(1_900_001).unwrap(), "2MB");
assert_eq!(to_magnitude_and_suffix(9_900_000).unwrap(), "9.9MB");
assert_eq!(to_magnitude_and_suffix(9_900_001).unwrap(), "10MB");
assert_eq!(to_magnitude_and_suffix(999_000_000).unwrap(), "999MB");

assert_eq!(to_magnitude_and_suffix(999_000_001).unwrap(), "1GB");
assert_eq!(to_magnitude_and_suffix(1_000_000_000).unwrap(), "1GB");
assert_eq!(to_magnitude_and_suffix(1_000_000_001).unwrap(), "1.1GB");
}
assert_eq!(to_magnitude_and_suffix(1, SuffixType::Si), "1B");
assert_eq!(to_magnitude_and_suffix(999, SuffixType::Si), "999B");

assert_eq!(to_magnitude_and_suffix(1000, SuffixType::Si), "1kB");
assert_eq!(to_magnitude_and_suffix(1001, SuffixType::Si), "1.1kB");
assert_eq!(to_magnitude_and_suffix(1023, SuffixType::Si), "1.1kB");
assert_eq!(to_magnitude_and_suffix(1025, SuffixType::Si), "1.1kB");
assert_eq!(to_magnitude_and_suffix(10_001, SuffixType::Si), "11kB");
assert_eq!(to_magnitude_and_suffix(999_000, SuffixType::Si), "999kB");

assert_eq!(to_magnitude_and_suffix(999_001, SuffixType::Si), "1MB");
assert_eq!(to_magnitude_and_suffix(999_999, SuffixType::Si), "1MB");
assert_eq!(to_magnitude_and_suffix(1_000_000, SuffixType::Si), "1MB");
assert_eq!(to_magnitude_and_suffix(1_000_001, SuffixType::Si), "1.1MB");
assert_eq!(to_magnitude_and_suffix(1_100_000, SuffixType::Si), "1.1MB");
assert_eq!(to_magnitude_and_suffix(1_100_001, SuffixType::Si), "1.2MB");
assert_eq!(to_magnitude_and_suffix(1_900_000, SuffixType::Si), "1.9MB");
assert_eq!(to_magnitude_and_suffix(1_900_001, SuffixType::Si), "2MB");
assert_eq!(to_magnitude_and_suffix(9_900_000, SuffixType::Si), "9.9MB");
assert_eq!(to_magnitude_and_suffix(9_900_001, SuffixType::Si), "10MB");
assert_eq!(
to_magnitude_and_suffix(999_000_000, SuffixType::Si),
"999MB"
);

#[test]
fn test_to_magnitude_and_suffix_multiples_of_1000_and_1024() {
assert_eq!(to_magnitude_and_suffix(128_000).unwrap(), "128kB");
assert_eq!(to_magnitude_and_suffix(1000 * 1024).unwrap(), "1.1MB");
assert_eq!(to_magnitude_and_suffix(1_000_000_000_000).unwrap(), "1TB");
assert_eq!(to_magnitude_and_suffix(999_000_001, SuffixType::Si), "1GB");
assert_eq!(
to_magnitude_and_suffix(1_000_000_000, SuffixType::Si),
"1GB"
);
assert_eq!(
to_magnitude_and_suffix(1_000_000_001, SuffixType::Si),
"1.1GB"
);
}

#[test]
Expand All @@ -279,6 +265,13 @@ mod tests {
assert_eq!(format!("{}", BlockSize::Bytes(3 * 1024 * 1024)), "3M");
}

#[test]
fn test_block_size_display_multiples_of_1000_and_1024() {
assert_eq!(format!("{}", BlockSize::Bytes(128_000)), "128kB");
assert_eq!(format!("{}", BlockSize::Bytes(1000 * 1024)), "1.1MB");
assert_eq!(format!("{}", BlockSize::Bytes(1_000_000_000_000)), "1TB");
}

#[test]
fn test_default_block_size() {
assert_eq!(BlockSize::Bytes(1024), BlockSize::default());
Expand Down
14 changes: 7 additions & 7 deletions src/uu/df/src/df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mod columns;
mod filesystem;
mod table;

use blocks::{HumanReadable, SizeFormat};
use blocks::HumanReadable;
use table::HeaderMode;
use uucore::display::Quotable;
use uucore::error::{UError, UResult, USimpleError};
Expand Down Expand Up @@ -71,7 +71,7 @@ static OUTPUT_FIELD_LIST: [&str; 12] = [
struct Options {
show_local_fs: bool,
show_all_fs: bool,
size_format: SizeFormat,
human_readable: Option<HumanReadable>,
block_size: BlockSize,
header_mode: HeaderMode,

Expand Down Expand Up @@ -100,7 +100,7 @@ impl Default for Options {
show_local_fs: Default::default(),
show_all_fs: Default::default(),
block_size: Default::default(),
size_format: Default::default(),
human_readable: Default::default(),
header_mode: Default::default(),
include: Default::default(),
exclude: Default::default(),
Expand Down Expand Up @@ -200,13 +200,13 @@ impl Options {
HeaderMode::Default
}
},
size_format: {
human_readable: {
if matches.is_present(OPT_HUMAN_READABLE_BINARY) {
SizeFormat::HumanReadable(HumanReadable::Binary)
Some(HumanReadable::Binary)
} else if matches.is_present(OPT_HUMAN_READABLE_DECIMAL) {
SizeFormat::HumanReadable(HumanReadable::Decimal)
Some(HumanReadable::Decimal)
} else {
SizeFormat::StaticBlockSize
None
}
},
include,
Expand Down
Loading