From 4623575a6641bba9c4b45303d28386f7e92cc2b1 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 28 Aug 2023 22:51:45 +0200 Subject: [PATCH 1/4] echo: fix wrapping behavior of octal sequences --- src/uu/echo/src/echo.rs | 109 ++++++++++++++++++------------------- tests/by-util/test_echo.rs | 17 ++++++ 2 files changed, 71 insertions(+), 55 deletions(-) diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index cd9467714aa..4aba703a17d 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -21,73 +21,72 @@ mod options { pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape"; } -fn parse_code( - input: &mut Peekable, - base: u32, - max_digits: u32, - bits_per_digit: u32, -) -> Option { - let mut ret = 0x8000_0000; - for _ in 0..max_digits { - match input.peek().and_then(|c| c.to_digit(base)) { - Some(n) => ret = (ret << bits_per_digit) | n, +/// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences +fn parse_code(input: &mut Peekable, base: u8, max_digits: u32) -> Option { + // All arithmetic on `ret` needs to be wrapping, because octal input can + // take 3 digits, which is 9 bits, and therefore more than what fits in a + // `u8`. GNU just seems to wrap these values. + // Note that if we instead make `ret` a `u32` and use `char::from_u32` will + // yield incorrect results because it will interpret values larger than + // `u8::MAX` as unicode. + let mut ret = input.peek().and_then(|c| c.to_digit(base as u32))? as u8; + + // We can safely ifgnore the None case because we just peeked it. + let _ = input.next(); + + for _ in 1..max_digits { + match input.peek().and_then(|c| c.to_digit(base as u32)) { + Some(n) => ret = ret.wrapping_mul(base).wrapping_add(n as u8), None => break, } - input.next(); + // We can safely ifgnore the None case because we just peeked it. + let _ = input.next(); } - std::char::from_u32(ret) + + Some(ret.into()) } fn print_escaped(input: &str, mut output: impl Write) -> io::Result { - let mut should_stop = false; - - let mut buffer = ['\\'; 2]; - - // TODO `cargo +nightly clippy` complains that `.peek()` is never - // called on `iter`. However, `peek()` is called inside the - // `parse_code()` function that borrows `iter`. let mut iter = input.chars().peekable(); - while let Some(mut c) = iter.next() { - let mut start = 1; - - if c == '\\' { - if let Some(next) = iter.next() { - c = match next { - '\\' => '\\', - 'a' => '\x07', - 'b' => '\x08', - 'c' => { - should_stop = true; - break; - } - 'e' => '\x1b', - 'f' => '\x0c', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'v' => '\x0b', - 'x' => parse_code(&mut iter, 16, 2, 4).unwrap_or_else(|| { - start = 0; - next - }), - '0' => parse_code(&mut iter, 8, 3, 3).unwrap_or('\0'), - _ => { - start = 0; - next - } - }; - } + while let Some(c) = iter.next() { + if c != '\\' { + write!(output, "{c}")?; + continue; } - buffer[1] = c; - - // because printing char slices is apparently not available in the standard library - for ch in &buffer[start..] { - write!(output, "{ch}")?; + if let Some(next) = iter.next() { + let unescaped = match next { + '\\' => '\\', + 'a' => '\x07', + 'b' => '\x08', + 'c' => return Ok(true), + 'e' => '\x1b', + 'f' => '\x0c', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'v' => '\x0b', + 'x' => { + if let Some(c) = parse_code(&mut iter, 16, 2) { + c + } else { + write!(output, "\\")?; + 'x' + } + } + '0' => parse_code(&mut iter, 8, 3).unwrap_or('\0'), + c => { + write!(output, "\\")?; + c + } + }; + write!(output, "{unescaped}")?; + } else { + write!(output, "\\")?; } } - Ok(should_stop) + Ok(false) } #[uucore::main] diff --git a/tests/by-util/test_echo.rs b/tests/by-util/test_echo.rs index 3a8e7f86b34..82137c715ee 100644 --- a/tests/by-util/test_echo.rs +++ b/tests/by-util/test_echo.rs @@ -236,3 +236,20 @@ fn test_hyphen_values_between() { .success() .stdout_is("dumdum dum dum dum -e dum\n"); } + +#[test] +fn wrapping_octal() { + // Some odd behavior of GNU. Values of \0400 and greater do not fit in the + // u8 that we write to stdout. So we test that it wraps: + // + // We give it this input: + // \o501 = 1_0100_0001 (yes, **9** bits) + // This should be wrapped into: + // \o101 = 'A' = 0100_0001, + // because we only write a single character + new_ucmd!() + .arg("-e") + .arg("\\0501") + .succeeds() + .stdout_is("A\n"); +} From 1ad10dd371cfe341993c23d302c7a002a85953ae Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 29 Aug 2023 21:54:19 +0200 Subject: [PATCH 2/4] echo: add support for \NNN octal escape sequence --- src/uu/echo/src/echo.rs | 10 ++++++++++ tests/by-util/test_echo.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 4aba703a17d..565166842db 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -54,6 +54,16 @@ fn print_escaped(input: &str, mut output: impl Write) -> io::Result { continue; } + // This is for the \NNN syntax for octal sequences. + // Note that '0' is intentionally omitted because that + // would be the \0NNN syntax. + if let Some('1'..='8') = iter.peek() { + if let Some(parsed) = parse_code(&mut iter, 8, 3) { + write!(output, "{parsed}")?; + continue; + } + } + if let Some(next) = iter.next() { let unescaped = match next { '\\' => '\\', diff --git a/tests/by-util/test_echo.rs b/tests/by-util/test_echo.rs index 82137c715ee..7de963973a2 100644 --- a/tests/by-util/test_echo.rs +++ b/tests/by-util/test_echo.rs @@ -253,3 +253,30 @@ fn wrapping_octal() { .succeeds() .stdout_is("A\n"); } + +#[test] +fn old_octal_syntax() { + new_ucmd!() + .arg("-e") + .arg("\\1foo") + .succeeds() + .stdout_is("\x01foo\n"); + + new_ucmd!() + .arg("-e") + .arg("\\43foo") + .succeeds() + .stdout_is("#foo\n"); + + new_ucmd!() + .arg("-e") + .arg("\\101foo") + .succeeds() + .stdout_is("Afoo\n"); + + new_ucmd!() + .arg("-e") + .arg("\\1011") + .succeeds() + .stdout_is("A1\n"); +} From 45487d47b88d7aeb792869f48250f7c01f58507e Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 25 Sep 2023 11:43:21 +0200 Subject: [PATCH 3/4] extract Base enum --- src/uu/echo/src/echo.rs | 32 ++++++++++++++++++++++++-------- tests/by-util/test_echo.rs | 4 ++-- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 565166842db..5ccc6a32a95 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -21,8 +21,24 @@ mod options { pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape"; } +#[repr(u8)] +#[derive(Clone, Copy)] +enum Base { + Oct = 8, + Hex = 16, +} + +impl Base { + fn max_digits(&self) -> u8 { + match self { + Self::Oct => 3, + Self::Hex => 2, + } + } +} + /// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences -fn parse_code(input: &mut Peekable, base: u8, max_digits: u32) -> Option { +fn parse_code(input: &mut Peekable, base: Base) -> Option { // All arithmetic on `ret` needs to be wrapping, because octal input can // take 3 digits, which is 9 bits, and therefore more than what fits in a // `u8`. GNU just seems to wrap these values. @@ -31,15 +47,15 @@ fn parse_code(input: &mut Peekable, base: u8, max_digits: u32) -> Option< // `u8::MAX` as unicode. let mut ret = input.peek().and_then(|c| c.to_digit(base as u32))? as u8; - // We can safely ifgnore the None case because we just peeked it. + // We can safely ignore the None case because we just peeked it. let _ = input.next(); - for _ in 1..max_digits { + for _ in 1..base.max_digits() { match input.peek().and_then(|c| c.to_digit(base as u32)) { - Some(n) => ret = ret.wrapping_mul(base).wrapping_add(n as u8), + Some(n) => ret = ret.wrapping_mul(base as u8).wrapping_add(n as u8), None => break, } - // We can safely ifgnore the None case because we just peeked it. + // We can safely ignore the None case because we just peeked it. let _ = input.next(); } @@ -58,7 +74,7 @@ fn print_escaped(input: &str, mut output: impl Write) -> io::Result { // Note that '0' is intentionally omitted because that // would be the \0NNN syntax. if let Some('1'..='8') = iter.peek() { - if let Some(parsed) = parse_code(&mut iter, 8, 3) { + if let Some(parsed) = parse_code(&mut iter, Base::Oct) { write!(output, "{parsed}")?; continue; } @@ -77,14 +93,14 @@ fn print_escaped(input: &str, mut output: impl Write) -> io::Result { 't' => '\t', 'v' => '\x0b', 'x' => { - if let Some(c) = parse_code(&mut iter, 16, 2) { + if let Some(c) = parse_code(&mut iter, Base::Hex) { c } else { write!(output, "\\")?; 'x' } } - '0' => parse_code(&mut iter, 8, 3).unwrap_or('\0'), + '0' => parse_code(&mut iter, Base::Oct).unwrap_or('\0'), c => { write!(output, "\\")?; c diff --git a/tests/by-util/test_echo.rs b/tests/by-util/test_echo.rs index 7de963973a2..dce5a4c9520 100644 --- a/tests/by-util/test_echo.rs +++ b/tests/by-util/test_echo.rs @@ -270,9 +270,9 @@ fn old_octal_syntax() { new_ucmd!() .arg("-e") - .arg("\\101foo") + .arg("\\101 foo") .succeeds() - .stdout_is("Afoo\n"); + .stdout_is("A foo\n"); new_ucmd!() .arg("-e") From a107374c8286796b170fbabbd65a6a28656a7415 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 3 Oct 2023 12:10:20 +0200 Subject: [PATCH 4/4] echo: use controlflow instead of bool --- src/uu/echo/src/echo.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 5ccc6a32a95..94788721004 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -6,6 +6,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use std::io::{self, Write}; use std::iter::Peekable; +use std::ops::ControlFlow; use std::str::Chars; use uucore::error::{FromIo, UResult}; use uucore::{format_usage, help_about, help_section, help_usage}; @@ -62,7 +63,7 @@ fn parse_code(input: &mut Peekable, base: Base) -> Option { Some(ret.into()) } -fn print_escaped(input: &str, mut output: impl Write) -> io::Result { +fn print_escaped(input: &str, mut output: impl Write) -> io::Result> { let mut iter = input.chars().peekable(); while let Some(c) = iter.next() { if c != '\\' { @@ -85,7 +86,7 @@ fn print_escaped(input: &str, mut output: impl Write) -> io::Result { '\\' => '\\', 'a' => '\x07', 'b' => '\x08', - 'c' => return Ok(true), + 'c' => return Ok(ControlFlow::Break(())), 'e' => '\x1b', 'f' => '\x0c', 'n' => '\n', @@ -112,7 +113,7 @@ fn print_escaped(input: &str, mut output: impl Write) -> io::Result { } } - Ok(false) + Ok(ControlFlow::Continue(())) } #[uucore::main] @@ -173,8 +174,7 @@ fn execute(no_newline: bool, escaped: bool, free: &[String]) -> io::Result<()> { write!(output, " ")?; } if escaped { - let should_stop = print_escaped(input, &mut output)?; - if should_stop { + if print_escaped(input, &mut output)?.is_break() { break; } } else {