Skip to content

Commit

Permalink
Merge pull request #5218 from tertsdiepraam/echo-fix-octal-wrapping
Browse files Browse the repository at this point in the history
`echo`: fix wrapping behavior of octal sequences
  • Loading branch information
cakebaker authored Oct 3, 2023
2 parents 689b21d + a107374 commit 139f205
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 56 deletions.
137 changes: 81 additions & 56 deletions src/uu/echo/src/echo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use clap::{crate_version, Arg, ArgAction, Command};
use std::io::{self, Write};
use std::iter::Peekable;
use std::ops::ControlFlow;
use std::str::Chars;
use uucore::error::{FromIo, UResult};
use uucore::{format_usage, help_about, help_section, help_usage};
Expand All @@ -21,73 +22,98 @@ mod options {
pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape";
}

/// Numeric base of the digits in an escape sequence.
///
/// Each variant's discriminant is its radix, so `base as u32` can be handed
/// to `char::to_digit` and `base as u8` can be used as the multiplier while
/// accumulating digits.
#[repr(u8)]
#[derive(Clone, Copy)]
enum Base {
    /// Octal digits, radix 8.
    Oct = 8,
    /// Hexadecimal digits, radix 16.
    Hex = 16,
}

impl Base {
    /// Maximum number of digits that one escape sequence in this base may
    /// contain: three for octal, two for hexadecimal.
    fn max_digits(&self) -> u8 {
        match self {
            Self::Oct => 3,
            Self::Hex => 2,
        }
    }
}

fn print_escaped(input: &str, mut output: impl Write) -> io::Result<bool> {
let mut should_stop = false;
/// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences
fn parse_code(input: &mut Peekable<Chars>, base: Base) -> Option<char> {
// All arithmetic on `ret` needs to be wrapping, because octal input can
// take 3 digits, which is 9 bits, and therefore more than what fits in a
// `u8`. GNU just seems to wrap these values.
// Note that if we instead make `ret` a `u32` and use `char::from_u32` will
// yield incorrect results because it will interpret values larger than
// `u8::MAX` as unicode.
let mut ret = input.peek().and_then(|c| c.to_digit(base as u32))? as u8;

// We can safely ignore the None case because we just peeked it.
let _ = input.next();

for _ in 1..base.max_digits() {
match input.peek().and_then(|c| c.to_digit(base as u32)) {
Some(n) => ret = ret.wrapping_mul(base as u8).wrapping_add(n as u8),
None => break,
}
// We can safely ignore the None case because we just peeked it.
let _ = input.next();
}

let mut buffer = ['\\'; 2];
Some(ret.into())
}

// TODO `cargo +nightly clippy` complains that `.peek()` is never
// called on `iter`. However, `peek()` is called inside the
// `parse_code()` function that borrows `iter`.
fn print_escaped(input: &str, mut output: impl Write) -> io::Result<ControlFlow<()>> {
let mut iter = input.chars().peekable();
while let Some(mut c) = iter.next() {
let mut start = 1;

if c == '\\' {
if let Some(next) = iter.next() {
c = match next {
'\\' => '\\',
'a' => '\x07',
'b' => '\x08',
'c' => {
should_stop = true;
break;
}
'e' => '\x1b',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'v' => '\x0b',
'x' => parse_code(&mut iter, 16, 2, 4).unwrap_or_else(|| {
start = 0;
next
}),
'0' => parse_code(&mut iter, 8, 3, 3).unwrap_or('\0'),
_ => {
start = 0;
next
}
};
}
while let Some(c) = iter.next() {
if c != '\\' {
write!(output, "{c}")?;
continue;
}

buffer[1] = c;
// This is for the \NNN syntax for octal sequences.
// Note that '0' is intentionally omitted because that
// would be the \0NNN syntax.
if let Some('1'..='8') = iter.peek() {
if let Some(parsed) = parse_code(&mut iter, Base::Oct) {
write!(output, "{parsed}")?;
continue;
}
}

// because printing char slices is apparently not available in the standard library
for ch in &buffer[start..] {
write!(output, "{ch}")?;
if let Some(next) = iter.next() {
let unescaped = match next {
'\\' => '\\',
'a' => '\x07',
'b' => '\x08',
'c' => return Ok(ControlFlow::Break(())),
'e' => '\x1b',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'v' => '\x0b',
'x' => {
if let Some(c) = parse_code(&mut iter, Base::Hex) {
c
} else {
write!(output, "\\")?;
'x'
}
}
'0' => parse_code(&mut iter, Base::Oct).unwrap_or('\0'),
c => {
write!(output, "\\")?;
c
}
};
write!(output, "{unescaped}")?;
} else {
write!(output, "\\")?;
}
}

Ok(should_stop)
Ok(ControlFlow::Continue(()))
}

#[uucore::main]
Expand Down Expand Up @@ -148,8 +174,7 @@ fn execute(no_newline: bool, escaped: bool, free: &[String]) -> io::Result<()> {
write!(output, " ")?;
}
if escaped {
let should_stop = print_escaped(input, &mut output)?;
if should_stop {
if print_escaped(input, &mut output)?.is_break() {
break;
}
} else {
Expand Down
44 changes: 44 additions & 0 deletions tests/by-util/test_echo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,47 @@ fn test_hyphen_values_between() {
.success()
.stdout_is("dumdum dum dum dum -e dum\n");
}

#[test]
fn wrapping_octal() {
    // GNU quirk: three octal digits can encode nine bits, but only a single
    // byte is written, so values of \0400 and above wrap around.
    //
    //   input:   \0501 = 1_0100_0001   (nine bits)
    //   written: \0101 =   0100_0001 = 'A'
    let nine_bit_escape = r"\0501";

    new_ucmd!()
        .arg("-e")
        .arg(nine_bit_escape)
        .succeeds()
        .stdout_is("A\n");
}

#[test]
fn old_octal_syntax() {
    // `\NNN` without a leading zero: one to three octal digits are consumed,
    // anything after them is printed as-is.
    let cases = [
        (r"\1foo", "\x01foo\n"),
        (r"\43foo", "#foo\n"),
        (r"\101 foo", "A foo\n"),
        (r"\1011", "A1\n"),
    ];

    for (escaped, expected) in cases {
        new_ucmd!()
            .arg("-e")
            .arg(escaped)
            .succeeds()
            .stdout_is(expected);
    }
}

0 comments on commit 139f205

Please sign in to comment.