Skip to content

Commit

Permalink
Merge pull request #3455 from mike-kfed/ptx_breakfile
Browse files Browse the repository at this point in the history
ptx: implement breakfile option
  • Loading branch information
sylvestre authored May 2, 2022
2 parents e8aca59 + 3078ca8 commit fe7829d
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 5 deletions.
52 changes: 47 additions & 5 deletions src/uu/ptx/src/ptx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.

// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset
// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS

use clap::{crate_version, Arg, Command};
use regex::Regex;
Expand All @@ -31,6 +31,8 @@ const ABOUT: &str = "\
Mandatory arguments to long options are mandatory for short options too.\n\
With no FILE, or when FILE is -, read standard input. Default is '-F /'.";

/// Characters that are prefixed with a backslash before being placed inside
/// the generated `[^...]+` regex character class: `^`, `-`, `]` and `\`.
const REGEX_CHARCLASS: &str = "^-]\\";

#[derive(Debug)]
enum OutFormat {
Dumb,
Expand Down Expand Up @@ -88,6 +90,18 @@ fn read_word_filter_file(
Ok(words)
}

/// Loads the file named by the command-line option `option` and returns the
/// distinct characters it contains (used for the break-file option).
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read
/// into a `String`.
///
/// # Panics
///
/// Panics if `option` has no value in `matches`.
fn read_char_filter_file(
    matches: &clap::ArgMatches,
    option: &str,
) -> std::io::Result<HashSet<char>> {
    let path = matches.value_of(option).expect("parsing options failed!");
    let mut contents = String::new();
    File::open(path)?.read_to_string(&mut contents)?;
    // Collecting into a set drops duplicate characters.
    let unique_chars: HashSet<char> = contents.chars().collect();
    Ok(unique_chars)
}

#[derive(Debug)]
struct WordFilter {
only_specified: bool,
Expand All @@ -113,9 +127,23 @@ impl WordFilter {
} else {
(false, HashSet::new())
};
if matches.is_present(options::BREAK_FILE) {
return Err(PtxError::NotImplemented("-b").into());
}
let break_set: Option<HashSet<char>> = if matches.is_present(options::BREAK_FILE)
&& !matches.is_present(options::WORD_REGEXP)
{
let chars =
read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?;
let mut hs: HashSet<char> = if config.gnu_ext {
HashSet::new() // really only chars found in file
} else {
// with GNU extensions disabled, space, tab and newline are always break characters
[' ', '\t', '\n'].iter().cloned().collect()
};
hs.extend(chars);
Some(hs)
} else {
// -W takes precedence over -b, or no break file was given
None
};
// Ignore empty string regex from cmd-line-args
let arg_reg: Option<String> = if matches.is_present(options::WORD_REGEXP) {
match matches.value_of(options::WORD_REGEXP) {
Expand All @@ -134,7 +162,21 @@ impl WordFilter {
let reg = match arg_reg {
Some(arg_reg) => arg_reg,
None => {
if config.gnu_ext {
if break_set.is_some() {
format!(
"[^{}]+",
break_set
.unwrap()
.into_iter()
.map(|c| if REGEX_CHARCLASS.contains(c) {
format!("\\{}", c)
} else {
c.to_string()
})
.collect::<Vec<String>>()
.join("")
)
} else if config.gnu_ext {
"\\w+".to_owned()
} else {
"[^ \t\n]+".to_owned()
Expand Down
32 changes: 32 additions & 0 deletions tests/by-util/test_ptx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,35 @@ fn gnu_ext_disabled_ignore_and_only_file() {
.succeeds()
.stdout_only_fixture("gnu_ext_disabled_ignore_and_only_file.expected");
}

/// Runs `ptx -G -w 50 input` and checks stdout against the stored fixture.
#[test]
fn gnu_ext_disabled_output_width_50() {
    let cli_args = ["-G", "-w", "50", "input"];
    let expected_fixture = "gnu_ext_disabled_output_width_50.expected";

    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}

/// Runs `ptx -G -w 70 input` and checks stdout against the stored fixture.
#[test]
fn gnu_ext_disabled_output_width_70() {
    let cli_args = ["-G", "-w", "70", "input"];
    let expected_fixture = "gnu_ext_disabled_output_width_70.expected";

    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}

/// Runs `ptx -G -b break_file input` and checks stdout against the stored
/// fixture for break-file handling.
#[test]
fn gnu_ext_disabled_break_file() {
    let cli_args = ["-G", "-b", "break_file", "input"];
    let expected_fixture = "gnu_ext_disabled_break_file.expected";

    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}

/// Passes an empty `-W` regexp together with `-b break_file` and expects the
/// same output as the rightward/no-reference fixture, i.e. the break file
/// has no effect on the result.
#[test]
fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() {
    let cli_args = ["-G", "-b", "break_file", "-R", "-W", "", "input"];
    let expected_fixture = "gnu_ext_disabled_rightward_no_ref.expected";

    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
1 change: 1 addition & 0 deletions tests/fixtures/ptx/break_file
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
abc_e^-]\
42 changes: 42 additions & 0 deletions tests/fixtures/ptx/gnu_ext_disabled_break_file.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
.xx "" "" """quotes"", for roff" ""
.xx "" "and some other like %a, b" "#, c$c" ""
.xx "" "and some other like %a, b#, c" "$c" ""
.xx "" "and some other like" "%a, b#, c$c" ""
.xx "" "and some other like %a" ", b#, c$c" ""
.xx "" """quotes""," "for roff" ""
.xx "" "{brackets}" "for tex" ""
.xx "" "" "hello world!" ""
.xx "" "let's c" "heck special characters:" ""
.xx "" "let's check special c" "haracters:" ""
.xx "" "let's check spec" "ial characters:" ""
.xx "" "let's chec" "k special characters:" ""
.xx "" "{brac" "kets} for tex" ""
.xx "" "oh, and bac" "k\slash" ""
.xx "" "" "let's check special characters:" ""
.xx "" "let's check specia" "l characters:" ""
.xx "" "and some other" "like %a, b#, c$c" ""
.xx "" "he" "llo world!" ""
.xx "" "maybe a" "lso~or^" ""
.xx "" "" "maybe also~or^" ""
.xx "" "a" "nd some other like %a, b#, c$c" ""
.xx "" "oh, a" "nd back\slash" ""
.xx "" "" "oh, and back\slash" ""
.xx "" "and some" "other like %a, b#, c$c" ""
.xx "" "let's check special cha" "racters:" ""
.xx "" "{b" "rackets} for tex" ""
.xx "" "and some othe" "r like %a, b#, c$c" ""
.xx "" """quotes"", for" "roff" ""
.xx "" "let's check special characte" "rs:" ""
.xx "" """quote" "s"", for roff" ""
.xx "" "oh, and back\sla" "sh" ""
.xx "" "oh, and back\" "slash" ""
.xx "" "and" "some other like %a, b#, c$c" ""
.xx "" "let's check" "special characters:" ""
.xx "" "let's check special charac" "ters:" ""
.xx "" "{brackets} for" "tex" ""
.xx "" "le" "t's check special characters:" ""
.xx "" "{bracke" "ts} for tex" ""
.xx "" "hello" "world!" ""
.xx "" "{brackets} for te" "x" ""
.xx "" "ma" "ybe also~or^" ""
.xx "" "" "{brackets} for tex" ""
24 changes: 24 additions & 0 deletions tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
.xx "" "" """quotes"", for roff" ""
.xx "" "and some other like" "%a, b#, c$c" ""
.xx "" "maybe" "also~or^" ""
.xx "%a, b#, c$c" "" "and some other like" ""
.xx "" "oh," "and back\slash" ""
.xx "" "some other like %a," "b#, c$c" "and"
.xx "" "oh, and" "back\slash" ""
.xx "" "other like %a, b#," "c$c" "and some"
.xx "" "let's check special" "characters:" ""
.xx "characters:" "let's" "check special" ""
.xx "" """quotes""," "for roff" ""
.xx "" "{brackets}" "for tex" ""
.xx "" "" "hello world!" ""
.xx "characters:" "" "let's check special" ""
.xx "" "and some other" "like %a, b#, c$c" ""
.xx "" "" "maybe also~or^" ""
.xx "" "" "oh, and back\slash" ""
.xx "" "and some" "other like %a, b#, c$c" ""
.xx "" """quotes"", for" "roff" ""
.xx "b#, c$c" "and" "some other like %a," ""
.xx "" "let's check" "special characters:" ""
.xx "" "{brackets} for" "tex" ""
.xx "" "hello" "world!" ""
.xx "" "" "{brackets} for tex" ""
24 changes: 24 additions & 0 deletions tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
.xx "" "" """quotes"", for roff" ""
.xx "" "and some other like" "%a, b#, c$c" ""
.xx "" "maybe" "also~or^" ""
.xx "" "" "and some other like %a, b#, c$c" ""
.xx "" "oh," "and back\slash" ""
.xx "" "and some other like %a," "b#, c$c" ""
.xx "" "oh, and" "back\slash" ""
.xx "" "and some other like %a, b#," "c$c" ""
.xx "" "let's check special" "characters:" ""
.xx "" "let's" "check special characters:" ""
.xx "" """quotes""," "for roff" ""
.xx "" "{brackets}" "for tex" ""
.xx "" "" "hello world!" ""
.xx "" "" "let's check special characters:" ""
.xx "" "and some other" "like %a, b#, c$c" ""
.xx "" "" "maybe also~or^" ""
.xx "" "" "oh, and back\slash" ""
.xx "" "and some" "other like %a, b#, c$c" ""
.xx "" """quotes"", for" "roff" ""
.xx "" "and" "some other like %a, b#, c$c" ""
.xx "" "let's check" "special characters:" ""
.xx "" "{brackets} for" "tex" ""
.xx "" "hello" "world!" ""
.xx "" "" "{brackets} for tex" ""

0 comments on commit fe7829d

Please sign in to comment.