diff --git a/Cargo.lock b/Cargo.lock index ab6396b2..bf9c233f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1497,6 +1497,7 @@ dependencies = [ "dirs 5.0.1", "gettext-rs", "libc", + "memchr", "notify-debouncer-full", "plib", "proptest", diff --git a/text/Cargo.toml b/text/Cargo.toml index 10554d3a..68610d4e 100644 --- a/text/Cargo.toml +++ b/text/Cargo.toml @@ -19,6 +19,7 @@ notify-debouncer-full = "0.3" diff = "0.1" dirs = "5.0" walkdir = "2" +memchr = "2" [dev-dependencies] proptest = "1" diff --git a/text/grep.rs b/text/grep.rs index 5221bdb8..a06002ac 100644 --- a/text/grep.rs +++ b/text/grep.rs @@ -9,11 +9,14 @@ use clap::Parser; use gettextrs::{bind_textdomain_codeset, setlocale, textdomain, LocaleCategory}; -use libc::{regcomp, regex_t, regexec, regfree, REG_EXTENDED, REG_ICASE, REG_NOMATCH}; +use libc::{regcomp, regex_t, regexec, regfree, regmatch_t, REG_EXTENDED, REG_ICASE, REG_NOMATCH}; +use memchr::memmem; use std::{ + error::Error, ffi::CString, fs::File, - io::{self, BufRead, BufReader}, + io::{self, BufRead, BufReader, StdoutLock, Write}, + mem::MaybeUninit, path::{Path, PathBuf}, ptr, }; @@ -46,7 +49,7 @@ struct Args { #[arg(short, long)] ignore_case: bool, - /// Write only the names of input_files containing selected lines to standard output. + /// Write only the names of input files containing selected lines to standard output. #[arg(short = 'l', long)] files_with_matches: bool, @@ -54,11 +57,15 @@ struct Args { #[arg(short = 'n', long)] line_number: bool, - /// Write only the names of input_files containing selected lines to standard output. + /// Only print the matching characters in each line. + #[arg(short = 'o', long = "only-matching")] + only_matching: bool, + + /// Do not print to standard output. The presence or absence of a match is communicated with the exit status. #[arg(short, long)] quiet: bool, - /// Suppress the error messages ordinarily written for nonexistent or unreadable input_files. + /// Suppress the error messages ordinarily written for nonexistent or unreadable input files. #[arg(short = 's', long)] no_messages: bool, @@ -80,9 +87,6 @@ struct Args { /// standard input shall be used. #[arg(name = "FILE")] input_files: Vec, - - #[arg(skip)] - any_errors: bool, } impl Args { @@ -91,34 +95,42 @@ impl Args { /// # Errors /// /// Returns an error if conflicting options are found. - fn validate_args(&self) -> Result<(), String> { + fn validate_args(&self) -> Result<(), Box> { if self.extended_regexp && self.fixed_strings { - return Err("Options '-E' and '-F' cannot be used together".to_string()); + return Err(Box::from("options '-E' and '-F' cannot be used together")); } + if self.count && self.files_with_matches { - return Err("Options '-c' and '-l' cannot be used together".to_string()); + return Err(Box::from("options '-c' and '-l' cannot be used together")); } + if self.count && self.quiet { - return Err("Options '-c' and '-q' cannot be used together".to_string()); + return Err(Box::from("options '-c' and '-q' cannot be used together")); } + if self.files_with_matches && self.quiet { - return Err("Options '-l' and '-q' cannot be used together".to_string()); + return Err(Box::from("options '-l' and '-q' cannot be used together")); } + if self.regexp.is_empty() && self.file.is_empty() && self.single_pattern.is_none() { - return Err("Required at least one pattern list or file".to_string()); + return Err(Box::from("a pattern list or at least one file is required")); } + Ok(()) } /// Resolves input patterns and input files. Reads patterns from pattern files and merges them with specified as argument. Handles input files if empty. - fn resolve(&mut self) { + fn resolve(&mut self, grep_model_state: &mut GrepModelState) { for path_buf in &self.file { match Self::get_file_patterns(path_buf) { Ok(patterns) => self.regexp.extend(patterns), Err(err) => { - self.any_errors = true; + grep_model_state.any_errors = true; + if !self.no_messages { - eprintln!("{}: {}", path_buf.display(), err); + let path_buf_display = path_buf.display(); + + eprintln!("grep: {path_buf_display}: {err}"); } } } @@ -128,9 +140,9 @@ impl Args { None => {} Some(pattern) => { if !self.regexp.is_empty() { - self.input_files.insert(0, pattern.clone()); + self.input_files.insert(0, pattern.to_owned()); } else { - self.regexp = vec![pattern.clone()]; + self.regexp = vec![pattern.to_owned()]; } } } @@ -138,7 +150,7 @@ impl Args { self.regexp = self .regexp .iter() - .flat_map(|pattern| pattern.split('\n').map(String::from)) + .flat_map(|pattern| pattern.split('\n').map(ToOwned::to_owned)) .collect(); self.regexp.sort_by_key(|r| r.len()); self.regexp.dedup(); @@ -168,17 +180,7 @@ impl Args { /// # Returns /// /// Returns [GrepModel](GrepModel) object. - fn into_grep_model(self) -> Result { - let output_mode = if self.count { - OutputMode::Count(0) - } else if self.files_with_matches { - OutputMode::FilesWithMatches - } else if self.quiet { - OutputMode::Quiet - } else { - OutputMode::Default - }; - + fn into_grep_model(self) -> Result> { let patterns = Patterns::new( self.regexp, self.extended_regexp, @@ -187,16 +189,17 @@ impl Args { self.line_regexp, )?; + let multiple_inputs = self.input_files.len() > 1_usize; + Ok(GrepModel { - any_matches: false, - any_errors: self.any_errors, + input_files: self.input_files, + invert_match: self.invert_match, line_number: self.line_number, + multiple_inputs, no_messages: self.no_messages, - invert_match: self.invert_match, - multiple_inputs: self.input_files.len() > 1, - output_mode, + only_matching: self.only_matching, patterns, - input_files: self.input_files, + quiet: self.quiet, }) } } @@ -231,7 +234,7 @@ impl Patterns { fixed_string: bool, ignore_case: bool, line_regexp: bool, - ) -> Result { + ) -> Result> { if fixed_string { Ok(Self::Fixed( patterns @@ -242,15 +245,18 @@ impl Patterns { line_regexp, )) } else { - let mut ps = vec![]; - let mut cflags = 0; + if extended_regexp { cflags |= REG_EXTENDED; } + if ignore_case { cflags |= REG_ICASE; } + + let mut regex_vec = Vec::::with_capacity(patterns.len()); + for mut pattern in patterns { // macOS version of [regcomp](regcomp) from `libc` // provides additional check for empty regex. In this case, @@ -266,61 +272,182 @@ impl Patterns { pattern }; } + pattern = if line_regexp { format!("^{pattern}$") } else { pattern }; - let c_pattern = CString::new(pattern).map_err(|err| err.to_string())?; - let mut regex = unsafe { std::mem::zeroed::() }; + let pattern_c_string = CString::new(pattern)?; + + let pattern_c_string_pointer = pattern_c_string.as_ptr(); + + let mut regex_maybe_uninit = MaybeUninit::::uninit(); + + let regex_maybe_uninit_pointer = regex_maybe_uninit.as_mut_ptr(); + + let regcomp_result = unsafe { + regcomp(regex_maybe_uninit_pointer, pattern_c_string_pointer, cflags) + }; + + if regcomp_result == 0 { + // Safety: just checked regcomp return value + let regex = unsafe { regex_maybe_uninit.assume_init() }; - let result = unsafe { regcomp(&mut regex, c_pattern.as_ptr(), cflags) }; - if result != 0 { - return Err(format!( - "Error compiling regex '{}'", - c_pattern.to_string_lossy() - )); + regex_vec.push(regex); + } else { + return Err(Box::from(format!( + "error compiling regex '{}'", + pattern_c_string.to_string_lossy() + ))); } - ps.push(regex); } - Ok(Self::Regex(ps)) + + Ok(Self::Regex(regex_vec)) } } /// Checks if input string matches the present patterns. - /// - /// # Arguments - /// - /// * `input` - object that implements [AsRef](AsRef) for [str](str) and describes line. - /// - /// # Returns - /// - /// Returns [bool](bool) - `true` if input matches present patterns, else `false`. - fn matches(&self, input: impl AsRef) -> bool { - let input = input.as_ref(); + fn matches( + &self, + input: &[u8], + collect_matching_substrings: bool, + ) -> Result> { + let mut matching_substrings = Vec::>::new(); + + let mut any_pattern_matched = false; + match self { Patterns::Fixed(patterns, ignore_case, line_regexp) => { - let input = if *ignore_case { - input.to_lowercase() + let input_ascii_lowercase: Vec; + + let input_after_ignore_case = if *ignore_case { + input_ascii_lowercase = input.to_ascii_lowercase(); + + input_ascii_lowercase.as_slice() } else { - input.to_string() + input }; - patterns.iter().any(|p| { + + for pattern in patterns { + let pattern_as_bytes = pattern.as_bytes(); + if *line_regexp { - input == *p + if input_after_ignore_case != pattern_as_bytes { + continue; + } + + if !collect_matching_substrings { + return Ok(MatchesData::fast_path_match()); + } + + any_pattern_matched = true; + + matching_substrings.push(input.to_vec()); } else { - input.contains(p) + if collect_matching_substrings { + let pattern_as_bytes_len = pattern_as_bytes.len(); + + for index in + memmem::find_iter(input_after_ignore_case, pattern_as_bytes) + { + any_pattern_matched = true; + + let match_slice = &input[index..(index + pattern_as_bytes_len)]; + + matching_substrings.push(match_slice.to_vec()); + } + } else { + if memmem::find(input_after_ignore_case, pattern_as_bytes).is_some() { + return Ok(MatchesData::fast_path_match()); + } + } } - }) + } } Patterns::Regex(patterns) => { - let c_input = CString::new(input).unwrap(); - patterns.iter().any(|p| unsafe { - regexec(p, c_input.as_ptr(), 0, ptr::null_mut(), 0) != REG_NOMATCH - }) + const SINGLE_ELEMENT_ARRAY_SIZE: usize = 1_usize; + + let mut regmatch_t_array = [const { + regmatch_t { + rm_so: -1, + rm_eo: -1, + } + }; SINGLE_ELEMENT_ARRAY_SIZE]; + + let (nmatch, pmatch) = if collect_matching_substrings { + (SINGLE_ELEMENT_ARRAY_SIZE, regmatch_t_array.as_mut_ptr()) + } else { + (0_usize, ptr::null_mut()) + }; + + for pattern in patterns { + let mut current_position = 0_usize; + + loop { + // Clear values from the last iteration + if collect_matching_substrings { + let [ref mut regmatch_t] = regmatch_t_array; + + regmatch_t.rm_so = -1; + regmatch_t.rm_eo = -1; + } + + let forward_slice = &input[current_position..]; + + // TODO + // How should slices containing null bytes be handled? + let current_string_c_string = CString::new(forward_slice)?; + + let current_string_pointer = current_string_c_string.as_ptr(); + + let regexec_return_value = + unsafe { regexec(pattern, current_string_pointer, nmatch, pmatch, 0) }; + + if regexec_return_value != 0 { + debug_assert!(regexec_return_value == REG_NOMATCH); + + break; + } + + if !collect_matching_substrings { + return Ok(MatchesData::fast_path_match()); + } + + any_pattern_matched = true; + + let [regmatch_t] = regmatch_t_array; + + let regmatch_t { rm_so, rm_eo } = regmatch_t; + + debug_assert!(rm_so != -1); + debug_assert!(rm_eo != -1); + + let start = usize::try_from(rm_so)?; + let end = usize::try_from(rm_eo)?; + + // TODO + // Is this the right fix? + // The edge case is: + // + // grep -o '' + if end == 0_usize { + break; + } + + matching_substrings.push(forward_slice[start..end].to_vec()); + + current_position += end; + } + } } } + + Ok(MatchesData { + any_pattern_matched, + matching_substrings, + }) } } @@ -346,17 +473,39 @@ enum OutputMode { Default, } +struct GrepModelState { + any_errors: bool, + any_matches: bool, + output_mode: OutputMode, + stdout_lock: StdoutLock<'static>, +} + /// Structure that contains all necessary information for `grep` utility processing. struct GrepModel { - any_matches: bool, - any_errors: bool, - line_number: bool, - no_messages: bool, + input_files: Vec, invert_match: bool, + line_number: bool, multiple_inputs: bool, - output_mode: OutputMode, + no_messages: bool, + only_matching: bool, patterns: Patterns, - input_files: Vec, + quiet: bool, +} + +struct MatchesData { + any_pattern_matched: bool, + /// Will always be empty if the -o option is not being used + matching_substrings: Vec>, +} + +impl MatchesData { + #[inline] + pub fn fast_path_match() -> MatchesData { + MatchesData { + any_pattern_matched: true, + matching_substrings: Vec::>::new(), + } + } } impl GrepModel { @@ -365,37 +514,60 @@ impl GrepModel { /// # Returns /// /// Returns [i32](i32) that represents *exit status code*. - fn grep(&mut self) -> i32 { - for input_name in self.input_files.drain(..).collect::>() { - if input_name == "-" { + fn grep(&self, grep_model_state: &mut GrepModelState) -> Result<(), Box> { + for input_name in &self.input_files { + let input_name_str = input_name.as_str(); + + if input_name_str == "-" { let reader = Box::new(BufReader::new(io::stdin())); - self.process_input("(standard input)", reader); + + if let Err(bo) = self.process_input(grep_model_state, "(standard input)", reader) { + grep_model_state.any_errors = true; + + if !self.no_messages { + eprintln!("grep: {input_name_str}: {bo}"); + } + } } else { - match File::open(&input_name) { + match File::open(input_name_str) { Ok(file) => { let reader = Box::new(BufReader::new(file)); - self.process_input(&input_name, reader) + + self.process_input(grep_model_state, input_name_str, reader)?; } - Err(err) => { - self.any_errors = true; + Err(er) => { + grep_model_state.any_errors = true; + if !self.no_messages { - eprintln!("{}: {}", input_name, err); + eprintln!("grep: {input_name_str}: {er}"); } } } } - if self.any_matches && self.output_mode == OutputMode::Quiet { - return 0; + + if grep_model_state.any_matches && grep_model_state.output_mode == OutputMode::Quiet { + return Ok(()); } } - if self.any_errors { - 2 - } else if !self.any_matches { - 1 - } else { - 0 + Ok(()) + } + + fn print_line_prefix( + &self, + grep_model_state: &mut GrepModelState, + input_name: &str, + line_number: u64, + ) -> Result<(), Box> { + if self.multiple_inputs { + write!(grep_model_state.stdout_lock, "{input_name}:")?; + } + + if self.line_number { + write!(grep_model_state.stdout_lock, "{line_number}:")?; } + + Ok(()) } /// Reads lines from buffer and processes them. @@ -404,81 +576,130 @@ impl GrepModel { /// /// * `input_name` - [str](str) that represents content source name. /// * `reader` - [Box](Box) that contains object that implements [BufRead] and reads lines. - fn process_input(&mut self, input_name: &str, mut reader: Box) { - let mut line_number: u64 = 0; + fn process_input( + &self, + grep_model_state: &mut GrepModelState, + input_name: &str, + mut reader: Box, + ) -> Result<(), Box> { + let mut line_number = 0_u64; + + let mut line = Vec::::new(); + loop { - let mut line = String::new(); - line_number += 1; - match reader.read_line(&mut line) { + line.clear(); + + match reader.read_until(b'\n', &mut line) { Ok(n_read) => { if n_read == 0 { break; } - let trimmed = if line.ends_with('\n') { - &line[..line.len() - 1] - } else { - &line + + line_number += 1; + + let mut iter = line.iter(); + + let line_without_newline = match iter.next_back() { + Some(b'\n') => iter.as_slice(), + _ => line.as_slice(), + }; + + let matches_result = self.patterns.matches( + line_without_newline, + self.only_matching + && matches!(grep_model_state.output_mode, OutputMode::Default), + ); + + let matches_data = match matches_result { + Ok(ma) => ma, + Err(bo) => { + if !self.no_messages { + eprintln!("grep: {input_name}: error matching patterns against line {line_number} ({bo})"); + } + + return Err(bo); + } }; - let init_matches = self.patterns.matches(trimmed); + let MatchesData { + any_pattern_matched, + matching_substrings, + } = matches_data; + let matches = if self.invert_match { - !init_matches + !any_pattern_matched } else { - init_matches + any_pattern_matched }; + if matches { - self.any_matches = true; - match &mut self.output_mode { + grep_model_state.any_matches = true; + + match &mut grep_model_state.output_mode { OutputMode::Count(count) => { *count += 1; } OutputMode::FilesWithMatches => { - println!("{input_name}"); + writeln!(&mut grep_model_state.stdout_lock, "{input_name}")?; + break; } OutputMode::Quiet => { - return; + return Ok(()); } OutputMode::Default => { - let result = format!( - "{}{}{}", - if self.multiple_inputs { - format!("{input_name}:") - } else { - String::new() - }, - if self.line_number { - format!("{line_number}:") - } else { - String::new() - }, - trimmed - ); - println!("{result}"); + if self.only_matching { + for matching_substring in matching_substrings { + self.print_line_prefix( + grep_model_state, + input_name, + line_number, + )?; + + grep_model_state + .stdout_lock + .write_all(matching_substring.as_slice())?; + + grep_model_state.stdout_lock.write_all(b"\n")?; + } + } else { + self.print_line_prefix( + grep_model_state, + input_name, + line_number, + )?; + + grep_model_state + .stdout_lock + .write_all(line_without_newline)?; + + grep_model_state.stdout_lock.write_all(b"\n")?; + } } } } - line.clear(); } - Err(err) => { - self.any_errors = true; + Err(er) => { if !self.no_messages { - eprintln!( - "{}: Error reading line {} ({})", - input_name, line_number, err - ); + eprintln!("grep: {input_name}: error reading line {line_number} ({er})"); } + + return Err(Box::new(er)); } } } - if let OutputMode::Count(count) = &mut self.output_mode { + + if let OutputMode::Count(count) = &mut grep_model_state.output_mode { if self.multiple_inputs { - println!("{input_name}:{count}"); + writeln!(grep_model_state.stdout_lock, "{input_name}:{count}")?; } else { - println!("{count}"); + writeln!(grep_model_state.stdout_lock, "{count}")?; } + *count = 0; } + + Ok(()) } } @@ -493,17 +714,52 @@ fn main() -> Result<(), Box> { let mut args = Args::parse(); - let exit_code = args + let output_mode = if args.count { + OutputMode::Count(0) + } else if args.files_with_matches { + OutputMode::FilesWithMatches + } else if args.quiet { + OutputMode::Quiet + } else { + OutputMode::Default + }; + + let mut grep_model_state = GrepModelState { + any_errors: false, + any_matches: false, + output_mode, + stdout_lock: io::stdout().lock(), + }; + + let result = args .validate_args() .and_then(|_| { - args.resolve(); + args.resolve(&mut grep_model_state); args.into_grep_model() }) - .map(|mut grep_model| grep_model.grep()) - .unwrap_or_else(|err| { - eprintln!("{}", err); - 2 + .and_then(|gr| match gr.grep(&mut grep_model_state) { + Ok(()) => Ok(gr), + Err(bo) => Err(bo), }); + let exit_code = match result { + Ok(grep_model) => { + if grep_model.quiet && grep_model_state.any_matches { + 0 + } else if grep_model_state.any_errors { + 2 + } else if !grep_model_state.any_matches { + 1 + } else { + 0 + } + } + Err(error_message) => { + eprintln!("grep: {error_message}"); + + 2 + } + }; + std::process::exit(exit_code); } diff --git a/text/tests/grep/mod.rs b/text/tests/grep/mod.rs index 49ef06f6..a63c5c36 100644 --- a/text/tests/grep/mod.rs +++ b/text/tests/grep/mod.rs @@ -18,7 +18,7 @@ const BAD_INPUT: &str = "(some text)\n"; const INPUT_FILE_1: &str = "tests/grep/f_1"; const INPUT_FILE_2: &str = "tests/grep/f_2"; const INPUT_FILE_3: &str = "tests/grep/f_3"; -const BAD_INPUT_FILE: &str = "tests/grep/inexisting_file"; +const NONEXISTENT_FILE: &str = "tests/grep/nonexistent_file"; const INVALID_LINE_INPUT_FILE: &str = "tests/grep/invalid_line"; const BRE: &str = r#"line_{[0-9]\{1,\}}"#; @@ -33,7 +33,7 @@ const EMPTY_PATTERN_FILE: &str = "tests/grep/empty_pattern"; fn grep_test( args: &[&str], - test_data: &str, + pipe_to_stdin: &str, expected_output: &str, expected_err: &str, expected_exit_code: i32, @@ -43,7 +43,7 @@ fn grep_test( run_test(TestPlan { cmd: String::from("grep"), args: str_args, - stdin_data: String::from(test_data), + stdin_data: String::from(pipe_to_stdin), expected_out: String::from(expected_output), expected_err: String::from(expected_err), expected_exit_code, @@ -56,21 +56,21 @@ fn test_incompatible_options() { &["-cl"], "", "", - "Options \'-c\' and \'-l\' cannot be used together\n", + "grep: options \'-c\' and \'-l\' cannot be used together\n", 2, ); grep_test( &["-cq"], "", "", - "Options \'-c\' and \'-q\' cannot be used together\n", + "grep: options \'-c\' and \'-q\' cannot be used together\n", 2, ); grep_test( &["-lq"], "", "", - "Options \'-l\' and \'-q\' cannot be used together\n", + "grep: options \'-l\' and \'-q\' cannot be used together\n", 2, ); } @@ -81,18 +81,18 @@ fn test_absent_pattern() { &[], "", "", - "Required at least one pattern list or file\n", + "grep: a pattern list or at least one file is required\n", 2, ); } #[test] -fn test_inexisting_file_pattern() { +fn test_nonexistent_file_pattern() { grep_test( - &["-f", BAD_INPUT_FILE], + &["-f", NONEXISTENT_FILE], "", "", - "tests/grep/inexisting_file: No such file or directory (os error 2)\n", + "grep: tests/grep/nonexistent_file: No such file or directory (os error 2)\n", 2, ); } @@ -103,7 +103,7 @@ fn test_regexp_compiling_error() { &[INVALID_BRE], "", "", - "Error compiling regex '\\{1,3\\}'\n", + "grep: error compiling regex '\\{1,3\\}'\n", 2, ); } @@ -129,9 +129,12 @@ fn test_basic_regexp_03() { grep_test( &[BRE, INVALID_LINE_INPUT_FILE], "", - "line_{1}\np_line_{2}_s\n", - "tests/grep/invalid_line: Error reading line 2 (stream did not contain valid UTF-8)\n", - 2, + "\ +line_{1} +p_line_{2}_s +", + "", + 0, ); } @@ -167,16 +170,16 @@ fn test_basic_regexp_quiet_without_error_02() { #[test] fn test_basic_regexp_quiet_with_error_01() { - grep_test(&["-q", BRE, "-", BAD_INPUT_FILE], LINES_INPUT, "", "", 0); + grep_test(&["-q", BRE, "-", NONEXISTENT_FILE], LINES_INPUT, "", "", 0); } #[test] fn test_basic_regexp_quiet_with_error_02() { grep_test( - &["-q", BRE, BAD_INPUT_FILE, "-"], + &["-q", BRE, NONEXISTENT_FILE, "-"], LINES_INPUT, "", - "tests/grep/inexisting_file: No such file or directory (os error 2)\n", + "grep: tests/grep/nonexistent_file: No such file or directory (os error 2)\n", 0, ); } @@ -184,10 +187,10 @@ fn test_basic_regexp_quiet_with_error_02() { #[test] fn test_basic_regexp_quiet_with_error_03() { grep_test( - &["-q", BRE, "-", BAD_INPUT_FILE], + &["-q", BRE, "-", NONEXISTENT_FILE], BAD_INPUT, "", - "tests/grep/inexisting_file: No such file or directory (os error 2)\n", + "grep: tests/grep/nonexistent_file: No such file or directory (os error 2)\n", 2, ); } @@ -229,9 +232,12 @@ fn test_basic_regexp_line_number_03() { grep_test( &["-n", BRE, INVALID_LINE_INPUT_FILE], "", - "1:line_{1}\n3:p_line_{2}_s\n", - "tests/grep/invalid_line: Error reading line 2 (stream did not contain valid UTF-8)\n", - 2, + "\ +1:line_{1} +3:p_line_{2}_s +", + "", + 0, ); } @@ -253,21 +259,21 @@ fn test_basic_regexp_no_messages_without_error_02() { #[test] fn test_basic_regexp_no_messages_with_error_01() { - grep_test(&["-s", BRE, "-", BAD_INPUT_FILE], LINES_INPUT, "(standard input):line_{1}\n(standard input):p_line_{2}_s\n(standard input): line_{3} \n(standard input):line_{70}\n", "", 2); + grep_test(&["-s", BRE, "-", NONEXISTENT_FILE], LINES_INPUT, "(standard input):line_{1}\n(standard input):p_line_{2}_s\n(standard input): line_{3} \n(standard input):line_{70}\n", "", 2); } #[test] fn test_basic_regexp_no_messages_with_error_02() { - grep_test(&["-s", BRE, "-", BAD_INPUT_FILE], BAD_INPUT, "", "", 2); + grep_test(&["-s", BRE, "-", NONEXISTENT_FILE], BAD_INPUT, "", "", 2); } #[test] fn test_basic_regexp_no_messages_with_error_03() { grep_test( - &["-s", INVALID_BRE, "-", BAD_INPUT_FILE], + &["-s", INVALID_BRE, "-", NONEXISTENT_FILE], LINES_INPUT, "", - "Error compiling regex '\\{1,3\\}'\n", + "grep: error compiling regex '\\{1,3\\}'\n", 2, ); } @@ -275,7 +281,7 @@ fn test_basic_regexp_no_messages_with_error_03() { #[test] fn test_basic_regexp_no_messages_with_error_04() { grep_test( - &["-q", "-s", BRE, BAD_INPUT_FILE, "-"], + &["-q", "-s", BRE, NONEXISTENT_FILE, "-"], LINES_INPUT, "", "", @@ -290,7 +296,7 @@ fn test_basic_regexp_no_messages_with_error_05() { "", "line_{1}\np_line_{2}_s\n", "", - 2, + 0, ); } @@ -352,9 +358,12 @@ fn test_extended_regexp_03() { grep_test( &["-E", ERE, INVALID_LINE_INPUT_FILE], "", - "line_{1}\np_line_{2}_s\n", - "tests/grep/invalid_line: Error reading line 2 (stream did not contain valid UTF-8)\n", - 2, + "\ +line_{1} +p_line_{2}_s +", + "", + 0, ); } @@ -391,7 +400,7 @@ fn test_extended_regexp_quiet_without_error_02() { #[test] fn test_extended_regexp_quiet_with_error_01() { grep_test( - &["-E", "-q", ERE, "-", BAD_INPUT_FILE], + &["-E", "-q", ERE, "-", NONEXISTENT_FILE], LINES_INPUT, "", "", @@ -402,10 +411,10 @@ fn test_extended_regexp_quiet_with_error_01() { #[test] fn test_extended_regexp_quiet_with_error_02() { grep_test( - &["-E", "-q", ERE, BAD_INPUT_FILE, "-"], + &["-E", "-q", ERE, NONEXISTENT_FILE, "-"], LINES_INPUT, "", - "tests/grep/inexisting_file: No such file or directory (os error 2)\n", + "grep: tests/grep/nonexistent_file: No such file or directory (os error 2)\n", 0, ); } @@ -413,10 +422,10 @@ fn test_extended_regexp_quiet_with_error_02() { #[test] fn test_extended_regexp_quiet_with_error_03() { grep_test( - &["-E", "-q", ERE, "-", BAD_INPUT_FILE], + &["-E", "-q", ERE, "-", NONEXISTENT_FILE], BAD_INPUT, "", - "tests/grep/inexisting_file: No such file or directory (os error 2)\n", + "grep: tests/grep/nonexistent_file: No such file or directory (os error 2)\n", 2, ); } @@ -458,9 +467,12 @@ fn test_extended_regexp_line_number_03() { grep_test( &["-E", "-n", ERE, INVALID_LINE_INPUT_FILE], "", - "1:line_{1}\n3:p_line_{2}_s\n", - "tests/grep/invalid_line: Error reading line 2 (stream did not contain valid UTF-8)\n", - 2, + "\ +1:line_{1} +3:p_line_{2}_s +", + "", + 0, ); } @@ -482,13 +494,13 @@ fn test_extended_regexp_no_messages_without_error_02() { #[test] fn test_extended_regexp_no_messages_with_error_01() { - grep_test(&["-E", "-s", ERE, "-", BAD_INPUT_FILE], LINES_INPUT, "(standard input):line_{1}\n(standard input):p_line_{2}_s\n(standard input): line_{3} \n(standard input):line_{70}\n", "", 2); + grep_test(&["-E", "-s", ERE, "-", NONEXISTENT_FILE], LINES_INPUT, "(standard input):line_{1}\n(standard input):p_line_{2}_s\n(standard input): line_{3} \n(standard input):line_{70}\n", "", 2); } #[test] fn test_extended_regexp_no_messages_with_error_02() { grep_test( - &["-E", "-s", ERE, "-", BAD_INPUT_FILE], + &["-E", "-s", ERE, "-", NONEXISTENT_FILE], BAD_INPUT, "", "", @@ -499,10 +511,10 @@ fn test_extended_regexp_no_messages_with_error_02() { #[test] fn test_extended_regexp_no_messages_with_error_03() { grep_test( - &["-E", "-s", INVALID_ERE, "-", BAD_INPUT_FILE], + &["-E", "-s", INVALID_ERE, "-", NONEXISTENT_FILE], LINES_INPUT, "", - "Error compiling regex '{1,3}'\n", + "grep: error compiling regex '{1,3}'\n", 2, ); } @@ -510,7 +522,7 @@ fn test_extended_regexp_no_messages_with_error_03() { #[test] fn test_extended_regexp_no_messages_with_error_04() { grep_test( - &["-E", "-q", "-s", ERE, BAD_INPUT_FILE, "-"], + &["-E", "-q", "-s", ERE, NONEXISTENT_FILE, "-"], LINES_INPUT, "", "", @@ -523,9 +535,12 @@ fn test_extended_regexp_no_messages_with_error_05() { grep_test( &["-E", "-s", ERE, INVALID_LINE_INPUT_FILE], "", - "line_{1}\np_line_{2}_s\n", + "\ +line_{1} +p_line_{2}_s +", "", - 2, + 0, ); } @@ -593,9 +608,12 @@ fn test_fixed_strings_03() { grep_test( &["-F", FIXED, INVALID_LINE_INPUT_FILE], "", - "line_{1}\np_line_{2}_s\n", - "tests/grep/invalid_line: Error reading line 2 (stream did not contain valid UTF-8)\n", - 2, + "\ +line_{1} +p_line_{2}_s +", + "", + 0, ); } @@ -638,7 +656,7 @@ fn test_fixed_strings_quiet_without_error_02() { #[test] fn test_fixed_strings_quiet_with_error_01() { grep_test( - &["-F", "-q", FIXED, "-", BAD_INPUT_FILE], + &["-F", "-q", FIXED, "-", NONEXISTENT_FILE], LINES_INPUT, "", "", @@ -649,10 +667,10 @@ fn test_fixed_strings_quiet_with_error_01() { #[test] fn test_fixed_strings_quiet_with_error_02() { grep_test( - &["-F", "-q", FIXED, BAD_INPUT_FILE, "-"], + &["-F", "-q", FIXED, NONEXISTENT_FILE, "-"], LINES_INPUT, "", - "tests/grep/inexisting_file: No such file or directory (os error 2)\n", + "grep: tests/grep/nonexistent_file: No such file or directory (os error 2)\n", 0, ); } @@ -660,10 +678,10 @@ fn test_fixed_strings_quiet_with_error_02() { #[test] fn test_fixed_strings_quiet_with_error_03() { grep_test( - &["-F", "-q", FIXED, "-", BAD_INPUT_FILE], + &["-F", "-q", FIXED, "-", NONEXISTENT_FILE], BAD_INPUT, "", - "tests/grep/inexisting_file: No such file or directory (os error 2)\n", + "grep: tests/grep/nonexistent_file: No such file or directory (os error 2)\n", 2, ); } @@ -705,9 +723,12 @@ fn test_fixed_strings_line_number_03() { grep_test( &["-F", "-n", FIXED, INVALID_LINE_INPUT_FILE], "", - "1:line_{1}\n3:p_line_{2}_s\n", - "tests/grep/invalid_line: Error reading line 2 (stream did not contain valid UTF-8)\n", - 2, + "\ +1:line_{1} +3:p_line_{2}_s +", + "", + 0, ); } @@ -729,13 +750,13 @@ fn test_fixed_strings_no_messages_without_error_02() { #[test] fn test_fixed_strings_no_messages_with_error_01() { - grep_test(&["-F", "-s", FIXED, "-", BAD_INPUT_FILE], LINES_INPUT, "(standard input):line_{1}\n(standard input):p_line_{2}_s\n(standard input): line_{3} \n(standard input):line_{70}\n", "", 2); + grep_test(&["-F", "-s", FIXED, "-", NONEXISTENT_FILE], LINES_INPUT, "(standard input):line_{1}\n(standard input):p_line_{2}_s\n(standard input): line_{3} \n(standard input):line_{70}\n", "", 2); } #[test] fn test_fixed_strings_no_messages_with_error_02() { grep_test( - &["-F", "-s", FIXED, "-", BAD_INPUT_FILE], + &["-F", "-s", FIXED, "-", NONEXISTENT_FILE], BAD_INPUT, "", "", @@ -746,10 +767,10 @@ fn test_fixed_strings_no_messages_with_error_02() { #[test] fn test_fixed_strings_no_messages_with_error_03() { grep_test( - &["-F", "-cl", "-s", FIXED, "-", BAD_INPUT_FILE], + &["-F", "-cl", "-s", FIXED, "-", NONEXISTENT_FILE], LINES_INPUT, "", - "Options '-c' and '-l' cannot be used together\n", + "grep: options '-c' and '-l' cannot be used together\n", 2, ); } @@ -757,7 +778,7 @@ fn test_fixed_strings_no_messages_with_error_03() { #[test] fn test_fixed_strings_no_messages_with_error_04() { grep_test( - &["-F", "-q", "-s", FIXED, BAD_INPUT_FILE, "-"], + &["-F", "-q", "-s", FIXED, NONEXISTENT_FILE, "-"], LINES_INPUT, "", "", @@ -772,7 +793,7 @@ fn test_fixed_strings_no_messages_with_error_05() { "", "line_{1}\np_line_{2}_s\n", "", - 2, + 0, ); } @@ -1005,7 +1026,7 @@ fn test_stdin_and_input_files_quiet() { #[test] fn test_stdin_and_input_files_other_options() { - grep_test(&["-insvx", BRE, "-", INPUT_FILE_1, BAD_INPUT_FILE], LINES_INPUT, "(standard input):2:p_line_{2}_s\n(standard input):3: line_{3} \n(standard input):5:p_LINE_{5}_s\n(standard input):6:l_{6}\ntests/grep/f_1:2:p_line_{2}_s\ntests/grep/f_1:3: line_{3} \ntests/grep/f_1:5:p_LINE_{5}_s\ntests/grep/f_1:6:l_{6}\n", "", 2); + grep_test(&["-insvx", BRE, "-", INPUT_FILE_1, NONEXISTENT_FILE], LINES_INPUT, "(standard input):2:p_line_{2}_s\n(standard input):3: line_{3} \n(standard input):5:p_LINE_{5}_s\n(standard input):6:l_{6}\ntests/grep/f_1:2:p_line_{2}_s\ntests/grep/f_1:3: line_{3} \ntests/grep/f_1:5:p_LINE_{5}_s\ntests/grep/f_1:6:l_{6}\n", "", 2); } #[test] @@ -1389,3 +1410,47 @@ fn test_long_names_files() { 0, ); } + +#[test] +fn test_dash_o() { + const INPUT: &str = "\ +Contains KEYWORD here and also here: KEYWORD +Output is not organized: one KEYWORD per output line +Regardless of which input line it was found in +"; + + grep_test( + &["-o", "KEYWORD"], + INPUT, + "\ +KEYWORD +KEYWORD +KEYWORD +", + "", + 0_i32, + ); + + grep_test(&["-o", "NOT PRESENT"], INPUT, "", "", 1_i32); + + grep_test(&["-o", "-v", "NOT PRESENT"], INPUT, "", "", 0_i32); + + grep_test( + &["-o", "KEYWORD", "--", "-", "-"], + INPUT, + "\ +(standard input):KEYWORD +(standard input):KEYWORD +(standard input):KEYWORD +", + "", + 0_i32, + ); +} + +// Disabled due to implementation replacing empty patterns with ".*" on this platform +#[test] +#[cfg(not(target_os = "macos"))] +fn test_dash_o_empty_pattern() { + grep_test(&["-o", ""], "ABC", "", "", 0_i32); +}