diff --git a/text/Cargo.toml b/text/Cargo.toml index 59888999..ac91fa50 100644 --- a/text/Cargo.toml +++ b/text/Cargo.toml @@ -42,6 +42,10 @@ path = "src/cut.rs" name = "nohup" path = "src/nohup.rs" +[[bin]] +name = "uniq" +path = "src/uniq.rs" + [[bin]] name = "sort" path = "src/sort.rs" diff --git a/text/src/uniq.rs b/text/src/uniq.rs new file mode 100644 index 00000000..412ebf75 --- /dev/null +++ b/text/src/uniq.rs @@ -0,0 +1,212 @@ +use clap::Parser; +use gettextrs::{bind_textdomain_codeset, textdomain}; +use plib::PROJECT_NAME; +use std::fs::File; +use std::io::{self, BufRead, BufReader, Write}; +use std::path::PathBuf; + +/// The uniq utility - filters out duplicate lines in a file +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Count the number of repeated lines + #[arg(short = 'c')] + count: bool, + + /// Print only the repeated lines + #[arg(short = 'd')] + repeated: bool, + + /// Print only unique lines + #[arg(short = 'u')] + unique: bool, + + /// Ignore the first fields fields on each input line + #[arg(short = 'f')] + fields: Option, + + /// Ignore the first chars characters on each input line + #[arg(short = 's')] + chars: Option, + + /// Input file (if not specified, use stdin) + input_file: Option, + + /// Output file (if not specified, use stdout) + output_file: Option, +} + +impl Args { + /// Validates the arguments to ensure no conflicting options are used together. + /// + /// # Errors + /// + /// Returns an error if conflicting options are found. + fn validate_args(&self) -> Result<(), String> { + // Check if conflicting options are used together + if self.unique && self.repeated { + return Err("Options '-u' and '-d' cannot be used together".to_string()); + } + if self.count && self.repeated { + return Err("Options '-c' and '-d' cannot be used together".to_string()); + } + if self.count && self.unique { + return Err("Options '-c' and '-u' cannot be used together".to_string()); + } + Ok(()) + } +} + +/// Processes the input according to the specified arguments and writes the output. +/// +/// # Arguments +/// +/// * `args` - A reference to the `Args` struct containing the command line arguments. +/// +/// # Errors +/// +/// Returns an error if there is an issue reading the input or writing the output. +fn uniq(args: &Args) -> Result<(), Box> { + let input: Box = match &args.input_file { + Some(file) => { + if *file == PathBuf::from("-") { + Box::new(BufReader::new(io::stdin())) + } else { + Box::new(BufReader::new(File::open(file)?)) + } + } + None => Box::new(BufReader::new(io::stdin())), + }; + + let mut output: Box = match &args.output_file { + Some(file) => Box::new(File::create(file)?), + None => Box::new(io::stdout()), + }; + + let lines: Vec = input.lines().collect::>()?; + + let mut last_line: Option = None; + let mut current_count = 0; + + for line in &lines { + let processed_line = process_line(line, args.fields, args.chars); + + if let Some(last_line) = &last_line { + let processed_last_line = process_line(last_line, args.fields, args.chars); + if processed_line == processed_last_line { + current_count += 1; + continue; + } else { + output_result(&mut output, last_line, current_count, args)?; + } + } + last_line = Some(line.to_string()); + current_count = 1; + } + + if let Some(last) = last_line { + output_result(&mut output, &last, current_count, args)?; + } + Ok(()) +} + +/// Processes a line according to the specified field and character options. +/// +/// # Arguments +/// +/// * `line` - The line to be processed. +/// * `fields` - The number of fields to skip. +/// * `chars` - The number of characters to skip. +/// +/// # Returns +/// +/// Returns the processed line as a `String`. +fn process_line(line: &str, fields: Option, chars: Option) -> String { + let mut processed_line = line.to_string(); + if line.is_empty() { + return line.to_string(); + } + if let Some(f) = fields { + if f == 0 { + processed_line = line.to_string(); + } else { + let mut field_count = 0; + + let chars = line.chars().skip_while(|c| { + if c.is_whitespace() { + if field_count >= f - 1 { + return false; + } + field_count += 1; + } + true + }); + processed_line = chars.collect::(); + } + } + + if let Some(c) = chars { + if c < processed_line.len() { + processed_line = processed_line[c..].to_string(); + } else { + processed_line.clear(); + } + } + + if processed_line.is_empty() { + line.to_string() + } else { + processed_line + } +} + +/// Writes the result to the output according to the specified arguments. +/// +/// # Arguments +/// +/// * `output` - The output writer. +/// * `line` - The line to be written. +/// * `count` - The count of the line occurrences. +/// * `args` - A reference to the `Args` struct containing the command line arguments. +/// +/// # Errors +/// +/// Returns an error if there is an issue writing to the output. +fn output_result( + output: &mut W, + line: &str, + count: usize, + args: &Args, +) -> Result<(), io::Error> { + if args.count { + writeln!(output, "{} {}", count, line)?; + } else if args.repeated && count > 1 { + writeln!(output, "{}", line)?; + } else if args.unique && count == 1 { + writeln!(output, "{}", line)?; + } else if !args.repeated && !args.unique { + writeln!(output, "{}", line)?; + } + Ok(()) +} + +/// The main function that initializes the application, parses the arguments, and runs the uniq function. +/// +/// # Errors +/// +/// Returns an error if there is an issue with the arguments or the uniq function. +fn main() -> Result<(), Box> { + textdomain(PROJECT_NAME)?; + bind_textdomain_codeset(PROJECT_NAME, "UTF-8")?; + let args = Args::parse(); + + args.validate_args()?; + let mut exit_code = 0; + + if let Err(err) = uniq(&args) { + exit_code = 1; + eprintln!("{}", err); + } + + std::process::exit(exit_code) +} diff --git a/text/tests/integration.rs b/text/tests/integration.rs index cc454748..a129de90 100644 --- a/text/tests/integration.rs +++ b/text/tests/integration.rs @@ -121,6 +121,19 @@ fn sort_test( }); } +fn uniq_test(args: &[&str], test_data: &str, expected_output: &str) { + let str_args: Vec = args.iter().map(|s| String::from(*s)).collect(); + + run_test(TestPlan { + cmd: String::from("uniq"), + args: str_args, + stdin_data: String::from(test_data), + expected_out: String::from(expected_output), + expected_err: String::from(""), + expected_exit_code: 0, + }); +} + fn pr_read_test_file( output_filename: &str, input_filename: &str, @@ -1551,3 +1564,187 @@ mod sort_tests { ); } } + +#[cfg(test)] +mod uniq_tests { + use crate::uniq_test; + #[test] + fn test_uniq_2() { + uniq_test(&[], "a\na\n", "a\n"); + } + + #[test] + fn test_uniq_3() { + uniq_test(&[], "a\na", "a\n"); + } + + #[test] + fn test_uniq_4() { + uniq_test(&[], "a\nb", "a\nb\n"); + } + + #[test] + fn test_uniq_5() { + uniq_test(&[], "a\na\nb", "a\nb\n"); + } + + #[test] + fn test_uniq_6() { + uniq_test(&[], "b\na\na\n", "b\na\n"); + } + + #[test] + fn test_uniq_7() { + uniq_test(&[], "a\nb\nc\n", "a\nb\nc\n"); + } + + #[test] + fn test_uniq_8() { + uniq_test(&[], "ö\nv\n", "ö\nv\n"); + } + + #[test] + fn test_uniq_9() { + uniq_test(&["-u"], "a\na\n", ""); + } + + #[test] + fn test_uniq_10() { + uniq_test(&["-u"], "a\nb\n", "a\nb\n"); + } + + #[test] + fn test_uniq_11() { + uniq_test(&["-u"], "a\nb\na\n", "a\nb\na\n"); + } + + #[test] + fn test_uniq_12() { + uniq_test(&["-u"], "a\na\n", ""); + } + + #[test] + fn test_uniq_13() { + uniq_test(&["-u"], "a\na\n", ""); + } + + #[test] + fn test_uniq_20() { + uniq_test(&["-d"], "a\na\n", "a\n"); + } + + #[test] + fn test_uniq_21() { + uniq_test(&["-d"], "a\nb\n", ""); + } + + #[test] + fn test_uniq_22() { + uniq_test(&["-d"], "a\nb\na\n", ""); + } + + #[test] + fn test_uniq_23() { + uniq_test(&["-d"], "a\na\nb\n", "a\n"); + } + + #[test] + fn test_uniq_24() { + uniq_test(&["-f", "1"], "a a\nb a\n", "a a\n"); + } + + #[test] + fn test_uniq_25() { + uniq_test(&["-f", "1"], "a a\nb b\n", "a a\nb b\n"); + } + + #[test] + fn test_uniq_26() { + uniq_test(&["-f", "1"], "a a a\nb a c\n", "a a a\nb a c\n"); + } + + #[test] + fn test_uniq_27() { + uniq_test(&["-f", "1"], "b a\na a\n", "b a\n"); + } + + #[test] + fn test_uniq_28() { + uniq_test(&["-f", "2"], "a a c\nb a c\n", "a a c\n"); + } + + #[test] + fn test_uniq_29() { + uniq_test(&["-s", "1"], "aaa\naaa\n", "aaa\n"); + } + + #[test] + fn test_uniq_30() { + uniq_test(&["-s", "2"], "baa\naaa\n", "baa\n"); + } + + #[test] + fn test_uniq_31() { + uniq_test(&["-f", "1", "-s", "1"], "a aaa\nb ab\n", "a aaa\nb ab\n"); + } + + #[test] + fn test_uniq_32() { + uniq_test(&["-f", "1", "-s", "1"], "a aaa\nb aaa\n", "a aaa\n"); + } + + #[test] + fn test_uniq_33() { + uniq_test(&["-f", "1", "-s", "1"], "a aaa\nb ab\n", "a aaa\nb ab\n"); + } + + #[test] + fn test_uniq_34() { + uniq_test(&["-f", "1", "-s", "1"], "a aaa\nb aaa\n", "a aaa\n"); + } + + #[test] + fn test_uniq_35() { + uniq_test(&["-s", "0"], "abc\nabcd\n", "abc\nabcd\n"); + } + + #[test] + fn test_uniq_36() { + uniq_test(&["-s", "0"], "abc\n", "abc\n"); + } + + #[test] + fn test_uniq_37() { + uniq_test(&[], "a\0a\na\n", "a\0a\na\n"); + } + + #[test] + fn test_uniq_38() { + uniq_test(&[], "a\ta\na a\n", "a\ta\na a\n"); + } + + #[test] + fn test_uniq_39() { + uniq_test(&["-f", "1"], "a\ta\na a\n", "a\ta\na a\n"); + } + + #[test] + fn test_uniq_40() { + uniq_test(&["-f", "2"], "a\ta a\na a a\n", "a\ta a\n"); + } + + #[test] + fn test_uniq_41() { + uniq_test(&["-f", "1"], "a\ta\na\ta\n", "a\ta\n"); + } + + #[test] + fn test_uniq_42() { + uniq_test(&["-c"], "a\nb\n", "1 a\n1 b\n"); + } + + #[test] + fn test_uniq_43() { + uniq_test(&["-c"], "a\na\n", "2 a\n"); + } +}