diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs index 8a69c07..050a015 100644 --- a/fuzz/fuzz_targets/fuzz_side.rs +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -2,11 +2,10 @@ #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::side_diff; +use diffutilslib::side_diff::{self, Params}; use std::fs::File; use std::io::Write; -use diffutilslib::params::Params; fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { let (original, new, /* width, tabsize, */ expand) = x; @@ -22,7 +21,16 @@ fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { ..Default::default() }; let mut output_buf = vec![]; - side_diff::diff(&original, &new, &mut output_buf, ¶ms); + side_diff::diff( + &original, + &new, + &mut output_buf, + &Params { + width: params.width, + tabsize: params.tabsize, + expand_tabs: params.expand_tabs, + }, + ); File::create("target/fuzz.file.original") .unwrap() .write_all(&original) @@ -39,4 +47,5 @@ fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { .unwrap() .write_all(&output_buf) .unwrap(); -}); \ No newline at end of file +}); + diff --git a/src/diff.rs b/src/diff.rs index f4c0614..1933686 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -4,12 +4,10 @@ // files that was distributed with this source code. 
use crate::params::{parse_params, Format}; -use crate::utils::report_failure_to_read_input_file; +use crate::utils; use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; use std::env::ArgsOs; -use std::ffi::OsString; -use std::fs; -use std::io::{self, stdout, Read, Write}; +use std::io::{self, stdout, Write}; use std::iter::Peekable; use std::process::{exit, ExitCode}; @@ -23,6 +21,7 @@ pub fn main(opts: Peekable) -> ExitCode { eprintln!("{error}"); exit(2); }); + // if from and to are the same file, no need to perform any comparison let maybe_report_identical_files = || { if params.report_identical_files { @@ -40,35 +39,16 @@ pub fn main(opts: Peekable) -> ExitCode { return ExitCode::SUCCESS; } - // read files - fn read_file_contents(filepath: &OsString) -> io::Result> { - if filepath == "-" { - let mut content = Vec::new(); - io::stdin().read_to_end(&mut content).and(Ok(content)) - } else { - fs::read(filepath) - } - } - let mut io_error = false; - let from_content = match read_file_contents(¶ms.from) { - Ok(from_content) => from_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); - io_error = true; - vec![] - } - }; - let to_content = match read_file_contents(¶ms.to) { - Ok(to_content) => to_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); - io_error = true; - vec![] + let (from_content, to_content) = match utils::read_both_files(¶ms.from, ¶ms.to) { + Ok(contents) => contents, + Err((filepath, error)) => { + eprintln!( + "{}", + utils::format_failure_to_read_input_file(¶ms.executable, &filepath, &error) + ); + return ExitCode::from(2); } }; - if io_error { - return ExitCode::from(2); - } // run diff let result: Vec = match params.format { @@ -81,7 +61,16 @@ pub fn main(opts: Peekable) -> ExitCode { }), Format::SideBySide => { let mut output = stdout().lock(); - side_diff::diff(&from_content, &to_content, &mut output, ¶ms) + side_diff::diff( + &from_content, + 
+                &to_content,
+                &mut output,
+                &side_diff::Params {
+                    tabsize: params.tabsize,
+                    width: params.width,
+                    expand_tabs: params.expand_tabs,
+                },
+            )
         }
     };
     if params.brief && !result.is_empty() {
diff --git a/src/main.rs b/src/main.rs
index b7c2712..792ce95 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -18,6 +18,7 @@ mod ed_diff;
 mod macros;
 mod normal_diff;
 mod params;
+mod sdiff;
 mod side_diff;
 mod unified_diff;
 mod utils;
@@ -72,6 +73,7 @@ fn main() -> ExitCode {
     match util_name.to_str() {
         Some("diff") => diff::main(args),
         Some("cmp") => cmp::main(args),
+        Some("sdiff") => sdiff::main(args),
         Some(name) => {
             eprintln!("{name}: utility not supported");
             ExitCode::from(2)
diff --git a/src/sdiff.rs b/src/sdiff.rs
new file mode 100644
index 0000000..4a8721d
--- /dev/null
+++ b/src/sdiff.rs
@@ -0,0 +1,364 @@
+// This file is part of the uutils diffutils package.
+//
+// For the full copyright and license information, please view the LICENSE-*
+// files that was distributed with this source code.
+
+use regex::Regex;
+
+use crate::side_diff;
+use crate::utils;
+use std::env::ArgsOs;
+use std::ffi::OsString;
+use std::io::{self, stdout, Write};
+use std::iter::Peekable;
+use std::process::{exit, ExitCode};
+
+/// Command-line parameters accepted by `sdiff`.
+#[derive(Eq, PartialEq, Debug)]
+pub struct Params {
+    pub executable: OsString,
+    pub from: OsString,
+    pub to: OsString,
+    pub expand_tabs: bool,
+    pub tabsize: usize,
+    pub width: usize,
+}
+
+impl Default for Params {
+    /// Empty names, `expand_tabs` off, `tabsize` 8 and `width` 130.
+    fn default() -> Self {
+        Self {
+            executable: OsString::default(),
+            from: OsString::default(),
+            to: OsString::default(),
+            expand_tabs: false,
+            tabsize: 8,
+            width: 130,
+        }
+    }
+}
+
+/// Parses the value of a numeric option such as `--tabsize=` or `--width=`.
+///
+/// `what` names the option in the error message. Zero and values that do not
+/// fit in a `usize` are rejected.
+fn parse_positive(value: &str, what: &str) -> Result<usize, String> {
+    match value.parse::<usize>() {
+        Ok(0) => Err(format!("invalid {what} «0»")),
+        Ok(num) => Ok(num),
+        Err(_) => Err(format!("invalid {what} «{value}»")),
+    }
+}
+
+/// Parses the `sdiff` command line.
+///
+/// The first item is the executable name. `-t`/`--expand-tabs`, `--tabsize=NUM`
+/// and `--width=NUM` are recognised anywhere; every other argument (including
+/// `-`) is taken as the next file operand.
+///
+/// # Errors
+///
+/// Returns a message when the executable name is missing, when a numeric
+/// option is zero or out of range, or when there are not exactly two operands.
+pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Result<Params, String> {
+    let Some(executable) = opts.next() else {
+        return Err("Usage: <exe> <from> <to>".to_string());
+    };
+    let usage = format!("Usage: {} <from> <to>", executable.to_string_lossy());
+
+    let mut params = Params {
+        executable,
+        ..Default::default()
+    };
+
+    let mut from = None;
+    let mut to = None;
+    let tabsize_re = Regex::new(r"^--tabsize=(?<num>\d+)$").unwrap();
+    // Anchored at both ends, like `tabsize_re`: an operand that merely ends in
+    // `--width=N` is a file name, not the option.
+    let width_re = Regex::new(r"^--width=(?P<long>\d+)$").unwrap();
+
+    for param in opts {
+        if param == "-t" || param == "--expand-tabs" {
+            params.expand_tabs = true;
+            continue;
+        }
+
+        {
+            // Invalid UTF-8 becomes U+FFFD here, which neither pattern accepts,
+            // so matching on the lossy text never misreads an operand.
+            let text = param.to_string_lossy();
+            if let Some(caps) = tabsize_re.captures(&text) {
+                params.tabsize = parse_positive(&caps["num"], "tabsize")?;
+                continue;
+            }
+            if let Some(caps) = width_re.captures(&text) {
+                params.width = parse_positive(&caps["long"], "width")?;
+                continue;
+            }
+        }
+
+        // Anything else, `-` included, is taken as a file operand.
+        if from.is_none() {
+            from = Some(param);
+        } else if to.is_none() {
+            to = Some(param);
+        } else {
+            return Err(usage);
+        }
+    }
+
+    // The loop consumed every argument, so missing operands cannot be
+    // supplied later: fewer than two is a usage error.
+    let (Some(from), Some(to)) = (from, to) else {
+        return Err(usage);
+    };
+    params.from = from;
+    params.to = to;
+
+    Ok(params)
+}
+
+/// Entry point of the `sdiff` utility.
+///
+/// Exits with status 2 when the arguments or an input file cannot be read,
+/// 1 when the side-by-side diff returns a non-empty result, 0 otherwise.
+pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
+    let params = parse_params(opts).unwrap_or_else(|error| {
+        eprintln!("{error}");
+        exit(2);
+    });
+
+    // Both operands are `-`, or both name the same file: there is nothing
+    // to compare, so succeed without reading anything.
+    // NOTE(review): nothing is printed in this case, common lines included;
+    // confirm this matches the intended sdiff behaviour.
+    if params.from == "-" && params.to == "-"
+        || same_file::is_same_file(&params.from, &params.to).unwrap_or(false)
+    {
+        return ExitCode::SUCCESS;
+    }
+
+    let (from_content, to_content) = match utils::read_both_files(&params.from, &params.to) {
+        Ok(contents) => contents,
+        Err((filepath, error)) => {
+            eprintln!(
+                "{}",
+                utils::format_failure_to_read_input_file(&params.executable, &filepath, &error)
+            );
+            return ExitCode::from(2);
+        }
+    };
+
+    // run diff
+    let mut output = stdout().lock();
+    let result = side_diff::diff(
+        &from_content,
+        &to_content,
+        &mut output,
+        &side_diff::Params {
+            tabsize: params.tabsize,
+            width: params.width,
+            expand_tabs: params.expand_tabs,
+        },
+    );
+
+    io::stdout().write_all(&result).unwrap();
+    if result.is_empty() {
+        ExitCode::SUCCESS
+    } else {
+        ExitCode::from(1)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn os(s: &str) -> OsString {
+        OsString::from(s)
+    }
+
+    #[test]
+    fn sdiff_params() {
+        assert_eq!(
+            Ok(Params {
+                executable: os("sdiff"),
+                from: os("foo"),
+                to: os("bar"),
+                ..Default::default()
+            }),
+            parse_params(
+                [os("sdiff"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
+        );
+
+        assert_eq!(
+            Ok(Params {
+                executable: os("sdiff"),
+                from: os("-"),
+                to: os("-"),
+                ..Default::default()
+            }),
+            parse_params([os("sdiff"), os("-"), os("-")].iter().cloned().peekable())
+        );
+
+        assert!(parse_params(
+            [os("sdiff"), os("foo"), os("bar"), os("-"), os("-")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        for option in ["-t", "--expand-tabs"] {
+            assert_eq!(
+                Ok(Params {
+                    executable: os("sdiff"),
+                    from: os("foo"),
+                    to: os("bar"),
+                    expand_tabs: true,
+                    ..Default::default()
+                }),
+                parse_params(
+                    [os("sdiff"), os(option), os("foo"), os("bar")]
+                        .iter()
+                        .cloned()
+                        .peekable()
+                )
+            );
+        }
+
+        assert_eq!(
+            Ok(Params {
+                executable: os("sdiff"),
+                from: os("foo"),
+                to: os("bar"),
+                width: 10,
+                ..Default::default()
+            }),
+            parse_params(
+                [os("sdiff"), os("--width=10"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
+        );
+
+        assert!(parse_params(
+            [os("sdiff"), os("--width=0"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        assert!(parse_params(
+            [os("sdiff"), os("--width=.1"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        // `--width=` must be the whole argument: this one is a file operand,
+        // which makes three operands in total.
+        assert!(parse_params(
+            [os("sdiff"), os("x--width=10"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        assert_eq!(
+            Ok(Params {
+                executable: os("sdiff"),
+                from: os("foo"),
+                to: os("bar"),
+                tabsize: 1,
+                ..Default::default()
+            }),
+            parse_params(
+                [os("sdiff"), os("--tabsize=1"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
+        );
+
+        assert!(parse_params(
+            [os("sdiff"), os("--tabsize=0"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        assert_eq!(
+            Ok(Params {
+                executable: os("sdiff"),
+                from: os("foo"),
+                to: os("bar"),
+                tabsize: 42,
+                ..Default::default()
+            }),
+            parse_params(
+                [os("sdiff"), os("--tabsize=42"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
+        );
+
+        assert!(parse_params([os("sdiff")].iter().cloned().peekable()).is_err());
+
+        assert!(parse_params(
+            [os("sdiff"), os("--tabsize"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        assert!(parse_params(
+            [os("sdiff"), os("--tabsize="), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        assert!(parse_params(
+            [os("sdiff"), os("--tabsize=r2"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        assert!(parse_params(
+            [os("sdiff"), os("--tabsize=-1"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+
+        assert!(parse_params(
+            [
+                os("sdiff"),
+                os("--tabsize=92233720368547758088"),
+                os("foo"),
+                os("bar")
+            ]
+            .iter()
+            .cloned()
+            .peekable()
+        )
+        .is_err());
+    }
+}
diff --git a/src/side_diff.rs b/src/side_diff.rs
index 56953d2..082b774 100644
--- a/src/side_diff.rs
+++ b/src/side_diff.rs
@@ -8,8 +8,6 @@ use diff::Result;
 use std::{io::Write, vec};
 use unicode_width::UnicodeWidthStr;
 
-use crate::params::Params;
-
 const GUTTER_WIDTH_MIN: usize = 3;
 
 struct CharIter<'a> {
@@ -306,6 +304,17 @@ fn push_output(
     Ok(())
 }
 
+/// Options consumed by [`diff`].
+///
+/// The derived `Default` leaves `width` and `tabsize` at 0; the `diff` and
+/// `sdiff` entry points set every field explicitly.
+#[derive(Default)]
+pub struct Params {
+    pub width: usize,
+    pub tabsize: usize,
+    pub expand_tabs: bool,
+}
+
 pub fn diff(
     from_file: &[u8],
     to_file: &[u8],
diff --git a/src/utils.rs b/src/utils.rs
index daca18d..2e80814 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -4,7 +4,8 @@
 // files that was distributed with this source code.
 
 use regex::Regex;
-use std::{ffi::OsString, io::Write};
+use std::io::{self, Error, Read, Write};
+use std::{ffi::OsString, fs};
 use unicode_width::UnicodeWidthStr;
 
 /// Replace tabs by spaces in the input line.
@@ -87,15 +88,30 @@ pub fn format_failure_to_read_input_file(
     )
 }
 
-pub fn report_failure_to_read_input_file(
-    executable: &OsString,
-    filepath: &OsString,
-    error: &std::io::Error,
-) {
-    eprintln!(
-        "{}",
-        format_failure_to_read_input_file(executable, filepath, error)
-    );
+/// Reads the whole input named by `filepath`; `-` means standard input.
+pub fn read_file_contents(filepath: &OsString) -> io::Result<Vec<u8>> {
+    if filepath == "-" {
+        let mut content = Vec::new();
+        io::stdin().read_to_end(&mut content)?;
+        Ok(content)
+    } else {
+        fs::read(filepath)
+    }
+}
+
+/// Reads `from`, then `to`, and returns both contents.
+///
+/// # Errors
+///
+/// Stops at the first file that cannot be read and returns its path together
+/// with the I/O error; when `from` fails, `to` is not attempted.
+pub fn read_both_files(
+    from: &OsString,
+    to: &OsString,
+) -> Result<(Vec<u8>, Vec<u8>), (OsString, Error)> {
+    let from_content = read_file_contents(from).map_err(|e| (from.clone(), e))?;
+    let to_content = read_file_contents(to).map_err(|e| (to.clone(), e))?;
+    Ok((from_content, to_content))
 }
 
 #[cfg(test)]
@@ -145,6 +161,52 @@ mod tests {
         }
     }
 
+    mod read_file {
+        use super::*;
+        use tempfile::NamedTempFile;
+
+        #[test]
+        fn read_two_valid_files() {
+            let content1 = "content-1";
+            let content2 = "content-2";
+
+            let mut from_file = NamedTempFile::new().unwrap();
+            let mut to_file = NamedTempFile::new().unwrap();
+
+            from_file.write_all(content1.as_bytes()).unwrap();
+            to_file.write_all(content2.as_bytes()).unwrap();
+
+            let from_path = OsString::from(from_file.path());
+            let to_path = OsString::from(to_file.path());
+
+            let res = read_both_files(&from_path, &to_path);
+
+            assert!(res.is_ok());
+            let (from_content, to_content) = res.unwrap();
+            assert_eq!(from_content, content1.as_bytes());
+            assert_eq!(to_content, content2.as_bytes());
+        }
+
+        #[test]
+        fn read_not_exist_file() {
+            let mut file = NamedTempFile::new().unwrap();
+            file.write_all(b"valid-file").unwrap();
+            let exist_file_path = OsString::from(file.path());
+
+            let non_exist_file_path = OsString::from("non-exist-file");
+
+            let res = read_both_files(&non_exist_file_path, &exist_file_path);
+            assert!(res.is_err());
+            let (err_path, _) = res.unwrap_err();
+            assert_eq!(err_path, non_exist_file_path);
+
+            let res = read_both_files(&exist_file_path, &non_exist_file_path);
+            assert!(res.is_err());
+            let (err_path, _) = res.unwrap_err();
+            assert_eq!(err_path, non_exist_file_path);
+        }
+    }
+
     mod write_line {
         use super::*;
         use pretty_assertions::assert_eq;
diff --git a/tests/integration.rs b/tests/integration.rs
index b37d7e6..3935181 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -57,7 +57,7 @@ mod common {
         #[cfg(windows)]
         let error_message = "The system cannot find the file specified.";
 
-        for subcmd in ["diff", "cmp"] {
+        for subcmd in ["diff", "cmp", "sdiff"] {
             let mut cmd = cargo_bin_cmd!("diffutils");
             cmd.arg(subcmd);
             cmd.arg(&nopath).arg(file.path());
@@ -84,13 +84,13 @@ mod common {
         let mut cmd = cargo_bin_cmd!("diffutils");
         cmd.arg("diff");
         cmd.arg(&nopath).arg(&nopath);
-        cmd.assert().code(predicate::eq(2)).failure().stderr(
-            predicate::str::contains(format!(
+        cmd.assert()
+            .code(predicate::eq(2))
+            .failure()
+            .stderr(predicate::str::contains(format!(
                 ": {}: {error_message}\n",
                 &nopath.as_os_str().to_string_lossy()
-            ))
-            .count(2),
-        );
+            )));
 
         Ok(())
     }
@@ -888,3 +888,27 @@ mod cmp {
         Ok(())
     }
 }
+
+mod sdiff {
+    use super::*;
+
+    #[test]
+    fn differences() -> Result<(), Box<dyn std::error::Error>> {
+        let mut file1 = NamedTempFile::new()?;
+        file1.write_all("foo\n".as_bytes())?;
+
+        let mut file2 = NamedTempFile::new()?;
+        file2.write_all("bar\n".as_bytes())?;
+
+        let mut cmd = cargo_bin_cmd!("diffutils");
+        cmd.arg("sdiff");
+        cmd.arg(file1.path()).arg(file2.path());
+
+        cmd.assert()
+            .code(predicate::eq(1))
+            .failure()
+            .stdout(predicate::str::is_empty().not());
+
+        Ok(())
+    }
+}