diff --git a/src/sed/command.rs b/src/sed/command.rs index 250bcbc..8605617 100644 --- a/src/sed/command.rs +++ b/src/sed/command.rs @@ -16,6 +16,8 @@ use crate::sed::script_line_provider::ScriptLineProvider; use std::cell::RefCell; use std::collections::HashMap; +use std::fs::File; +use std::io::BufReader; use std::path::PathBuf; // For file descriptors and equivalent use std::rc::Rc; use uucore::error::UResult; @@ -66,6 +68,8 @@ pub struct ProcessingContext { pub label_to_command_map: HashMap>>, /// Commands with a (latchable and resetable) address range pub range_commands: Vec>>, + /// Files read line-by-line by the GNU R command + pub read_line_files: HashMap>>, /// True if a substitution was made as specified in the t command pub substitution_made: bool, /// Elements to append at the end of each command processing cycle @@ -308,6 +312,7 @@ pub enum CommandData { BranchTarget(Option>>), // Commands for 'b', 't', '{' Label(Option), // Label name for 'b', 't', ':' Path(PathBuf), // File path for 'r' + ReadLineFile(Rc>), // File state for 'R' NamedWriter(Rc>), // File output for 'w' Number(usize), // Number for 'l', 'q', 'Q' (GNU) Substitution(Box), // Substitute command 's' @@ -315,6 +320,14 @@ pub enum CommandData { Transliteration(Box), // Transliteration command 'y' } +#[derive(Debug)] +/// Shared state for files read one line at a time by GNU sed's R command. +pub struct ReadLineFile { + pub path: PathBuf, + pub reader: Option>, + pub done: bool, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] /// Flag for space modifications pub enum SpaceFlag { diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index bcfff31..3bb745c 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -9,8 +9,8 @@ // file that was distributed with this source code. use crate::sed::command::{ - Address, Command, CommandData, ProcessingContext, ReplacementPart, ReplacementTemplate, - Substitution, Transliteration, + Address, Command, CommandData, ProcessingContext, ReadLineFile, ReplacementPart, + ReplacementTemplate, Substitution, Transliteration, }; use crate::sed::delimited_parser::{parse_char_escape, parse_regex, parse_transliteration}; use crate::sed::error_handling::{ScriptLocation, compilation_error, semantic_error}; @@ -1002,15 +1002,30 @@ fn compile_empty_command( Ok(CommandHandling::Continue) } -// Handles r +// Handles r, R fn compile_read_file_command( lines: &mut ScriptLineProvider, line: &mut ScriptCharProvider, cmd: &mut Command, - _context: &mut ProcessingContext, + context: &mut ProcessingContext, ) -> UResult { let path = read_file_path(lines, line)?; - cmd.data = CommandData::Path(path); + cmd.data = if cmd.code == 'R' { + let file_state = context + .read_line_files + .entry(path.clone()) + .or_insert_with(|| { + Rc::new(RefCell::new(ReadLineFile { + path, + reader: None, + done: false, + })) + }) + .clone(); + CommandData::ReadLineFile(file_state) + } else { + CommandData::Path(path) + }; Ok(CommandHandling::Continue) } @@ -1340,6 +1355,11 @@ fn get_cmd_spec( n_addr: 1, handler: compile_number_command, }), + // R is a GNU extension + 'R' if !posix => Ok(CommandSpec { + n_addr: 2, + handler: compile_read_file_command, + }), 'r' => Ok(CommandSpec { n_addr: if posix { 1 } else { 2 }, handler: compile_read_file_command, diff --git a/src/sed/mod.rs b/src/sed/mod.rs index 1206140..3f787c8 100644 --- a/src/sed/mod.rs +++ b/src/sed/mod.rs @@ -218,6 +218,7 @@ fn build_context(matches: &ArgMatches) -> ProcessingContext { parsed_block_nesting: 0, label_to_command_map: HashMap::new(), range_commands: Vec::new(), + read_line_files: HashMap::new(), substitution_made: false, append_elements: Vec::new(), } diff --git a/src/sed/processor.rs b/src/sed/processor.rs index 45d6b26..01e9e93 100644 --- a/src/sed/processor.rs +++ b/src/sed/processor.rs @@ -19,7 +19,8 @@ use crate::sed::named_writer; use std::borrow::Cow; use std::cell::RefCell; -use std::io::{self, IsTerminal}; +use std::fs::File; +use std::io::{self, BufRead, BufReader, IsTerminal}; use std::path::PathBuf; use std::rc::Rc; use uucore::display::Quotable; @@ -585,6 +586,33 @@ fn process_file( .append_elements .push(AppendElement::Path(path.clone())); } + 'R' => { + // Copy one line from the file to standard output later. + match &command.data { + CommandData::ReadLineFile(file_state) => { + let mut file_state = file_state.borrow_mut(); + if !file_state.done { + if file_state.reader.is_none() { + match File::open(&file_state.path) { + Ok(file) => file_state.reader = Some(BufReader::new(file)), + Err(_) => file_state.done = true, + } + } + + if let Some(reader) = &mut file_state.reader { + let mut line = String::new(); + match reader.read_line(&mut line) { + Ok(0) | Err(_) => file_state.done = true, + Ok(_) => context + .append_elements + .push(AppendElement::Text(Rc::from(line))), + } + } + } + } + _ => panic!("Expected ReadLineFile command data"), + } + } 's' => { substitute(&mut pattern, &command, context, output)?; } diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index b31fed7..a27046c 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -775,6 +775,53 @@ check_output!(read_ok, [format!("4r {LINES2}"), LINES1.to_string()]); check_output!(read_missing, ["5r /xyzzyxyzy42", LINES1]); check_output!(read_empty, ["6r input/empty", LINES1]); +#[test] +fn read_one_line_with_r_command() -> std::io::Result<()> { + let mut source = NamedTempFile::new()?; + source.write_all(b"1\n2\n")?; + let cmd = format!( + "1R{}\n2R{}", + source.path().display(), + source.path().display() + ); + + new_ucmd!() + .args(&["-e", &cmd]) + .pipe_in("x\ny\n") + .succeeds() + .stdout_is("x\n1\ny\n2\n"); + + Ok(()) +} + +#[test] +fn read_one_line_missing_and_eof_are_ignored() -> std::io::Result<()> { + let mut source = NamedTempFile::new()?; + source.write_all(b"1\n")?; + let cmd = format!( + "1R{}\n2R{}\n3R/xyzzyxyzy42", + source.path().display(), + source.path().display() + ); + + new_ucmd!() + .args(&["-e", &cmd]) + .pipe_in("x\ny\nz\n") + .succeeds() + .stdout_is("x\n1\ny\nz\n"); + + Ok(()) +} + +#[test] +fn read_one_line_rejected_in_posix_mode() { + new_ucmd!() + .args(&["--posix", "R /tmp/read-one-line"]) + .fails() + .code_is(1) + .stderr_is("sed: