From 3c9171feff6a62b054c03b3f0e689a07e0368020 Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Sun, 9 Apr 2023 15:55:27 +0200 Subject: [PATCH] tail: Refactor chunks.rs and small parts of tail.rs. Replace unwraps with error propagation ... Return io::Result from methods in chunks.rs to be able to replace unwraps with error propagation and return the bytes read from the source so far. Store how many bytes are read in LinesChunkBuffer. Fixes: tail does not output anything when there's no newline in the source and the `--follow` option is given. Flush the buffered writer after a write explicitly. Simplify the BytesChunkBuffer::fill and LinesChunkBuffer::fill methods and make use of some new helper methods in these structs. Add more unit tests for structs in chunks.rs --- src/uu/tail/src/chunks.rs | 600 +++++++++++++++++++++++++++----- src/uu/tail/src/follow/files.rs | 7 +- src/uu/tail/src/tail.rs | 96 ++--- 3 files changed, 575 insertions(+), 128 deletions(-) diff --git a/src/uu/tail/src/chunks.rs b/src/uu/tail/src/chunks.rs index 3aa380e20e..b34907d489 100644 --- a/src/uu/tail/src/chunks.rs +++ b/src/uu/tail/src/chunks.rs @@ -4,7 +4,7 @@ // * file that was distributed with this source code. //! Iterating over a file by chunks, either starting at the end of the file with [`ReverseChunks`] -//! or at the end of piped stdin with [`LinesChunk`] or [`BytesChunk`]. +//! or else with [`LinesChunk`] or [`BytesChunk`]. //! //! Use [`ReverseChunks::new`] to create a new iterator over chunks of bytes from the file. @@ -12,8 +12,7 @@ use std::collections::VecDeque; use std::fs::File; -use std::io::{BufRead, Read, Seek, SeekFrom, Write}; -use uucore::error::UResult; +use std::io::{self, Read, Seek, SeekFrom, Write}; /// When reading files in reverse in `bounded_tail`, this is the size of each /// block read at a time. @@ -46,26 +45,28 @@ pub struct ReverseChunks<'a> { } impl<'a> ReverseChunks<'a> { - pub fn new(file: &'a mut File) -> ReverseChunks<'a> { + pub fn new(file: &'a mut File) -> io::Result> { + // TODO: why is this platform dependent ? let current = if cfg!(unix) { - file.stream_position().unwrap() + file.stream_position()? } else { 0 }; - let size = file.seek(SeekFrom::End(0)).unwrap() - current; + let size = file.seek(SeekFrom::End(0))? - current; + // TODO: is the cast to usize safe ? on 32-bit systems ? let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize; let block_idx = 0; - ReverseChunks { + Ok(ReverseChunks { file, size, max_blocks_to_read, block_idx, - } + }) } } impl<'a> Iterator for ReverseChunks<'a> { - type Item = Vec; + type Item = io::Result>; fn next(&mut self) -> Option { // If there are no more chunks to read, terminate the iterator. @@ -85,22 +86,24 @@ impl<'a> Iterator for ReverseChunks<'a> { // Seek backwards by the next chunk, read the full chunk into // `buf`, and then seek back to the start of the chunk again. let mut buf = vec![0; BLOCK_SIZE as usize]; - let pos = self - .file - .seek(SeekFrom::Current(-(block_size as i64))) - .unwrap(); - self.file - .read_exact(&mut buf[0..(block_size as usize)]) - .unwrap(); - let pos2 = self - .file - .seek(SeekFrom::Current(-(block_size as i64))) - .unwrap(); + let pos = match self.file.seek(SeekFrom::Current(-(block_size as i64))) { + Ok(pos) => pos, + Err(error) => return Some(Err(error)), + }; + + if let Err(error) = self.file.read_exact(&mut buf[0..(block_size as usize)]) { + return Some(Err(error)); + } + + let pos2 = match self.file.seek(SeekFrom::Current(-(block_size as i64))) { + Ok(pos) => pos, + Err(error) => return Some(Err(error)), + }; assert_eq!(pos, pos2); self.block_idx += 1; - Some(buf[0..(block_size as usize)].to_vec()) + Some(Ok(buf[0..(block_size as usize)].to_vec())) } } @@ -202,15 +205,27 @@ impl BytesChunk { &self.buffer[offset..self.bytes] } + /// Return true if the [`BytesChunk`] has bytes stored. pub fn has_data(&self) -> bool { self.bytes > 0 } - /// Fills `self.buffer` with maximal [`BUFFER_SIZE`] number of bytes, draining the reader by - /// that number of bytes. If EOF is reached (so 0 bytes are read), then returns - /// [`UResult`] or else the result with [`Some(bytes)`] where bytes is the number of bytes - /// read from the source. - pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult> { + /// Return true if the [`BytesChunk`] has no bytes stored. + pub fn is_empty(&self) -> bool { + !self.has_data() + } + + /// Return the amount of bytes stored in this [`BytesChunk`]. + pub fn len(&self) -> usize { + self.bytes + } + + /// Fills `self.buffer` with maximal [`BUFFER_SIZE`] number of bytes, + /// draining the reader by that number of bytes. If EOF is reached (so 0 + /// bytes are read), then returns [`io::Result`] or else the result + /// with [`io::Result io::Result> { let num_bytes = filehandle.read(&mut self.buffer)?; self.bytes = num_bytes; if num_bytes == 0 { @@ -285,18 +300,20 @@ impl BytesChunkBuffer { /// let mut chunks = BytesChunkBuffer::new(num_print); /// chunks.fill(&mut reader).unwrap(); /// ``` - pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> { + pub fn fill(&mut self, reader: &mut impl Read) -> io::Result { + if self.num_print == 0 { + return Ok(0); + } + let mut chunk = Box::new(BytesChunk::new()); // fill chunks with all bytes from reader and reuse already instantiated chunks if possible while (chunk.fill(reader)?).is_some() { - self.bytes += chunk.bytes as u64; - self.chunks.push_back(chunk); + self.push_back(chunk); let first = &self.chunks[0]; if self.bytes - first.bytes as u64 > self.num_print { - chunk = self.chunks.pop_front().unwrap(); - self.bytes -= chunk.bytes as u64; + chunk = self.pop_front().unwrap(); } else { chunk = Box::new(BytesChunk::new()); } @@ -304,31 +321,61 @@ impl BytesChunkBuffer { // quit early if there are no chunks for example in case the pipe was empty if self.chunks.is_empty() { - return Ok(()); + return Ok(0); } - let chunk = self.chunks.pop_front().unwrap(); - // calculate the offset in the first chunk and put the calculated chunk as first element in // the self.chunks collection. The calculated offset must be in the range 0 to BUFFER_SIZE // and is therefore safely convertible to a usize without losses. let offset = self.bytes.saturating_sub(self.num_print) as usize; - self.chunks - .push_front(Box::new(BytesChunk::from_chunk(&chunk, offset))); - Ok(()) + let chunk = self.pop_front().unwrap(); + let chunk = Box::new(BytesChunk::from_chunk(&chunk, offset)); + self.push_front(chunk); + + Ok(self.bytes) } - pub fn print(&self, mut writer: impl Write) -> UResult<()> { + /// Print the whole [`BytesChunkBuffer`]. + pub fn print(&self, writer: &mut impl Write) -> io::Result<()> { for chunk in &self.chunks { writer.write_all(chunk.get_buffer())?; } Ok(()) } + /// Return true if the [`BytesChunkBuffer`] has bytes stored. pub fn has_data(&self) -> bool { !self.chunks.is_empty() } + + /// Return the amount of bytes this [`BytesChunkBuffer`] has stored. + pub fn get_bytes(&self) -> u64 { + self.bytes + } + + /// Return and remove the first [`BytesChunk`] stored in this [`BytesChunkBuffer`]. + fn pop_front(&mut self) -> Option> { + let chunk = self.chunks.pop_front(); + if let Some(chunk) = chunk { + self.bytes -= chunk.bytes as u64; + Some(chunk) + } else { + None + } + } + + /// Add a [`BytesChunk`] at the start of this [`BytesChunkBuffer`]. + fn push_front(&mut self, chunk: Box) { + self.bytes += chunk.bytes as u64; + self.chunks.push_front(chunk); + } + + /// Add a [`BytesChunk`] at the end of this [`BytesChunkBuffer`]. + fn push_back(&mut self, chunk: Box) { + self.bytes += chunk.bytes as u64; + self.chunks.push_back(chunk); + } } /// Works similar to a [`BytesChunk`] but also stores the number of lines encountered in the current @@ -346,6 +393,7 @@ pub struct LinesChunk { } impl LinesChunk { + /// Create a new [`LinesChunk`] with an arbitrary `u8` as `delimiter`. pub fn new(delimiter: u8) -> Self { Self { chunk: BytesChunk::new(), @@ -433,6 +481,16 @@ impl LinesChunk { self.chunk.has_data() } + /// Return true if the [`LinesChunk`] has no bytes stored. + pub fn is_empty(&self) -> bool { + !self.has_data() + } + + /// Return the amount of bytes stored in this [`LinesChunk`]. + pub fn len(&self) -> usize { + self.chunk.len() + } + /// Returns this buffer safely. See [`BytesChunk::get_buffer`] /// /// returns: &[u8] with length `self.bytes` @@ -458,7 +516,7 @@ impl LinesChunk { /// that number of bytes. This function works like the [`BytesChunk::fill`] function besides /// that this function also counts and stores the number of lines encountered while reading from /// the `filehandle`. - pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult> { + pub fn fill(&mut self, filehandle: &mut impl Read) -> io::Result> { match self.chunk.fill(filehandle)? { None => { self.lines = 0; @@ -514,7 +572,7 @@ impl LinesChunk { /// /// * `writer`: must implement [`Write`] /// * `offset`: An offset in number of lines. - pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> UResult<()> { + pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> io::Result { self.print_bytes(writer, self.calculate_bytes_offset_from(offset)) } @@ -524,9 +582,10 @@ impl LinesChunk { /// /// * `writer`: must implement [`Write`] /// * `offset`: An offset in number of bytes. - pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> UResult<()> { - writer.write_all(self.get_buffer_with(offset))?; - Ok(()) + pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> io::Result { + let buffer = self.get_buffer_with(offset); + writer.write_all(buffer)?; + Ok(buffer.len()) } } @@ -544,6 +603,8 @@ pub struct LinesChunkBuffer { num_print: u64, /// Stores the [`LinesChunk`] chunks: VecDeque>, + /// The total amount of bytes stored in all chunks + bytes: u64, } impl LinesChunkBuffer { @@ -554,35 +615,33 @@ impl LinesChunkBuffer { num_print, lines: 0, chunks: VecDeque::new(), + bytes: 0, } } - /// Fills this buffer with chunks and consumes the reader completely. This method ensures that - /// there are exactly as many chunks as needed to match `self.num_print` lines, so there are - /// in sum exactly `self.num_print` lines stored in all chunks. The method returns an iterator - /// over these chunks. If there are no chunks, for example because the piped stdin contained no - /// lines, or `num_print = 0` then `iterator.next` will return None. - pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> { - let mut chunk = Box::new(LinesChunk::new(self.delimiter)); + /// Fills this buffer with chunks and consumes the reader completely. + /// + /// This method ensures that there are exactly as many chunks as needed to match + /// `self.num_print` lines, so there are in sum exactly `self.num_print` lines stored in all + /// chunks. + pub fn fill(&mut self, reader: &mut impl Read) -> io::Result { + if self.num_print == 0 { + return Ok(0); + } + let mut chunk = Box::new(LinesChunk::new(self.delimiter)); while (chunk.fill(reader)?).is_some() { - self.lines += chunk.lines as u64; - self.chunks.push_back(chunk); + self.push_back(chunk); let first = &self.chunks[0]; if self.lines - first.lines as u64 > self.num_print { - chunk = self.chunks.pop_front().unwrap(); - - self.lines -= chunk.lines as u64; + chunk = self.pop_front().unwrap(); } else { chunk = Box::new(LinesChunk::new(self.delimiter)); } } - if self.chunks.is_empty() { - // chunks is empty when a file is empty so quitting early here - return Ok(()); - } else { + if self.has_data() { let length = &self.chunks.len(); let last = &mut self.chunks[length - 1]; if !last.get_buffer().ends_with(&[self.delimiter]) { @@ -593,41 +652,95 @@ impl LinesChunkBuffer { // skip unnecessary chunks and save the first chunk which may hold some lines we have to // print - let chunk = loop { - // it's safe to call unwrap here because there is at least one chunk and sorting out - // more chunks than exist shouldn't be possible. - let chunk = self.chunks.pop_front().unwrap(); - - // skip is true as long there are enough lines left in the other stored chunks. - let skip = self.lines - chunk.lines as u64 > self.num_print; - if skip { - self.lines -= chunk.lines as u64; - } else { - break chunk; + while let Some(chunk) = self.pop_front() { + // this is false as long there are enough lines left in the other stored chunks. + if self.lines <= self.num_print { + // Calculate the number of lines to skip in the current chunk. The calculated value must be + // in the range 0 to BUFFER_SIZE and is therefore safely convertible to a usize without + // losses. + let skip_lines = + (self.lines + chunk.lines as u64).saturating_sub(self.num_print) as usize; + + let chunk = Box::new(LinesChunk::from_chunk(&chunk, skip_lines)); + self.push_front(chunk); + break; } - }; - - // Calculate the number of lines to skip in the current chunk. The calculated value must be - // in the range 0 to BUFFER_SIZE and is therefore safely convertible to a usize without - // losses. - let skip_lines = self.lines.saturating_sub(self.num_print) as usize; - let chunk = LinesChunk::from_chunk(&chunk, skip_lines); - self.chunks.push_front(Box::new(chunk)); + } - Ok(()) + Ok(self.bytes) } - pub fn print(&self, mut writer: impl Write) -> UResult<()> { + /// Writes the whole [`LinesChunkBuffer`] into a `writer`. + pub fn print(&self, writer: &mut impl Write) -> io::Result<()> { for chunk in &self.chunks { - chunk.print_bytes(&mut writer, 0)?; + chunk.print_bytes(writer, 0)?; } Ok(()) } + + /// Return the amount of lines this buffer has stored. + pub fn get_lines(&self) -> u64 { + self.lines + } + + /// Return true if this buffer has stored any bytes. + pub fn has_data(&self) -> bool { + !self.chunks.is_empty() + } + + /// Return and remove the first [`LinesChunk`] stored in this [`LinesChunkBuffer`]. + fn pop_front(&mut self) -> Option> { + let chunk = self.chunks.pop_front(); + if let Some(chunk) = chunk { + self.bytes -= chunk.len() as u64; + self.lines -= chunk.lines as u64; + Some(chunk) + } else { + None + } + } + + /// Add a [`LinesChunk`] at the end of this [`LinesChunkBuffer`]. + fn push_back(&mut self, chunk: Box) { + self.bytes += chunk.len() as u64; + self.lines += chunk.lines as u64; + self.chunks.push_back(chunk); + } + + /// Add a [`LinesChunk`] at the start of this [`LinesChunkBuffer`]. + fn push_front(&mut self, chunk: Box) { + self.bytes += chunk.len() as u64; + self.lines += chunk.lines as u64; + self.chunks.push_front(chunk); + } } #[cfg(test)] mod tests { - use crate::chunks::{BytesChunk, BUFFER_SIZE}; + use super::*; + use std::io::{BufWriter, Cursor}; + + fn fill_lines_chunk(chunk: &mut LinesChunk, data: &str) -> usize { + let mut cursor = Cursor::new(data); + let result = chunk.fill(&mut cursor); + assert!(result.is_ok()); + let option = result.unwrap(); + option.unwrap_or(0) + } + + fn fill_lines_chunk_buffer(buffer: &mut LinesChunkBuffer, data: &str) -> u64 { + let mut cursor = Cursor::new(data); + let result = buffer.fill(&mut cursor); + assert!(result.is_ok()); + result.unwrap() + } + + fn fill_bytes_chunk_buffer(buffer: &mut BytesChunkBuffer, data: &str) -> u64 { + let mut cursor = Cursor::new(data); + let result = buffer.fill(&mut cursor); + assert!(result.is_ok()); + result.unwrap() + } #[test] fn test_bytes_chunk_from_when_offset_is_zero() { @@ -701,4 +814,327 @@ mod tests { let new_chunk = BytesChunk::from_chunk(&chunk, 1); assert_eq!(0, new_chunk.bytes); } + + #[test] + fn test_lines_chunk_fill_when_unix_line_endings() { + let mut chunk = LinesChunk::new(b'\n'); + + let bytes = fill_lines_chunk(&mut chunk, ""); + assert_eq!(bytes, 0); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "\n"); + assert_eq!(bytes, 1); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a"); + assert_eq!(bytes, 1); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "aa"); + assert_eq!(bytes, 2); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE + 1).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "a\n".repeat(BUFFER_SIZE / 2).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2); + + let bytes = fill_lines_chunk(&mut chunk, "a\n".repeat(BUFFER_SIZE).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2); + + let bytes = fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE); + + let bytes = fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE + 1).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE); + } + + #[test] + fn test_lines_chunk_fill_when_windows_line_endings() { + let mut chunk = LinesChunk::new(b'\n'); + + let bytes = fill_lines_chunk(&mut chunk, "\r\n"); + assert_eq!(bytes, 2); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a\r\n"); + assert_eq!(bytes, 3); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a\r\na"); + assert_eq!(bytes, 4); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a\r\na\r\n"); + assert_eq!(bytes, 6); + assert_eq!(chunk.get_lines(), 2); + + let bytes = fill_lines_chunk(&mut chunk, "\r\n".repeat(BUFFER_SIZE / 2).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2); + + // tests the correct amount of lines when \r\n is split across different chunks + let mut data = "\r\n".repeat(BUFFER_SIZE / 2 - 1); + data.push('a'); + data.push('\r'); + data.push('\n'); + let bytes = fill_lines_chunk(&mut chunk, data.as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2 - 1); + } + + #[test] + fn test_lines_chunk_when_print_lines_no_offset_then_correct_amount_of_bytes() { + let mut chunk = LinesChunk::new(b'\n'); + let expected = fill_lines_chunk(&mut chunk, ""); + + let mut writer = BufWriter::new(vec![]); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "\n"); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "a\n"); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + fill_lines_chunk(&mut chunk, "a\n".repeat(BUFFER_SIZE / 2 + 1).as_str()); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), BUFFER_SIZE); + } + + #[test] + fn test_lines_chunk_when_print_lines_with_offset_then_correct_amount_of_bytes() { + let mut chunk = LinesChunk::new(b'\n'); + fill_lines_chunk(&mut chunk, ""); + + let mut writer = BufWriter::new(vec![]); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 2); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 100); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a\n\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 1); + + fill_lines_chunk(&mut chunk, "a\na\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 2); + + fill_lines_chunk(&mut chunk, "a\na\n"); + let result = chunk.print_lines(&mut writer, 2); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a\naa\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 3); + + fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), BUFFER_SIZE); + + fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), BUFFER_SIZE - 1); + + fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, BUFFER_SIZE - 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 1); + + fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, BUFFER_SIZE); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + } + + #[test] + fn test_lines_chunk_buffer_fill_when_num_print_is_equal_to_size() { + let size = 0; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_lines(), 0); + assert_eq!(bytes, size as u64); + assert!(!buffer.has_data()); + + let size = 1; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let size = 1; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "\n"); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE + 1; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "\n".repeat(size).as_str()); + assert_eq!(buffer.get_lines(), size as u64); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE + 1; + let mut data = "a".repeat(BUFFER_SIZE); + data.push('\n'); + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, data.as_str()); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE + 1; + let mut data = "a".repeat(BUFFER_SIZE - 1); + data.push('\n'); + data.push('\n'); + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, data.as_str()); + assert_eq!(buffer.get_lines(), 2); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE * 2; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + } + + #[test] + fn test_lines_chunk_buffer_fill_when_num_print_is_not_equal_to_size() { + let size = 0; + let mut buffer = LinesChunkBuffer::new(b'\n', 1); + let bytes = fill_lines_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_lines(), 0); + assert_eq!(bytes, size as u64); + assert!(!buffer.has_data()); + + let size = 1; + let mut buffer = LinesChunkBuffer::new(b'\n', 2); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let mut buffer = LinesChunkBuffer::new(b'\n', 0); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_lines(), 0); + assert_eq!(bytes, 0); + assert!(!buffer.has_data()); + } + + #[test] + fn test_bytes_chunk_buffer_fill_when_num_print_is_equal_to_size() { + let size = 0; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_bytes(), 0); + assert_eq!(bytes, size as u64); + assert!(!buffer.has_data()); + + let size = 1; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_bytes(), 1); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + + let size = BUFFER_SIZE; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_bytes(), size as u64); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + + let size = BUFFER_SIZE + 1; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_bytes(), size as u64); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + + let size = BUFFER_SIZE * 2; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_bytes(), size as u64); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + } + + #[test] + fn test_bytes_chunk_buffer_fill_when_num_print_is_not_equal_to_size() { + let mut buffer = BytesChunkBuffer::new(0); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_bytes(), 0); + assert_eq!(bytes, 0); + assert!(!buffer.has_data()); + + let mut buffer = BytesChunkBuffer::new(1); + let bytes = fill_bytes_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_bytes(), 0); + assert_eq!(bytes, 0); + assert!(!buffer.has_data()); + + let mut buffer = BytesChunkBuffer::new(2); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_bytes(), 1); + assert_eq!(bytes, 1); + assert!(buffer.has_data()); + } } diff --git a/src/uu/tail/src/follow/files.rs b/src/uu/tail/src/follow/files.rs index 8686e73f41..74639243e5 100644 --- a/src/uu/tail/src/follow/files.rs +++ b/src/uu/tail/src/follow/files.rs @@ -12,7 +12,7 @@ use crate::text; use std::collections::hash_map::Keys; use std::collections::HashMap; use std::fs::{File, Metadata}; -use std::io::{stdout, BufRead, BufReader, BufWriter}; +use std::io::{stdout, BufRead, BufReader, BufWriter, Write}; use std::path::{Path, PathBuf}; use uucore::error::UResult; @@ -147,8 +147,9 @@ impl FileHandling { } let stdout = stdout(); - let writer = BufWriter::new(stdout.lock()); - chunks.print(writer)?; + let mut writer = BufWriter::new(stdout.lock()); + chunks.print(&mut writer)?; + writer.flush()?; self.last.replace(path.to_owned()); self.update_metadata(path, None); diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index e07616c6f5..749b21a212 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -151,7 +151,7 @@ fn tail_file( && file.is_seekable(if input.is_stdin() { offset } else { 0 }) && metadata.as_ref().unwrap().get_block_size() > 0 { - bounded_tail(&mut file, settings); + bounded_tail(&mut file, settings)?; reader = BufReader::new(file); } else { reader = BufReader::new(file); @@ -287,11 +287,7 @@ fn tail_stdin( /// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap(); /// assert_eq!(i, 2); /// ``` -fn forwards_thru_file( - reader: &mut R, - num_delimiters: u64, - delimiter: u8, -) -> std::io::Result +fn forwards_thru_file(reader: &mut R, num_delimiters: u64, delimiter: u8) -> io::Result where R: Read, { @@ -320,12 +316,14 @@ where /// Iterate over bytes in the file, in reverse, until we find the /// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the /// position just after that delimiter. -fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { +fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) -> io::Result<()> { // This variable counts the number of delimiters found in the file // so far (reading from the end of the file toward the beginning). let mut counter = 0; - for (block_idx, slice) in ReverseChunks::new(file).enumerate() { + for (block_idx, slice) in ReverseChunks::new(file)?.enumerate() { + let slice = slice?; + // Iterate over each byte in the slice in reverse order. let mut iter = slice.iter().enumerate().rev(); @@ -350,12 +348,14 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { // cursor in the file is at the *beginning* of the // block, so seeking forward by `i + 1` bytes puts // us right after the found delimiter. - file.seek(SeekFrom::Current((i + 1) as i64)).unwrap(); - return; + file.seek(SeekFrom::Current((i + 1) as i64))?; + return Ok(()); } } } } + + Ok(()) } /// When tail'ing a file, we do not need to read the whole file from start to @@ -363,54 +363,54 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { /// end of the file, and then read the file "backwards" in blocks of size /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up /// being a nice performance win for very large files. -fn bounded_tail(file: &mut File, settings: &Settings) { +fn bounded_tail(file: &mut File, settings: &Settings) -> io::Result { debug_assert!(!settings.presume_input_pipe); // Find the position in the file to start printing from. match &settings.mode { FilterMode::Lines(Signum::Negative(count), delimiter) => { - backwards_thru_file(file, *count, *delimiter); + backwards_thru_file(file, *count, *delimiter)?; } FilterMode::Lines(Signum::Positive(count), delimiter) if count > &1 => { - let i = forwards_thru_file(file, *count - 1, *delimiter).unwrap(); - file.seek(SeekFrom::Start(i as u64)).unwrap(); + let i = forwards_thru_file(file, *count - 1, *delimiter)?; + file.seek(SeekFrom::Start(i as u64))?; } FilterMode::Lines(Signum::MinusZero, _) => { - return; + return Ok(0); } FilterMode::Bytes(Signum::Negative(count)) => { - let len = file.seek(SeekFrom::End(0)).unwrap(); - file.seek(SeekFrom::End(-((*count).min(len) as i64))) - .unwrap(); + let len = file.seek(SeekFrom::End(0))?; + file.seek(SeekFrom::End(-((*count).min(len) as i64)))?; } FilterMode::Bytes(Signum::Positive(count)) if count > &1 => { // GNU `tail` seems to index bytes and lines starting at 1, not // at 0. It seems to treat `+0` and `+1` as the same thing. - file.seek(SeekFrom::Start(*count - 1)).unwrap(); + file.seek(SeekFrom::Start(*count - 1))?; } FilterMode::Bytes(Signum::MinusZero) => { - return; + return Ok(0); } _ => {} } // Print the target section of the file. let stdout = stdout(); - let mut stdout = stdout.lock(); - std::io::copy(file, &mut stdout).unwrap(); + let mut writer = BufWriter::new(stdout.lock()); + io::copy(file, &mut writer).and_then(|a| writer.flush().map(|_| a)) } -fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UResult<()> { +fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> io::Result { let stdout = stdout(); let mut writer = BufWriter::new(stdout.lock()); - match &settings.mode { + let result = match &settings.mode { FilterMode::Lines(Signum::Negative(count), sep) => { let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count); - chunks.fill(reader)?; + let bytes = chunks.fill(reader)?; chunks.print(&mut writer)?; + Ok(bytes) } FilterMode::Lines(Signum::PlusZero | Signum::Positive(1), _) => { - io::copy(reader, &mut writer)?; + io::copy(reader, &mut writer) } FilterMode::Lines(Signum::Positive(count), sep) => { let mut num_skip = *count - 1; @@ -424,50 +424,60 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UR } } if chunk.has_data() { - chunk.print_lines(&mut writer, num_skip as usize)?; - io::copy(reader, &mut writer)?; + let bytes = chunk.print_lines(&mut writer, num_skip as usize)?; + io::copy(reader, &mut writer).map(|b| b + bytes as u64) + } else { + Ok(0) } } FilterMode::Bytes(Signum::Negative(count)) => { let mut chunks = chunks::BytesChunkBuffer::new(*count); - chunks.fill(reader)?; + let bytes = chunks.fill(reader)?; chunks.print(&mut writer)?; + Ok(bytes) } - FilterMode::Bytes(Signum::PlusZero | Signum::Positive(1)) => { - io::copy(reader, &mut writer)?; - } + FilterMode::Bytes(Signum::PlusZero | Signum::Positive(1)) => io::copy(reader, &mut writer), FilterMode::Bytes(Signum::Positive(count)) => { let mut num_skip = *count - 1; let mut chunk = chunks::BytesChunk::new(); - loop { + let mut sum_bytes = 0u64; + let bytes = loop { if let Some(bytes) = chunk.fill(reader)? { let bytes: u64 = bytes as u64; match bytes.cmp(&num_skip) { Ordering::Less => num_skip -= bytes, Ordering::Equal => { - break; + break None; } Ordering::Greater => { - writer.write_all(chunk.get_buffer_with(num_skip as usize))?; - break; + let buffer = chunk.get_buffer_with(num_skip as usize); + writer.write_all(buffer)?; + sum_bytes += buffer.len() as u64; + break None; } } } else { - return Ok(()); + break Some(sum_bytes); } - } + }; - io::copy(reader, &mut writer)?; + if let Some(bytes) = bytes { + Ok(bytes) + } else { + io::copy(reader, &mut writer).map(|b| b + sum_bytes) + } } - _ => {} - } - Ok(()) + _ => Ok(0), + }; + + writer.flush()?; + result } #[cfg(test)] mod tests { - use crate::forwards_thru_file; + use crate::*; use std::io::Cursor; #[test]