From 9a3acadc988d28b2f6c5c220343ec2debcc40f60 Mon Sep 17 00:00:00 2001 From: Arijit Dey Date: Fri, 15 Mar 2024 18:50:03 +0530 Subject: [PATCH 1/5] Put benchmarking code --- Cargo.toml | 5 +++++ src/lib.rs | 1 + src/state.rs | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 90639cb..f1c33fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,3 +70,8 @@ required-features = ["static_output"] name = "msg-tokio" path = "examples/msg-tokio.rs" required-features = ["dynamic_output"] + + +[profile.profiling] +inherits = "release" +debug = 1 diff --git a/src/lib.rs b/src/lib.rs index 7a868a9..0282315 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ #![warn(clippy::nursery)] #![allow(clippy::doc_markdown)] #![cfg_attr(doctest, doc = include_str!("../README.md"))] +#![feature(test)] //! `minus`: A library for asynchronous terminal [paging], written in Rust. //! diff --git a/src/state.rs b/src/state.rs index 1b80053..cdce524 100644 --- a/src/state.rs +++ b/src/state.rs @@ -398,3 +398,41 @@ impl PagerState { AppendStyle::PartialUpdate(fmt_lines) } } + +#[cfg(test)] +mod bench { + extern crate test; + use super::PagerState; + use test::Bencher; + + #[bench] + fn bench_append_str_chunks(b: &mut Bencher) { + let mut buffer = "This is a line\n".repeat(20); + // Remove the last \n from the text block + buffer.pop(); + + b.iter(|| { + let mut ps = PagerState::new().unwrap(); + for _ in 0..4_400_000 { + ps.append_str(&buffer); + } + }); + } + + #[bench] + fn bench_append_str_big(b: &mut Bencher) { + let mut buffer = String::with_capacity(20 * 15 * 4_400_000); + let mut line = "This is a line\n".repeat(20); + // Remove the last \n from the text block + line.pop(); + + for _ in 0..4_400_000 { + buffer.push_str(&line); + } + + b.iter(|| { + let mut ps = PagerState::new().unwrap(); + ps.append_str(&buffer); + }); + } +} From 5c1f5fcc9f8b6848977b41c92d248d333924847d Mon Sep 17 00:00:00 2001 From: Arijit Dey Date: 
Sat, 16 Mar 2024 23:54:36 +0530 Subject: [PATCH 2/5] Use bytecount to find line count --- Cargo.toml | 1 + src/screen/mod.rs | 110 +++++++++++++++++++--------------------------- 2 files changed, 45 insertions(+), 66 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f1c33fd..31c5d2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ regex = { version = "^1", optional = true } crossbeam-channel = "^0.5" parking_lot = "0.12.1" once_cell = { version = "^1.18", features = ["parking_lot"] } +bytecount = { version = "0.6.7", features = ["runtime-dispatch-simd"] } [features] search = [ "regex" ] diff --git a/src/screen/mod.rs b/src/screen/mod.rs index 0ebd184..137b31d 100644 --- a/src/screen/mod.rs +++ b/src/screen/mod.rs @@ -149,7 +149,7 @@ impl Default for Screen { Self { line_wrapping: true, orig_text: String::with_capacity(100 * 1024), - formatted_lines: Vec::with_capacity(500 * 1024), + formatted_lines: Vec::with_capacity(10_000), line_count: 0, max_line_length: 0, unterminated: 0, @@ -359,12 +359,8 @@ where to_format = opts.text.to_string(); } - let lines = to_format - .lines() - .enumerate() - .collect::>(); - - let to_format_size = lines.len(); + let lines = to_format.lines().enumerate(); + let to_format_size = calculate_format_sizr(&to_format); let mut fr = FormatResult { lines_formatted: to_format_size, @@ -380,7 +376,7 @@ where let line_number_digits = minus_core::utils::digits(opts.lines_count + to_format_size); // Return if we have nothing to format - if lines.is_empty() { + if to_format_size == 0 { return fr; } @@ -388,63 +384,39 @@ where // Whenever a line is formatted, this will be incremented to te number of rows that the formatted line has occupied let mut formatted_row_count = opts.formatted_lines_count; - { - let line_numbers = opts.line_numbers; - let cols = opts.cols; - let lines_count = opts.lines_count; - let line_wrapping = opts.line_wrapping; - #[cfg(feature = "search")] - let search_term = opts.search_term; - - let rest_lines = - 
lines - .iter() - .take(lines.len().saturating_sub(1)) - .flat_map(|(idx, line)| { - let fmt_line = formatted_line( - line, - line_number_digits, - lines_count + idx, - line_numbers, - cols, - line_wrapping, - #[cfg(feature = "search")] - formatted_row_count, - #[cfg(feature = "search")] - &mut fr.append_search_idx, - #[cfg(feature = "search")] - search_term, - ); - fr.lines_to_row_map.insert(formatted_row_count, true); - formatted_row_count += fmt_line.len(); - if lines.len() > fr.max_line_length { - fr.max_line_length = line.len(); - } - - fmt_line - }); - opts.buffer.extend_buffer(rest_lines); - }; + let line_numbers = opts.line_numbers; + let cols = opts.cols; + let lines_count = opts.lines_count; + let line_wrapping = opts.line_wrapping; + let mut last_line_row_span = 0; + #[cfg(feature = "search")] + let search_term = opts.search_term; + + let formatted_lines = lines.flat_map(|(idx, line)| { + let fmt_line = formatted_line( + line, + line_number_digits, + lines_count + idx, + line_numbers, + cols, + line_wrapping, + #[cfg(feature = "search")] + formatted_row_count, + #[cfg(feature = "search")] + &mut fr.append_search_idx, + #[cfg(feature = "search")] + search_term, + ); + fr.lines_to_row_map.insert(formatted_row_count, true); + formatted_row_count += fmt_line.len(); + last_line_row_span = fmt_line.len(); + if line.len() > fr.max_line_length { + fr.max_line_length = line.len(); + } - let mut last_line = formatted_line( - lines.last().unwrap().1, - line_number_digits, - opts.lines_count + to_format_size - 1, - opts.line_numbers, - opts.cols, - opts.line_wrapping, - #[cfg(feature = "search")] - formatted_row_count, - #[cfg(feature = "search")] - &mut fr.append_search_idx, - #[cfg(feature = "search")] - opts.search_term, - ); - fr.lines_to_row_map.insert(formatted_row_count, true); - formatted_row_count += last_line.len(); - if lines.last().unwrap().1.len() > fr.max_line_length { - fr.max_line_length = lines.last().unwrap().1.len(); - } + fmt_line + }); + 
opts.buffer.extend_buffer(formatted_lines); #[cfg(feature = "search")] { @@ -478,14 +450,20 @@ where // If the last line ends with \n, then the line is complete so nothing is left as unterminated 0 } else { - last_line.len() + last_line_row_span }; - opts.buffer.append_to_buffer(&mut last_line); fr.rows_formatted = formatted_row_count - opts.formatted_lines_count; fr } +fn calculate_format_sizr(text: &str) -> usize { + if text.is_empty() { + return 0; + } + bytecount::count(text.as_bytes(), b'\n') + usize::from(!text.ends_with('\n')) +} + /// Formats the given `line` /// /// - `line`: The line to format From 5423d6af6968c78a1168d71c3b86f8115bfb1901 Mon Sep 17 00:00:00 2001 From: Arijit Dey Date: Fri, 5 Apr 2024 02:35:27 +0530 Subject: [PATCH 3/5] perf: Use smol_str for storing strings This increases the overall text appending speed by 18% on large text data. --- Cargo.toml | 1 + src/core/init.rs | 3 ++- src/core/utils/display/mod.rs | 12 ++++++------ src/screen/mod.rs | 11 ++++++----- src/search.rs | 4 ++-- src/state.rs | 31 ++++++++++++++++++------------- 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 31c5d2e..cf4e2f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ crossbeam-channel = "^0.5" parking_lot = "0.12.1" once_cell = { version = "^1.18", features = ["parking_lot"] } bytecount = { version = "0.6.7", features = ["runtime-dispatch-simd"] } +smol_str = "0.2.1" [features] search = [ "regex" ] diff --git a/src/core/init.rs b/src/core/init.rs index bbd6039..ae46861 100644 --- a/src/core/init.rs +++ b/src/core/init.rs @@ -24,6 +24,7 @@ use crate::{ use crossbeam_channel::{Receiver, Sender, TrySendError}; use crossterm::event; +use smol_str::ToSmolStr; use std::{ io::{stdout, Stdout}, panic, @@ -98,7 +99,7 @@ pub fn init_core(pager: &Pager, rm: RunMode) -> std::result::Result<(), MinusErr if *RUNMODE.lock() == RunMode::Static { // If stdout is not a tty, write everything and quit if !out.is_tty() { - 
write_raw_lines(&mut out, &[ps.screen.orig_text], None)?; + write_raw_lines(&mut out, &[ps.screen.orig_text.to_smolstr()], None)?; let mut rm = RUNMODE.lock(); *rm = RunMode::Uninitialized; drop(rm); diff --git a/src/core/utils/display/mod.rs b/src/core/utils/display/mod.rs index b686869..c229915 100644 --- a/src/core/utils/display/mod.rs +++ b/src/core/utils/display/mod.rs @@ -231,7 +231,7 @@ pub fn draw_append_text( #[allow(clippy::too_many_arguments)] pub fn write_text_checked( out: &mut impl Write, - lines: &[String], + lines: &[Row], mut upper_mark: usize, rows: usize, cols: usize, @@ -257,7 +257,7 @@ pub fn write_text_checked( } // Add \r to ensure cursor is placed at the beginning of each row - let display_lines: &[String] = &lines[upper_mark..lower_mark]; + let display_lines = &lines[upper_mark..lower_mark]; term::move_cursor(out, 0, 0, false)?; term::clear_entire_screen(out, false)?; @@ -287,7 +287,7 @@ pub fn write_from_pagerstate(out: &mut impl Write, ps: &mut PagerState) -> Resul } // Add \r to ensure cursor is placed at the beginning of each row - let display_lines: &[String] = ps + let display_lines = ps .screen .get_formatted_lines_with_bounds(ps.upper_mark, lower_mark); @@ -304,7 +304,7 @@ pub fn write_from_pagerstate(out: &mut impl Write, ps: &mut PagerState) -> Resul pub fn write_lines( out: &mut impl Write, - lines: &[String], + lines: &[Row], cols: usize, line_wrapping: bool, left_mark: usize, @@ -320,7 +320,7 @@ pub fn write_lines( pub fn write_lines_in_horizontal_scroll( out: &mut impl Write, - lines: &[String], + lines: &[Row], cols: usize, start: usize, line_numbers: bool, @@ -368,7 +368,7 @@ pub fn write_lines_in_horizontal_scroll( /// The `\r` resets the cursor to the start of the line. 
pub fn write_raw_lines( out: &mut impl Write, - lines: &[String], + lines: &[Row], initial: Option<&str>, ) -> Result<(), MinusError> { for line in lines { diff --git a/src/screen/mod.rs b/src/screen/mod.rs index 137b31d..b85a305 100644 --- a/src/screen/mod.rs +++ b/src/screen/mod.rs @@ -7,6 +7,7 @@ use crate::{ }; #[cfg(feature = "search")] use regex::Regex; +use smol_str::{SmolStr, ToSmolStr}; use std::borrow::Cow; @@ -16,8 +17,8 @@ use {crate::search, std::collections::BTreeSet}; // ||||||||||||||||||||||||||||||||||||||||||||||||||||||| // TYPES TO BETTER DESCRIBE THE PURPOSE OF STRINGS // ||||||||||||||||||||||||||||||||||||||||||||||||||||||| -pub type Row = String; -pub type Rows = Vec; +pub type Row = SmolStr; +pub type Rows = Vec; pub type Line<'a> = &'a str; pub type TextBlock<'a> = &'a str; pub type OwnedTextBlock = String; @@ -546,7 +547,7 @@ pub(crate) fn formatted_line<'a>( // extra difficulty while writing tests // * Line number is added only to the first row of a line. This makes a better UI overall let formatter = |row: Cow<'_, str>, is_first_row: bool, idx: usize| { - format!( + smol_str::format_smolstr!( "{bold}{number: >len$}{reset} {row}", bold = if cfg!(not(test)) && is_first_row { crossterm::style::Attribute::Bold.to_string() @@ -593,9 +594,9 @@ pub(crate) fn formatted_line<'a>( enumerated_rows .map(|(wrap_idx, mut row)| { handle_search(&mut row, wrap_idx); - row.to_string() + row.to_smolstr() }) - .collect::>() + .collect::>() } } diff --git a/src/search.rs b/src/search.rs index 36eca0b..ed4a7c0 100644 --- a/src/search.rs +++ b/src/search.rs @@ -52,7 +52,7 @@ #![allow(unused_imports)] use crate::minus_core::utils::{display, term}; -use crate::screen::Screen; +use crate::screen::{Row, Screen}; use crate::{error::MinusError, input::HashedEventRegister, screen}; use crate::{LineNumbers, PagerState}; use crossterm::{ @@ -242,7 +242,7 @@ impl FetchInputResult { /// A cache for storing all the new data obtained by running incremental search 
pub(crate) struct IncrementalSearchCache { /// Lines to be displayed with highlighted search matches - pub(crate) formatted_lines: Vec, + pub(crate) formatted_lines: Vec, /// Index from `search_idx` where a search match after current upper mark may be found /// NOTE: There is no guarantee that this will stay within the bounds of `search_idx` pub(crate) search_mark: usize, diff --git a/src/state.rs b/src/state.rs index cdce524..e152ffa 100644 --- a/src/state.rs +++ b/src/state.rs @@ -403,21 +403,22 @@ impl PagerState { mod bench { extern crate test; use super::PagerState; + use smol_str::{SmolStr, ToSmolStr}; use test::Bencher; - #[bench] - fn bench_append_str_chunks(b: &mut Bencher) { - let mut buffer = "This is a line\n".repeat(20); - // Remove the last \n from the text block - buffer.pop(); - - b.iter(|| { - let mut ps = PagerState::new().unwrap(); - for _ in 0..4_400_000 { - ps.append_str(&buffer); - } - }); - } + // #[bench] + // fn bench_append_str_chunks(b: &mut Bencher) { + // let mut buffer = "This is a line\n".repeat(20); + // // Remove the last \n from the text block + // buffer.pop(); + // + // b.iter(|| { + // let mut ps = PagerState::new().unwrap(); + // for _ in 0..4_400_000 { + // ps.append_str(&buffer); + // } + // }); + // } #[bench] fn bench_append_str_big(b: &mut Bencher) { @@ -431,6 +432,10 @@ mod bench { } b.iter(|| { + // let _t = textwrap::wrap(&buffer, 80) + // .iter() + // .map(|c| c.to_smolstr()) + // .collect::>(); let mut ps = PagerState::new().unwrap(); ps.append_str(&buffer); }); From 417faaa802c11f7d998126c4f96bb9de62f567ee Mon Sep 17 00:00:00 2001 From: Arijit Dey Date: Sat, 13 Apr 2024 09:57:48 +0530 Subject: [PATCH 4/5] fix: benchmarking code --- src/state.rs | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/state.rs b/src/state.rs index e152ffa..89eaf88 100644 --- a/src/state.rs +++ b/src/state.rs @@ -403,22 +403,21 @@ impl PagerState { mod bench { extern crate test; use 
super::PagerState; - use smol_str::{SmolStr, ToSmolStr}; use test::Bencher; - // #[bench] - // fn bench_append_str_chunks(b: &mut Bencher) { - // let mut buffer = "This is a line\n".repeat(20); - // // Remove the last \n from the text block - // buffer.pop(); - // - // b.iter(|| { - // let mut ps = PagerState::new().unwrap(); - // for _ in 0..4_400_000 { - // ps.append_str(&buffer); - // } - // }); - // } + #[bench] + fn bench_append_str_chunks(b: &mut Bencher) { + let mut buffer = "This is a line\n".repeat(20); + // Remove the last \n from the text block + buffer.pop(); + + b.iter(|| { + for _ in 0..4_400_000 { + let mut ps = PagerState::new().unwrap(); + ps.append_str(&buffer); + } + }); + } #[bench] fn bench_append_str_big(b: &mut Bencher) { @@ -432,10 +431,6 @@ mod bench { } b.iter(|| { - // let _t = textwrap::wrap(&buffer, 80) - // .iter() - // .map(|c| c.to_smolstr()) - // .collect::>(); let mut ps = PagerState::new().unwrap(); ps.append_str(&buffer); }); From c2300d0cc800b150e95e2111e0c492fca7d682c7 Mon Sep 17 00:00:00 2001 From: Arijit Dey Date: Mon, 29 Apr 2024 00:03:58 +0530 Subject: [PATCH 5/5] feat: Add DataSource trait and impl on String --- Cargo.toml | 1 + src/screen/data_source.rs | 39 +++++++++++++++++++++++++++++++++++++++ src/screen/mod.rs | 3 ++- 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/screen/data_source.rs diff --git a/Cargo.toml b/Cargo.toml index cf4e2f9..80b2b07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ parking_lot = "0.12.1" once_cell = { version = "^1.18", features = ["parking_lot"] } bytecount = { version = "0.6.7", features = ["runtime-dispatch-simd"] } smol_str = "0.2.1" +arrayvec = "0.7.4" [features] search = [ "regex" ] diff --git a/src/screen/data_source.rs b/src/screen/data_source.rs new file mode 100644 index 0000000..562c81b --- /dev/null +++ b/src/screen/data_source.rs @@ -0,0 +1,39 @@ +use arrayvec::ArrayString; + +type InsertionBuffer = ArrayString<240>; + +pub trait DataSource { 
+ fn reads_forward(&mut self, buffer: &mut InsertionBuffer) -> bool; + fn reads_backward(&mut self, buffer: &mut InsertionBuffer) -> bool; + fn readr_line(&mut self, line: &mut String, pb: &mut InsertionBuffer, fb: &mut InsertionBuffer); +} + +pub struct StringSource { + text: String, + cursor: usize, +} + +impl DataSource for StringSource { + fn reads_forward(&mut self, buffer: &mut InsertionBuffer) -> bool { + let max_read = (self.text.len() - self.cursor).min(buffer.capacity()); + buffer.push_str(&self.text[self.cursor..self.cursor + max_read]); + self.cursor += max_read + 1; + + self.cursor < self.text.len() + } + fn reads_backward(&mut self, buffer: &mut InsertionBuffer) -> bool { + let max_read = self.text.len().min(buffer.capacity()); + buffer.push_str(&self.text[self.cursor..max_read]); + self.cursor += max_read + 1; + + self.cursor != 0 + } + + fn readr_line( + &mut self, + line: &mut String, + pb: &mut InsertionBuffer, + fb: &mut InsertionBuffer, + ) { + } +} diff --git a/src/screen/mod.rs b/src/screen/mod.rs index b85a305..8dbb0bf 100644 --- a/src/screen/mod.rs +++ b/src/screen/mod.rs @@ -8,9 +8,10 @@ use crate::{ #[cfg(feature = "search")] use regex::Regex; use smol_str::{SmolStr, ToSmolStr}; - use std::borrow::Cow; +mod data_source; + #[cfg(feature = "search")] use {crate::search, std::collections::BTreeSet};