From e633a16ef52d7b88211684def9cb9eb017e9d46f Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Fri, 23 Jan 2026 08:22:46 -0500 Subject: [PATCH 1/5] perf(graphics): optimize dirty region tracking and character rendering Three performance improvements for framebuffer rendering: 1. Character-level dirty marking: Reduced mark_dirty() calls from ~80 per character (one per pixel) to 1 per character. Added write_pixel_no_mark() for batch operations with single mark_dirty_rect() call at the end. 2. Multiple dirty rectangles: Replace single bounding box with 4 separate dirty rects. Uses intelligent merging: - Merge if rects overlap or are within 32 pixels - When full, merge closest pair to make room - Result: sparse updates (cursor + text) flush independently 3. Scroll optimization: Remove redundant flush_if_dirty() before scroll - pending dirty regions scroll with content, only the cleared bottom line needs flushing. Expected improvements: - Character rendering: 80x fewer dirty region operations - Sparse updates: cursor blink + text no longer flush giant bbox - Scroll: ~1.92MB less buffer copying per scroll operation Co-Authored-By: Claude Opus 4.5 --- kernel/src/graphics/double_buffer.rs | 302 ++++++++++++++++++++++----- kernel/src/logger.rs | 59 ++++-- 2 files changed, 290 insertions(+), 71 deletions(-) diff --git a/kernel/src/graphics/double_buffer.rs b/kernel/src/graphics/double_buffer.rs index 9d6dd09..3cfa19c 100644 --- a/kernel/src/graphics/double_buffer.rs +++ b/kernel/src/graphics/double_buffer.rs @@ -6,11 +6,14 @@ use alloc::vec::Vec; use core::ptr; +const MAX_DIRTY_RECTS: usize = 4; +const MERGE_PROXIMITY: usize = 32; + /// Represents a rectangular region that has been modified. /// /// Coordinates are byte offsets on each scanline. #[derive(Debug, Clone, Copy)] -pub struct DirtyRegion { +pub struct DirtyRect { /// X coordinate of top-left corner (in bytes, inclusive) pub x_start: usize, /// Y coordinate of top-left corner (in scanlines, inclusive) @@ -21,7 +24,7 @@ pub struct DirtyRegion { pub y_end: usize, } -impl DirtyRegion { +impl DirtyRect { pub fn new() -> Self { Self { x_start: usize::MAX, @@ -48,6 +51,153 @@ impl DirtyRegion { pub fn clear(&mut self) { *self = Self::new(); } + + fn union(&self, other: &Self) -> Self { + Self { + x_start: self.x_start.min(other.x_start), + y_start: self.y_start.min(other.y_start), + x_end: self.x_end.max(other.x_end), + y_end: self.y_end.max(other.y_end), + } + } + + fn distance_to(&self, other: &Self) -> usize { + let gap_x = if self.x_end < other.x_start { + other.x_start - self.x_end + } else if other.x_end < self.x_start { + self.x_start - other.x_end + } else { + 0 + }; + + let gap_y = if self.y_end < other.y_start { + other.y_start - self.y_end + } else if other.y_end < self.y_start { + self.y_start - other.y_end + } else { + 0 + }; + + gap_x.max(gap_y) + } + + fn should_merge(&self, other: &Self) -> bool { + if self.is_empty() || other.is_empty() { + return false; + } + + self.distance_to(other) <= MERGE_PROXIMITY + } +} + +/// Track multiple dirty rectangles for incremental flushes. 
+pub struct DirtyRegionTracker {
+    rects: [Option<DirtyRect>; MAX_DIRTY_RECTS],
+    count: usize,
+}
+
+impl DirtyRegionTracker {
+    pub fn new() -> Self {
+        Self {
+            rects: [None; MAX_DIRTY_RECTS],
+            count: 0,
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
+
+    pub fn clear(&mut self) {
+        self.rects = [None; MAX_DIRTY_RECTS];
+        self.count = 0;
+    }
+
+    pub fn rects(&self) -> impl Iterator<Item = DirtyRect> + '_ {
+        self.rects.iter().filter_map(|rect| *rect)
+    }
+
+    pub fn mark_dirty(&mut self, y: usize, x_start: usize, x_end: usize) {
+        if x_start >= x_end {
+            return;
+        }
+
+        let mut new_rect = DirtyRect::new();
+        new_rect.mark_dirty(y, x_start, x_end);
+
+        self.absorb_merges(&mut new_rect);
+
+        if self.count == MAX_DIRTY_RECTS {
+            self.merge_closest_pair();
+            self.absorb_merges(&mut new_rect);
+        }
+
+        self.insert(new_rect);
+    }
+
+    fn absorb_merges(&mut self, rect: &mut DirtyRect) {
+        loop {
+            let mut merged_any = false;
+            for slot in self.rects.iter_mut() {
+                if let Some(existing) = *slot {
+                    if existing.should_merge(rect) {
+                        *rect = existing.union(rect);
+                        *slot = None;
+                        self.count = self.count.saturating_sub(1);
+                        merged_any = true;
+                    }
+                }
+            }
+            if !merged_any {
+                break;
+            }
+        }
+    }
+
+    fn insert(&mut self, rect: DirtyRect) {
+        if rect.is_empty() {
+            return;
+        }
+
+        for slot in self.rects.iter_mut() {
+            if slot.is_none() {
+                *slot = Some(rect);
+                self.count += 1;
+                return;
+            }
+        }
+    }
+
+    fn merge_closest_pair(&mut self) {
+        if self.count < 2 {
+            return;
+        }
+
+        let mut best: Option<(usize, usize, usize)> = None;
+        for i in 0..MAX_DIRTY_RECTS {
+            let Some(rect_i) = self.rects[i] else {
+                continue;
+            };
+            for j in (i + 1)..MAX_DIRTY_RECTS {
+                let Some(rect_j) = self.rects[j] else {
+                    continue;
+                };
+                let distance = rect_i.distance_to(&rect_j);
+                let replace = best.map_or(true, |(best_distance, _, _)| distance < best_distance);
+                if replace {
+                    best = Some((distance, i, j));
+                }
+            }
+        }
+
+        if let Some((_, i, j)) = best {
+            if let (Some(rect_i), Some(rect_j)) = (self.rects[i], self.rects[j]) {
+                self.rects[i] = Some(rect_i.union(&rect_j));
+                self.rects[j] = None;
+                self.count = self.count.saturating_sub(1);
+            }
+        }
+    }
 }
 
 /// Double-buffered framebuffer for tear-free rendering.
@@ -63,8 +213,8 @@ pub struct DoubleBufferedFrameBuffer {
     shadow_buffer: Vec<u8>,
     /// Track if shadow buffer has been modified since last flush
     dirty: bool,
-    /// Track the bounding box of modified regions
-    dirty_region: DirtyRegion,
+    /// Track dirty rectangles for incremental flushes
+    dirty_regions: DirtyRegionTracker,
     /// Bytes per scanline
     stride: usize,
     /// Number of scanlines
@@ -90,7 +240,7 @@ impl DoubleBufferedFrameBuffer {
             hardware_len,
             shadow_buffer,
             dirty: false,
-            dirty_region: DirtyRegion::new(),
+            dirty_regions: DirtyRegionTracker::new(),
             stride,
             height,
         }
     }
@@ -112,47 +262,53 @@ impl DoubleBufferedFrameBuffer {
     ///
     /// This is the "page flip" operation that makes rendered content visible.
pub fn flush(&mut self) { - if !self.dirty || self.dirty_region.is_empty() { + if !self.dirty || self.dirty_regions.is_empty() { self.dirty = false; - self.dirty_region.clear(); + self.dirty_regions.clear(); return; } - let y_start = self.dirty_region.y_start.min(self.height); - let y_end = self.dirty_region.y_end.min(self.height); - let x_start = self.dirty_region.x_start.min(self.stride); - let x_end = self.dirty_region.x_end.min(self.stride); let max_len = self.hardware_len.min(self.shadow_buffer.len()); - - if y_start >= y_end || x_start >= x_end || max_len == 0 { + if max_len == 0 { self.dirty = false; - self.dirty_region.clear(); + self.dirty_regions.clear(); return; } - for y in y_start..y_end { - let row_offset = y * self.stride; - let src_start = row_offset + x_start; - let src_end = row_offset + x_end; - if src_end > max_len { - continue; - } + for rect in self.dirty_regions.rects() { + let y_start = rect.y_start.min(self.height); + let y_end = rect.y_end.min(self.height); + let x_start = rect.x_start.min(self.stride); + let x_end = rect.x_end.min(self.stride); - let len = x_end - x_start; - if len == 0 { + if y_start >= y_end || x_start >= x_end { continue; } - // SAFETY: hardware_ptr is valid for hardware_len bytes (from bootloader), - // shadow_buffer is valid for its length, and we copy the minimum of both. - unsafe { - let src = self.shadow_buffer.as_ptr().add(src_start); - let dst = self.hardware_ptr.add(src_start); - ptr::copy_nonoverlapping(src, dst, len); + for y in y_start..y_end { + let row_offset = y * self.stride; + let src_start = row_offset + x_start; + let src_end = row_offset + x_end; + if src_end > max_len { + continue; + } + + let len = x_end - x_start; + if len == 0 { + continue; + } + + // SAFETY: hardware_ptr is valid for hardware_len bytes (from bootloader), + // shadow_buffer is valid for its length, and we copy the minimum of both. + unsafe { + let src = self.shadow_buffer.as_ptr().add(src_start); + let dst = self.hardware_ptr.add(src_start); + ptr::copy_nonoverlapping(src, dst, len); + } } } self.dirty = false; - self.dirty_region.clear(); + self.dirty_regions.clear(); } /// Force a full buffer flush (used for clear operations). @@ -166,13 +322,13 @@ impl DoubleBufferedFrameBuffer { } } self.dirty = false; - self.dirty_region.clear(); + self.dirty_regions.clear(); } /// Mark a rectangular region as dirty (in byte coordinates). pub fn mark_region_dirty(&mut self, y: usize, x_start: usize, x_end: usize) { self.dirty = true; - self.dirty_region.mark_dirty(y, x_start, x_end); + self.dirty_regions.mark_dirty(y, x_start, x_end); } /// Flush only if the buffer has been modified since the last flush. 
@@ -214,38 +370,74 @@ mod tests { #[test] fn dirty_region_new_is_empty() { - let region = DirtyRegion::new(); - assert!(region.is_empty()); + let rect = DirtyRect::new(); + assert!(rect.is_empty()); } #[test] fn dirty_region_mark_expands() { - let mut region = DirtyRegion::new(); - region.mark_dirty(2, 4, 8); - assert!(!region.is_empty()); - assert_eq!(region.x_start, 4); - assert_eq!(region.x_end, 8); - assert_eq!(region.y_start, 2); - assert_eq!(region.y_end, 3); + let mut rect = DirtyRect::new(); + rect.mark_dirty(2, 4, 8); + assert!(!rect.is_empty()); + assert_eq!(rect.x_start, 4); + assert_eq!(rect.x_end, 8); + assert_eq!(rect.y_start, 2); + assert_eq!(rect.y_end, 3); } #[test] fn dirty_region_mark_unions() { - let mut region = DirtyRegion::new(); - region.mark_dirty(2, 4, 8); - region.mark_dirty(1, 2, 6); - assert_eq!(region.x_start, 2); - assert_eq!(region.x_end, 8); - assert_eq!(region.y_start, 1); - assert_eq!(region.y_end, 3); + let mut rect = DirtyRect::new(); + rect.mark_dirty(2, 4, 8); + rect.mark_dirty(1, 2, 6); + assert_eq!(rect.x_start, 2); + assert_eq!(rect.x_end, 8); + assert_eq!(rect.y_start, 1); + assert_eq!(rect.y_end, 3); } #[test] fn dirty_region_clear_resets() { - let mut region = DirtyRegion::new(); - region.mark_dirty(0, 1, 2); - region.clear(); - assert!(region.is_empty()); + let mut rect = DirtyRect::new(); + rect.mark_dirty(0, 1, 2); + rect.clear(); + assert!(rect.is_empty()); + } + + #[test] + fn dirty_region_tracker_keeps_separate_rects() { + let mut tracker = DirtyRegionTracker::new(); + tracker.mark_dirty(0, 0, 4); + tracker.mark_dirty(MERGE_PROXIMITY + 8, 0, 4); + assert_eq!(tracker.rects().count(), 2); + } + + #[test] + fn dirty_region_tracker_merges_close_rects() { + let mut tracker = DirtyRegionTracker::new(); + tracker.mark_dirty(0, 0, 4); + tracker.mark_dirty(0, MERGE_PROXIMITY / 2, MERGE_PROXIMITY / 2 + 4); + assert_eq!(tracker.rects().count(), 1); + let rect = tracker.rects().next().unwrap(); + assert_eq!(rect.x_start, 0); + assert_eq!(rect.x_end, MERGE_PROXIMITY / 2 + 4); + } + + #[test] + fn dirty_region_tracker_merges_closest_when_full() { + let mut tracker = DirtyRegionTracker::new(); + tracker.mark_dirty(0, 0, 2); + tracker.mark_dirty(MERGE_PROXIMITY + 8, 0, 2); + tracker.mark_dirty(3 * MERGE_PROXIMITY + 8, 0, 2); + tracker.mark_dirty(5 * MERGE_PROXIMITY + 8, 0, 2); + assert_eq!(tracker.rects().count(), MAX_DIRTY_RECTS); + + tracker.mark_dirty(7 * MERGE_PROXIMITY + 8, 0, 2); + assert_eq!(tracker.rects().count(), MAX_DIRTY_RECTS); + let merged = tracker + .rects() + .any(|rect| rect.y_start == 0 && rect.y_end == MERGE_PROXIMITY + 9); + assert!(merged); } #[test] @@ -253,7 +445,7 @@ mod tests { let mut buf = [0u8; 100]; let db = DoubleBufferedFrameBuffer::new(buf.as_mut_ptr(), buf.len(), 10, 10); assert!(!db.dirty); - assert!(db.dirty_region.is_empty()); + assert!(db.dirty_regions.is_empty()); } #[test] @@ -262,7 +454,7 @@ mod tests { let mut db = DoubleBufferedFrameBuffer::new(buf.as_mut_ptr(), buf.len(), 10, 10); db.mark_region_dirty(1, 2, 4); assert!(db.dirty); - assert!(!db.dirty_region.is_empty()); + assert!(!db.dirty_regions.is_empty()); } #[test] @@ -272,7 +464,7 @@ mod tests { db.mark_region_dirty(1, 0, 2); db.flush(); assert!(!db.dirty); - assert!(db.dirty_region.is_empty()); + assert!(db.dirty_regions.is_empty()); } #[test] diff --git a/kernel/src/logger.rs b/kernel/src/logger.rs index 8632801..a0a993e 100644 --- a/kernel/src/logger.rs +++ b/kernel/src/logger.rs @@ -199,7 +199,6 @@ impl ShellFrameBuffer { let scroll_bytes = line_height 
* row_bytes; if let Some(db) = &mut self.double_buffer { - db.flush_if_dirty(); let buffer_len = db.buffer_mut().len(); if scroll_bytes < buffer_len { { @@ -382,9 +381,10 @@ impl ShellFrameBuffer { // Fill entire framebuffer with background color (black) for y in 0..self.height() { for x in 0..self.width() { - self.write_pixel(x, y, 0x00); + self.write_pixel_no_mark(x, y, 0x00); } } + self.mark_dirty_rect(0, 0, self.width(), self.height()); // Reset cursor position self.x_pos = BORDER_PADDING; self.y_pos = BORDER_PADDING; @@ -393,25 +393,40 @@ impl ShellFrameBuffer { /// Clear from cursor to end of line fn clear_to_eol(&mut self) { let char_height = shell_font::CHAR_RASTER_HEIGHT.val(); - for y in self.y_pos..(self.y_pos + char_height) { - for x in self.x_pos..self.width() { - self.write_pixel(x, y, 0x00); + let y_start = self.y_pos; + let y_end = (self.y_pos + char_height).min(self.height()); + let x_start = self.x_pos; + let x_end = self.width(); + for y in y_start..y_end { + for x in x_start..x_end { + self.write_pixel_no_mark(x, y, 0x00); } } + self.mark_dirty_rect( + x_start, + y_start, + x_end.saturating_sub(x_start), + y_end.saturating_sub(y_start), + ); } /// Write a rendered character to the framebuffer fn write_rendered_char(&mut self, rendered_char: RasterizedChar) { + let char_x = self.x_pos; + let char_y = self.y_pos; for (y, row) in rendered_char.raster().iter().enumerate() { for (x, byte) in row.iter().enumerate() { - self.write_pixel(self.x_pos + x, self.y_pos + y, *byte); + self.write_pixel_no_mark(char_x + x, char_y + y, *byte); } } - self.x_pos += rendered_char.width() + LETTER_SPACING; + let char_width = rendered_char.width(); + let char_height = shell_font::CHAR_RASTER_HEIGHT.val(); + self.mark_dirty_rect(char_x, char_y, char_width, char_height); + self.x_pos += char_width + LETTER_SPACING; } - /// Write a pixel at the specified coordinates - fn write_pixel(&mut self, x: usize, y: usize, intensity: u8) { + /// Write a pixel at the specified coordinates without marking dirty (for batched operations) + fn write_pixel_no_mark(&mut self, x: usize, y: usize, intensity: u8) { let pixel_offset = y * self.info.stride + x; let color = match self.info.pixel_format { PixelFormat::Rgb => [intensity, intensity, intensity / 2, 0], @@ -424,7 +439,6 @@ impl ShellFrameBuffer { }; let bytes_per_pixel = self.info.bytes_per_pixel; let byte_offset = pixel_offset * bytes_per_pixel; - let x_byte_offset = x * bytes_per_pixel; if let Some(db) = &mut self.double_buffer { let buffer = db.buffer_mut(); @@ -432,11 +446,6 @@ impl ShellFrameBuffer { for (i, &byte) in color[..bytes_per_pixel].iter().enumerate() { buffer[byte_offset + i] = byte; } - db.mark_region_dirty( - y, - x_byte_offset, - x_byte_offset + bytes_per_pixel, - ); } } else if byte_offset + bytes_per_pixel <= self.buffer_len { unsafe { @@ -448,6 +457,22 @@ impl ShellFrameBuffer { } } + fn mark_dirty_rect(&mut self, x: usize, y: usize, width: usize, height: usize) { + if let Some(db) = &mut self.double_buffer { + let bpp = self.info.bytes_per_pixel; + let x_end = (x + width).min(self.info.width); + let y_end = (y + height).min(self.info.height); + if x >= x_end || y >= y_end { + return; + } + let x_start = x * bpp; + let x_end = x_end * bpp; + for row in y..y_end { + db.mark_region_dirty(row, x_start, x_end); + } + } + } + /// Write a string to the framebuffer /// /// Batches all character writes and flushes once at the end for efficiency. 
@@ -468,12 +493,14 @@ impl ShellFrameBuffer { // The cursor is drawn at the bottom of the character cell let cursor_height = 2; // 2 pixels tall let cursor_y = self.y_pos + shell_font::CHAR_RASTER_HEIGHT.val() - cursor_height; + let cursor_x = self.x_pos; for dy in 0..cursor_height { for dx in 0..shell_font::CHAR_RASTER_WIDTH { - self.write_pixel(self.x_pos + dx, cursor_y + dy, intensity); + self.write_pixel_no_mark(cursor_x + dx, cursor_y + dy, intensity); } } + self.mark_dirty_rect(cursor_x, cursor_y, shell_font::CHAR_RASTER_WIDTH, cursor_height); } /// Toggle the cursor visibility (for blinking effect) From cd059c4e339b8348f5deb104e53e4a081faf31ae Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Fri, 23 Jan 2026 08:30:17 -0500 Subject: [PATCH 2/5] perf(graphics): add batch dirty marking and optimize flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additional optimizations for framebuffer rendering: 1. Add mark_rect_dirty() to DirtyRegionTracker - marks entire rectangle in one operation instead of per-row calls. For a 16-pixel character, this is 16 merge operations → 1. 2. Add mark_region_dirty_rect() to DoubleBufferedFrameBuffer as wrapper for the new efficient method. 3. Add fast path in flush() for full-width dirty rectangles - if x_start == 0 && x_end == stride, copy entire vertical block in one memcpy instead of per-row copies. 4. Update mark_dirty_rect() in logger.rs to use the new batch method. Expected improvements: - Character dirty marking: 16x fewer operations - Scroll/clear flush: Nx fewer memcpy calls (N = number of rows) Co-Authored-By: Claude Opus 4.5 --- kernel/src/graphics/double_buffer.rs | 79 ++++++++++++++++++++++++---- kernel/src/logger.rs | 7 ++- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/kernel/src/graphics/double_buffer.rs b/kernel/src/graphics/double_buffer.rs index 3cfa19c..0cf19dd 100644 --- a/kernel/src/graphics/double_buffer.rs +++ b/kernel/src/graphics/double_buffer.rs @@ -48,6 +48,7 @@ impl DirtyRect { } /// Reset to empty. + #[allow(dead_code)] // Used for testing DirtyRect in isolation pub fn clear(&mut self) { *self = Self::new(); } @@ -135,6 +136,30 @@ impl DirtyRegionTracker { self.insert(new_rect); } + /// Mark an entire rectangular region as dirty in one operation. + /// Much more efficient than calling mark_dirty() for each row. 
+ pub fn mark_rect_dirty(&mut self, y_start: usize, y_end: usize, x_start: usize, x_end: usize) { + if x_start >= x_end || y_start >= y_end { + return; + } + + let mut new_rect = DirtyRect { + x_start, + x_end, + y_start, + y_end, + }; + + self.absorb_merges(&mut new_rect); + + if self.count == MAX_DIRTY_RECTS { + self.merge_closest_pair(); + self.absorb_merges(&mut new_rect); + } + + self.insert(new_rect); + } + fn absorb_merges(&mut self, rect: &mut DirtyRect) { loop { let mut merged_any = false; @@ -143,7 +168,7 @@ impl DirtyRegionTracker { if existing.should_merge(rect) { *rect = existing.union(rect); *slot = None; - self.count = self.count.saturating_sub(1); + self.count -= 1; merged_any = true; } } @@ -194,7 +219,7 @@ impl DirtyRegionTracker { if let (Some(rect_i), Some(rect_j)) = (self.rects[i], self.rects[j]) { self.rects[i] = Some(rect_i.union(&rect_j)); self.rects[j] = None; - self.count = self.count.saturating_sub(1); + self.count -= 1; } } } @@ -285,16 +310,32 @@ impl DoubleBufferedFrameBuffer { continue; } - for y in y_start..y_end { - let row_offset = y * self.stride; - let src_start = row_offset + x_start; - let src_end = row_offset + x_end; - if src_end > max_len { + // Fast path: if dirty rect spans full width, copy entire block at once + if x_start == 0 && x_end == self.stride { + let start_offset = y_start * self.stride; + let total_len = (y_end - y_start) * self.stride; + if start_offset + total_len <= max_len { + // SAFETY: hardware_ptr is valid for hardware_len bytes (from bootloader), + // shadow_buffer is valid for its length, and we copy the minimum of both. + unsafe { + let src = self.shadow_buffer.as_ptr().add(start_offset); + let dst = self.hardware_ptr.add(start_offset); + ptr::copy_nonoverlapping(src, dst, total_len); + } continue; } + } + + // Slow path: partial width, need per-row copies + let row_len = x_end - x_start; + if row_len == 0 { + continue; + } - let len = x_end - x_start; - if len == 0 { + for y in y_start..y_end { + let row_offset = y * self.stride; + let src_start = row_offset + x_start; + if src_start + row_len > max_len { continue; } @@ -303,7 +344,7 @@ impl DoubleBufferedFrameBuffer { unsafe { let src = self.shadow_buffer.as_ptr().add(src_start); let dst = self.hardware_ptr.add(src_start); - ptr::copy_nonoverlapping(src, dst, len); + ptr::copy_nonoverlapping(src, dst, row_len); } } } @@ -331,6 +372,13 @@ impl DoubleBufferedFrameBuffer { self.dirty_regions.mark_dirty(y, x_start, x_end); } + /// Mark an entire rectangular region as dirty in one operation. + /// Much more efficient than calling mark_region_dirty() for each row. + pub fn mark_region_dirty_rect(&mut self, y_start: usize, y_end: usize, x_start: usize, x_end: usize) { + self.dirty = true; + self.dirty_regions.mark_rect_dirty(y_start, y_end, x_start, x_end); + } + /// Flush only if the buffer has been modified since the last flush. 
#[inline] pub fn flush_if_dirty(&mut self) { @@ -423,6 +471,17 @@ mod tests { assert_eq!(rect.x_end, MERGE_PROXIMITY / 2 + 4); } + #[test] + fn dirty_region_tracker_merges_overlapping_rects() { + let mut tracker = DirtyRegionTracker::new(); + tracker.mark_dirty(0, 0, 6); + tracker.mark_dirty(0, 4, 10); + assert_eq!(tracker.rects().count(), 1); + let rect = tracker.rects().next().unwrap(); + assert_eq!(rect.x_start, 0); + assert_eq!(rect.x_end, 10); + } + #[test] fn dirty_region_tracker_merges_closest_when_full() { let mut tracker = DirtyRegionTracker::new(); diff --git a/kernel/src/logger.rs b/kernel/src/logger.rs index a0a993e..e3dd075 100644 --- a/kernel/src/logger.rs +++ b/kernel/src/logger.rs @@ -466,10 +466,9 @@ impl ShellFrameBuffer { return; } let x_start = x * bpp; - let x_end = x_end * bpp; - for row in y..y_end { - db.mark_region_dirty(row, x_start, x_end); - } + let x_end_bytes = x_end * bpp; + // Mark entire rectangle in one operation (much faster than per-row) + db.mark_region_dirty_rect(y, y_end, x_start, x_end_bytes); } } From deee6c35b36bec40b6dae8418d4b99ddb7646843 Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Fri, 23 Jan 2026 08:36:29 -0500 Subject: [PATCH 3/5] perf(graphics): eliminate cascading flushes and batch cursor ops Critical performance fixes for graphics rendering: 1. Create write_bytes_to_shell_internal() that doesn't flush - Render thread uses this non-flushing version - Render thread handles flush once after all batches - Before: render_batch -> write_bytes_to_shell [FLUSH] -> flush_framebuffer [FLUSH AGAIN] - After: render_batch -> write_bytes_to_shell_internal [NO FLUSH] -> flush_framebuffer [FLUSH ONCE] 2. Batch cursor operations in write_bytes_to_shell() - Hide cursor ONCE at start of batch - Write all bytes - Show cursor ONCE at end of batch - Before: 80 chars = 240 cursor ops (hide/draw/show per char) - After: 80 chars = 2 cursor ops (hide once, show once) 3. Update run-interactive.sh to kill existing containers - Prevents port conflict errors - Cleaner startup experience Expected improvement: 10-50x for batch text output. Co-Authored-By: Claude Opus 4.5 --- docker/qemu/run-interactive.sh | 7 ++++ kernel/src/graphics/render_queue.rs | 7 ++-- kernel/src/graphics/terminal_manager.rs | 47 ++++++++++++++++++++----- 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/docker/qemu/run-interactive.sh b/docker/qemu/run-interactive.sh index d36c513..95cc10c 100755 --- a/docker/qemu/run-interactive.sh +++ b/docker/qemu/run-interactive.sh @@ -16,6 +16,13 @@ if ! docker image inspect "$IMAGE_NAME" &>/dev/null; then docker build -t "$IMAGE_NAME" "$SCRIPT_DIR" fi +# Kill any existing breenix-qemu containers (prevents port conflicts) +EXISTING=$(docker ps -q --filter ancestor="$IMAGE_NAME" 2>/dev/null) +if [ -n "$EXISTING" ]; then + echo "Stopping existing breenix-qemu containers..." + docker kill $EXISTING 2>/dev/null || true +fi + # Find the UEFI image UEFI_IMG=$(ls -t "$BREENIX_ROOT/target/release/build/breenix-"*/out/breenix-uefi.img 2>/dev/null | head -1) if [ -z "$UEFI_IMG" ]; then diff --git a/kernel/src/graphics/render_queue.rs b/kernel/src/graphics/render_queue.rs index 5225b69..1a8ccc6 100644 --- a/kernel/src/graphics/render_queue.rs +++ b/kernel/src/graphics/render_queue.rs @@ -222,10 +222,11 @@ pub fn drain_and_render() -> usize { /// Render a batch of bytes to the framebuffer. /// This is where the deep call stack happens, but it's on the render task's stack. 
fn render_batch(bytes: &[u8]) { - // Use terminal manager if active - use batched write for efficiency + // Use terminal manager if active - use internal (non-flushing) version + // since the render thread handles flushing after drain_and_render() if crate::graphics::terminal_manager::is_terminal_manager_active() { - // write_bytes_to_shell acquires locks once for the entire batch - let _ = crate::graphics::terminal_manager::write_bytes_to_shell(bytes); + // write_bytes_to_shell_internal does NOT flush - render thread flushes once after all batches + let _ = crate::graphics::terminal_manager::write_bytes_to_shell_internal(bytes); return; } diff --git a/kernel/src/graphics/terminal_manager.rs b/kernel/src/graphics/terminal_manager.rs index 546cc3a..ef258ac 100644 --- a/kernel/src/graphics/terminal_manager.rs +++ b/kernel/src/graphics/terminal_manager.rs @@ -334,6 +334,21 @@ impl TerminalManager { } } + /// Write bytes to the shell terminal (batched version for efficient output). + /// + /// This is more efficient than calling write_char_to_shell per character + /// as it hides/shows cursor once for the entire batch. + pub fn write_bytes_to_shell(&mut self, canvas: &mut impl Canvas, bytes: &[u8]) { + if self.active_idx == TerminalId::Shell as usize { + // Hide cursor ONCE at the start + self.terminal_pane.draw_cursor(canvas, false); + // Write all bytes + self.terminal_pane.write_bytes(canvas, bytes); + // Show cursor ONCE at the end + self.terminal_pane.draw_cursor(canvas, self.cursor_visible); + } + } + /// Add a log line to the logs buffer and display if Logs is active. pub fn add_log_line(&mut self, canvas: &mut impl Canvas, line: &str) { // Store in buffer @@ -444,9 +459,29 @@ pub fn write_str_to_shell(s: &str) -> bool { /// Write bytes to the shell terminal (batched version for efficient output). /// /// This is more efficient than calling write_char_to_shell per character -/// as it acquires locks once for the entire buffer. +/// as it acquires locks once for the entire buffer and batches cursor operations. #[allow(dead_code)] pub fn write_bytes_to_shell(bytes: &[u8]) -> bool { + if !write_bytes_to_shell_internal(bytes) { + return false; + } + + // Flush after writing + if let Some(fb) = crate::logger::SHELL_FRAMEBUFFER.get() { + if let Some(mut fb_guard) = fb.try_lock() { + if let Some(db) = fb_guard.double_buffer_mut() { + db.flush_if_dirty(); + } + } + } + true +} + +/// Write bytes to the shell terminal without flushing. +/// +/// Internal version for use by render thread which handles its own flushing. +/// This avoids double-flushing when the render thread batches work. 
+pub fn write_bytes_to_shell_internal(bytes: &[u8]) -> bool { if IN_TERMINAL_CALL.swap(true, core::sync::atomic::Ordering::SeqCst) { return false; } @@ -457,15 +492,9 @@ pub fn write_bytes_to_shell(bytes: &[u8]) -> bool { let fb = crate::logger::SHELL_FRAMEBUFFER.get()?; let mut fb_guard = fb.try_lock()?; - // Only write to shell if it's the active terminal - if manager.active_idx == 0 { - // Write all bytes at once - manager.terminal_pane.write_bytes(&mut *fb_guard, bytes); - } + // Use the batched method which hides cursor once, writes all, shows cursor once + manager.write_bytes_to_shell(&mut *fb_guard, bytes); - if let Some(db) = fb_guard.double_buffer_mut() { - db.flush_if_dirty(); - } Some(()) })() .is_some(); From 462ee08b193e90780c5cddf8db6d1091db3b9b5b Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Fri, 23 Jan 2026 08:49:51 -0500 Subject: [PATCH 4/5] perf(graphics): fix critical per-pixel dirty marking in Canvas CRITICAL FIX: The Canvas::set_pixel implementation was calling mark_region_dirty for EVERY SINGLE PIXEL, completely defeating all our batching optimizations. For a character with ~100 drawn pixels: - Before: 100 dirty region operations (with merge logic each time) - After: 1 dirty region operation for the entire glyph Changes: 1. Remove mark_region_dirty from set_pixel in logger.rs - Pixel data still written, just not marked dirty per-pixel 2. Add mark_dirty_region at end of fill_rect in primitives.rs - Marks entire filled rectangle once 3. Add mark_dirty_region at end of draw_glyph in primitives.rs - Marks entire glyph bounding box once This was the root cause of the slow rendering - O(n) dirty marking per character where n = pixels in glyph. Co-Authored-By: Claude Opus 4.5 --- kernel/src/graphics/primitives.rs | 13 +++++++++++++ kernel/src/logger.rs | 5 +++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/src/graphics/primitives.rs b/kernel/src/graphics/primitives.rs index 5eb8854..97827bb 100644 --- a/kernel/src/graphics/primitives.rs +++ b/kernel/src/graphics/primitives.rs @@ -326,6 +326,14 @@ pub fn fill_rect(canvas: &mut impl Canvas, rect: Rect, color: Color) { chunk.copy_from_slice(&pixel[..bpp]); } } + + // Mark the entire filled rectangle dirty once (not per-pixel) + canvas.mark_dirty_region( + clipped.x as usize, + clipped.y as usize, + clipped.width as usize, + clipped.height as usize, + ); } /// Draw circle outline using midpoint circle algorithm. @@ -510,6 +518,11 @@ fn draw_glyph(canvas: &mut impl Canvas, x: i32, y: i32, glyph: &Glyph, style: &T canvas.set_pixel(px, py, color); } + + // Mark the entire glyph bounding box dirty once (not per-pixel) + if x >= 0 && y >= 0 { + canvas.mark_dirty_region(x as usize, y as usize, glyph.width(), glyph.height()); + } } /// Draw a text string at the specified position. diff --git a/kernel/src/logger.rs b/kernel/src/logger.rs index e3dd075..ce43e24 100644 --- a/kernel/src/logger.rs +++ b/kernel/src/logger.rs @@ -596,8 +596,9 @@ impl Canvas for ShellFrameBuffer { for (i, &byte) in pixel_bytes[..bytes_per_pixel].iter().enumerate() { buffer[byte_offset + i] = byte; } - let x_byte_offset = x * bytes_per_pixel; - db.mark_region_dirty(y, x_byte_offset, x_byte_offset + bytes_per_pixel); + // Note: We do NOT mark dirty here per-pixel. Higher-level functions + // (draw_glyph, fill_rect, etc.) call mark_dirty_region() once for + // the entire operation to avoid O(n) dirty marking overhead. 
} } else if byte_offset + bytes_per_pixel <= self.buffer_len { unsafe { From 7948de4828d2399e41dde6b248452db4890c88bf Mon Sep 17 00:00:00 2001 From: Ryan Breen Date: Fri, 23 Jan 2026 09:09:29 -0500 Subject: [PATCH 5/5] perf(graphics): optimize terminal switching performance - Change flush_full() to flush() in switch_terminal - only flush dirty regions instead of copying entire 8MB framebuffer - Reduce LOG_BUFFER_SIZE from 200 to 50 lines for faster tab switching - Skip offscreen log lines during replay - only render visible rows, eliminating expensive scroll operations entirely Previously switching to Logs tab would render all 200 lines causing ~160 scroll operations. Now we calculate visible rows and skip lines that would scroll off, reducing to zero scroll operations. Co-Authored-By: Claude Opus 4.5 --- kernel/src/graphics/terminal_manager.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/src/graphics/terminal_manager.rs b/kernel/src/graphics/terminal_manager.rs index ef258ac..6f25f34 100644 --- a/kernel/src/graphics/terminal_manager.rs +++ b/kernel/src/graphics/terminal_manager.rs @@ -17,7 +17,7 @@ const TAB_HEIGHT: usize = 24; const TAB_PADDING: usize = 12; /// Maximum log lines to keep in buffer -const LOG_BUFFER_SIZE: usize = 200; +const LOG_BUFFER_SIZE: usize = 50; // Reduced from 200 for faster tab switching /// Terminal identifiers #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -151,7 +151,13 @@ impl TerminalManager { } TerminalId::Logs => { // Logs: replay from buffer - for line in self.log_buffer.iter() { + // Optimization: only replay lines that will be visible + // Skip early lines that would scroll off, avoiding expensive scroll operations + let visible_rows = self.terminal_pane.rows(); + let total_lines = self.log_buffer.lines.len(); + let skip_count = total_lines.saturating_sub(visible_rows); + + for line in self.log_buffer.lines.iter().skip(skip_count) { self.terminal_pane.write_str(canvas, line); self.terminal_pane.write_str(canvas, "\r\n"); } @@ -568,7 +574,8 @@ pub fn switch_terminal(id: TerminalId) { manager.switch_to(id, &mut *fb_guard); if let Some(db) = fb_guard.double_buffer_mut() { - db.flush_full(); + // Only flush dirty regions, not entire 8MB buffer + db.flush(); } Some(()) })();
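
For reference, a minimal sketch of the dirty-rect merge policy introduced in patches 1 and 2. It assumes the DirtyRegionTracker, mark_rect_dirty(), rects(), and MERGE_PROXIMITY definitions exactly as written above; the coordinates are illustrative only, not taken from the kernel.

#[cfg(test)]
mod dirty_rect_policy_sketch {
    use super::*;

    #[test]
    fn sparse_updates_stay_separate_until_close() {
        // Hypothetical coordinates: y in scanlines, x in byte offsets,
        // matching the DirtyRect convention in the patches above.
        let mut tracker = DirtyRegionTracker::new();

        // A cursor blink near the bottom and a text line near the top are far
        // apart, so they remain two independent rects; flush() then copies two
        // small regions instead of one giant bounding box.
        tracker.mark_rect_dirty(760, 762, 0, 64);
        tracker.mark_rect_dirty(16, 32, 0, 2048);
        assert_eq!(tracker.rects().count(), 2);

        // A mark within MERGE_PROXIMITY (32) scanlines of the text rect is
        // absorbed into it, so the rect count does not grow.
        tracker.mark_rect_dirty(40, 56, 0, 2048);
        assert_eq!(tracker.rects().count(), 2);
    }
}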