diff --git a/AGENTS.md b/AGENTS.md index ce8d7c0..0a01467 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,6 +28,18 @@ Retrieve and consider the comments on the PR, which may have come from GitHub Co Offer to optionally post a brief summary of the review to the PR, via the gh CLI tool. +## Tagged Go releases + +If I ask you whether we are ready to release, this means a tagged Go release on the main branch. Go releases are git tagged with a version number. + +Review the changes since the last release, i.e. the previous git tag. Ensure that the changes are complete and correct. Identify new features, bug fixes, and performance improvements. + +Identify breaking changes, especially API changes. + +Ensure good test coverage. Look for performance changes, especially performance regressions, by running benchmarks against the previous release. + +Ensure that the documentation in READMEs and GoDocs is complete, correct and consistent. + ## Comparisons to go-runewidth We originally attempted to make this package compatible with go-runewidth. diff --git a/README.md b/README.md index 17a3981..ca19e6b 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,16 @@ when calculating the display width. When `false` (default), ANSI escape sequences are treated as just a series of characters. When `true`, they are treated as a single zero-width unit. +#### ControlSequences8Bit + +`ControlSequences8Bit` specifies whether to ignore 8-bit ECMA-48 escape sequences +when calculating the display width. When `false` (default), these are treated +as just a series of characters. When `true`, they are treated as a single +zero-width unit. + +Note: this option is ignored by the `Truncate` methods, as the concatenation +can lead to unintended UTF-8 semantics. 
+ #### EastAsianWidth `EastAsianWidth` defines how diff --git a/fuzz_test.go b/fuzz_test.go index d13775f..34bcda9 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -102,7 +102,10 @@ func FuzzBytesAndString(f *testing.F) { {EastAsianWidth: false}, {EastAsianWidth: true}, {ControlSequences: true}, + {ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, } for _, option := range options { @@ -282,54 +285,25 @@ func FuzzTruncateStringAndBytes(f *testing.F) { f.Add("\xff\xfe\xfd") // invalid UTF-8 f.Fuzz(func(t *testing.T, text string) { - // Test with default options - ts := TruncateString(text, 10, "...") - - // Invariant: truncated string should be less than or equal to maxWidth - if String(ts) > 10 { - t.Errorf("TruncateString() returned string longer than maxWidth for %q: %q", text, ts) - } - - // Invariant: truncated string should be less than or equal to maxWidth - if len(ts) > len(text) { - t.Errorf("TruncateString() returned string longer than original for %q: %q", text, ts) - } - - tb := TruncateBytes([]byte(text), 10, []byte("...")) - - // Invariant: truncated bytes should be less than or equal to maxWidth - if Bytes(tb) > 10 { - t.Errorf("TruncateBytes() returned bytes longer than maxWidth for %q: %q", text, tb) - } - - // Invariant: truncated bytes should be less than or equal to original - if len(tb) > len(text) { - t.Errorf("TruncateBytes() returned bytes longer than original for %q: %q", text, tb) - } - - if !bytes.Equal(tb, []byte(ts)) { - t.Errorf("TruncateBytes() returned bytes different from TruncateString() for %q: %q != %q", text, tb, ts) - } - - // Test with different options + // Exercise truncation to discover panics and infinite loops. + // Width invariant testing is in proper unit tests. 
options := []Options{ - {EastAsianWidth: false}, + {}, {EastAsianWidth: true}, {ControlSequences: true}, + {ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, } for _, option := range options { ts := option.TruncateString(text, 10, "...") - - // Invariant: truncated string should be less than or equal to maxWidth - if option.String(ts) > 10 { - t.Errorf("TruncateString() returned string longer than maxWidth for %q: %q", text, ts) - } - tb := option.TruncateBytes([]byte(text), 10, []byte("...")) + + // Invariant: String and Bytes paths must agree if !bytes.Equal(tb, []byte(ts)) { - t.Errorf("TruncateBytes() returned bytes different from TruncateString() for %q: %q != %q", text, tb, ts) + t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", option, text, tb, ts) } } }) @@ -369,6 +343,21 @@ func FuzzControlSequences(f *testing.F) { f.Add([]byte("δΈ­ζ–‡")) // plain CJK f.Add([]byte("πŸ˜€")) // plain emoji + // Seed with 8-bit C1 escape sequences + f.Add([]byte("\x9B31m")) // C1 CSI red + f.Add([]byte("\x9B0m")) // C1 CSI reset + f.Add([]byte("\x9B1m")) // C1 CSI bold + f.Add([]byte("\x9B31mhello\x9B0m")) // C1 CSI red text + f.Add([]byte("\x9B1m\x9B31mhi\x9B0m")) // C1 nested SGR + f.Add([]byte("hello\x9B31mworld\x9B0m")) // C1 mid-string + f.Add([]byte("\x9B31mδΈ­ζ–‡\x9B0m")) // C1 colored CJK + f.Add([]byte("\x9B31mπŸ˜€\x9B0m")) // C1 colored emoji + f.Add([]byte("\x9D0;Title\x9C")) // C1 OSC with C1 ST + f.Add([]byte("\x9D0;Title\x07")) // C1 OSC with BEL + f.Add([]byte("\x90qpayload\x9C")) // C1 DCS with C1 ST + f.Add([]byte("\x84")) // standalone C1 + f.Add([]byte("\x1b[31mhello\x9B0m")) // mixed 7-bit and 8-bit + // Seed with multi-lingual text file, err := testdata.Sample() if err != nil { @@ -383,7 +372,11 @@ func FuzzControlSequences(f *testing.F) { {}, {EastAsianWidth: true}, {ControlSequences: true}, + 
{ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, + {EastAsianWidth: true, ControlSequences: true, ControlSequences8Bit: true}, } f.Fuzz(func(t *testing.T, text []byte) { @@ -432,7 +425,7 @@ func FuzzControlSequences(f *testing.F) { // Invariant: ControlSequences width <= default width // (escape sequences become 0 instead of their visible char widths) - if opt.ControlSequences { + if opt.ControlSequences || opt.ControlSequences8Bit { noIgnore := Options{EastAsianWidth: opt.EastAsianWidth} wDefault := noIgnore.Bytes(text) if wb > wDefault { @@ -440,23 +433,14 @@ func FuzzControlSequences(f *testing.F) { } } - // Invariant: truncation respects maxWidth (accounting for the tail, - // which is always appended and may itself exceed maxWidth) + // Exercise truncation to discover panics and infinite loops. + // Width invariant testing is in proper unit tests. tail := "..." 
- tailWidth := opt.String(tail) for _, maxWidth := range []int{0, 1, 3, 5, 10, 20} { ts := opt.TruncateString(string(text), maxWidth, tail) - tsWidth := opt.String(ts) - limit := maxWidth - if tailWidth > limit { - limit = tailWidth - } - if tsWidth > limit { - t.Errorf("TruncateString() width %d > max(maxWidth, tailWidth) %d with %+v for %q -> %q", - tsWidth, limit, opt, text, ts) - } - tb := opt.TruncateBytes(text, maxWidth, []byte(tail)) + + // Invariant: String and Bytes paths must agree if !bytes.Equal(tb, []byte(ts)) { t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", opt, text, tb, ts) diff --git a/graphemes.go b/graphemes.go index 2d70c46..14a5278 100644 --- a/graphemes.go +++ b/graphemes.go @@ -45,6 +45,7 @@ func StringGraphemes(s string) Graphemes[string] { func (options Options) StringGraphemes(s string) Graphemes[string] { g := graphemes.FromString(s) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit return Graphemes[string]{iter: g, options: options} } @@ -66,6 +67,7 @@ func BytesGraphemes(s []byte) Graphemes[[]byte] { func (options Options) BytesGraphemes(s []byte) Graphemes[[]byte] { g := graphemes.FromBytes(s) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit return Graphemes[[]byte]{iter: g, options: options} } diff --git a/options.go b/options.go new file mode 100644 index 0000000..b63b585 --- /dev/null +++ b/options.go @@ -0,0 +1,30 @@ +package displaywidth + +// Options allows you to specify the treatment of ambiguous East Asian +// characters and ANSI escape sequences. +type Options struct { + // EastAsianWidth specifies whether to treat ambiguous East Asian characters + // as width 1 or 2. When false (default), ambiguous East Asian characters + // are treated as width 1. When true, they are width 2. 
+ EastAsianWidth bool + + // ControlSequences specifies whether to ignore 7-bit ECMA-48 escape sequences + // when calculating the display width. When false (default), ANSI escape + // sequences are treated as just a series of characters. When true, they are + // treated as a single zero-width unit. + ControlSequences bool + // ControlSequences8Bit specifies whether to ignore 8-bit ECMA-48 escape sequences + // when calculating the display width. When false (default), these are treated + // as just a series of characters. When true, they are treated as a single + // zero-width unit. + ControlSequences8Bit bool +} + +// DefaultOptions is the default options for the display width +// calculation, which is EastAsianWidth false, ControlSequences false, and +// ControlSequences8Bit false. +var DefaultOptions = Options{ + EastAsianWidth: false, + ControlSequences: false, + ControlSequences8Bit: false, +} diff --git a/truncate.go b/truncate.go new file mode 100644 index 0000000..b3e696f --- /dev/null +++ b/truncate.go @@ -0,0 +1,149 @@ +package displaywidth + +import ( + "strings" + + "github.com/clipperhouse/uax29/v2/graphemes" +) + +// TruncateString truncates a string to the given maxWidth, and appends the +// given tail if the string is truncated. +// +// It ensures the visible width, including the width of the tail, is less than or +// equal to maxWidth. +// +// When [Options.ControlSequences] is true, 7-bit ANSI escape sequences that +// appear after the truncation point are preserved in the output. This ensures +// that escape sequences such as SGR resets are not lost, preventing color +// bleed in terminal output. +// +// [Options.ControlSequences8Bit] is ignored by truncation. 8-bit C1 byte values +// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so manipulating them +// during truncation can shift byte boundaries and form unintended visible +// characters. Use [Options.String] or [Options.Bytes] for 8-bit-aware width +// measurement. 
+func (options Options) TruncateString(s string, maxWidth int, tail string) string { + // We deliberately ignore ControlSequences8Bit for truncation, see above. + options.ControlSequences8Bit = false + + maxWidthWithoutTail := maxWidth - options.String(tail) + + var pos, total int + g := graphemes.FromString(s) + g.AnsiEscapeSequences = options.ControlSequences + + for g.Next() { + gw := graphemeWidth(g.Value(), options) + if total+gw <= maxWidthWithoutTail { + pos = g.End() + } + total += gw + if total > maxWidth { + if options.ControlSequences { + // Build result with trailing 7-bit ANSI escape sequences preserved + var b strings.Builder + b.Grow(len(s) + len(tail)) // at most original + tail + b.WriteString(s[:pos]) + b.WriteString(tail) + + rem := graphemes.FromString(s[pos:]) + rem.AnsiEscapeSequences = options.ControlSequences + + for rem.Next() { + v := rem.Value() + // Only preserve 7-bit escapes (ESC = 0x1B) that measure + // as zero-width on their own; some sequences (e.g. SOS) + // are only valid in their original context. + if len(v) > 0 && v[0] == 0x1B && options.String(v) == 0 { + b.WriteString(v) + } + } + return b.String() + } + return s[:pos] + tail + } + } + // No truncation + return s +} + +// TruncateString truncates a string to the given maxWidth, and appends the +// given tail if the string is truncated. +// +// It ensures the total width, including the width of the tail, is less than or +// equal to maxWidth. +func TruncateString(s string, maxWidth int, tail string) string { + return DefaultOptions.TruncateString(s, maxWidth, tail) +} + +// TruncateBytes truncates a []byte to the given maxWidth, and appends the +// given tail if the []byte is truncated. +// +// It ensures the visible width, including the width of the tail, is less than or +// equal to maxWidth. +// +// When [Options.ControlSequences] is true, 7-bit ANSI escape sequences that +// appear after the truncation point are preserved in the output. 
This ensures +// that escape sequences such as SGR resets are not lost, preventing color +// bleed in terminal output. +// +// [Options.ControlSequences8Bit] is ignored by truncation. 8-bit C1 byte values +// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so manipulating them +// during truncation can shift byte boundaries and form unintended visible +// characters. Use [Options.String] or [Options.Bytes] for 8-bit-aware width +// measurement. +func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { + // We deliberately ignore ControlSequences8Bit for truncation, see above. + options.ControlSequences8Bit = false + + maxWidthWithoutTail := maxWidth - options.Bytes(tail) + + var pos, total int + g := graphemes.FromBytes(s) + g.AnsiEscapeSequences = options.ControlSequences + + for g.Next() { + gw := graphemeWidth(g.Value(), options) + if total+gw <= maxWidthWithoutTail { + pos = g.End() + } + total += gw + if total > maxWidth { + if options.ControlSequences { + // Build result with trailing 7-bit ANSI escape sequences preserved + result := make([]byte, 0, len(s)+len(tail)) // at most original + tail + result = append(result, s[:pos]...) + result = append(result, tail...) + + rem := graphemes.FromBytes(s[pos:]) + rem.AnsiEscapeSequences = options.ControlSequences + + for rem.Next() { + v := rem.Value() + // Only preserve 7-bit escapes (ESC = 0x1B) that measure + // as zero-width on their own; some sequences (e.g. SOS) + // are only valid in their original context. + if len(v) > 0 && v[0] == 0x1B && options.Bytes(v) == 0 { + result = append(result, v...) + } + } + return result + } + result := make([]byte, 0, pos+len(tail)) + result = append(result, s[:pos]...) + result = append(result, tail...) + return result + } + } + // No truncation + return s +} + +// TruncateBytes truncates a []byte to the given maxWidth, and appends the +// given tail if the []byte is truncated. 
+// +// It ensures the total width, including the width of the tail, is less than or +// equal to maxWidth. +func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { + return DefaultOptions.TruncateBytes(s, maxWidth, tail) +} diff --git a/width.go b/width.go index 8c183aa..f6e0ab7 100644 --- a/width.go +++ b/width.go @@ -1,35 +1,11 @@ package displaywidth import ( - "strings" "unicode/utf8" "github.com/clipperhouse/uax29/v2/graphemes" ) -// Options allows you to specify the treatment of ambiguous East Asian -// characters and ANSI escape sequences. -type Options struct { - // EastAsianWidth specifies whether to treat ambiguous East Asian characters - // as width 1 or 2. When false (default), ambiguous East Asian characters - // are treated as width 1. When true, they are width 2. - EastAsianWidth bool - - // ControlSequences specifies whether to ignore ECMA-48 escape sequences - // when calculating the display width. When false (default), ANSI escape - // sequences are treated as just a series of characters. When true, they are - // treated as a single zero-width unit. - // - // Note that this option is about *sequences*. Individual control characters - // are already treated as zero-width. With this option, ANSI sequences such as - // "\x1b[31m" and "\x1b[0m" do not count towards the width of a string. - ControlSequences bool -} - -// DefaultOptions is the default options for the display width -// calculation, which is EastAsianWidth false and ControlSequences false. -var DefaultOptions = Options{EastAsianWidth: false, ControlSequences: false} - // String calculates the display width of a string, // by iterating over grapheme clusters in the string // and summing their widths. 
@@ -55,6 +31,7 @@ func (options Options) String(s string) int { // Not ASCII, use grapheme parsing g := graphemes.FromString(s[pos:]) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit start := pos @@ -105,6 +82,7 @@ func (options Options) Bytes(s []byte) int { // Not ASCII, use grapheme parsing g := graphemes.FromBytes(s[pos:]) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit start := pos @@ -166,128 +144,22 @@ func (options Options) Rune(r rune) int { const _Default property = 0 -// TruncateString truncates a string to the given maxWidth, and appends the -// given tail if the string is truncated. -// -// It ensures the visible width, including the width of the tail, is less than or -// equal to maxWidth. -// -// When [Options.ControlSequences] is true, ANSI escape sequences that appear -// after the truncation point are preserved in the output. This ensures that -// escape sequences such as SGR resets are not lost, preventing color bleed -// in terminal output. 
-func (options Options) TruncateString(s string, maxWidth int, tail string) string { - maxWidthWithoutTail := maxWidth - options.String(tail) - - var pos, total int - g := graphemes.FromString(s) - g.AnsiEscapeSequences = options.ControlSequences - - for g.Next() { - gw := graphemeWidth(g.Value(), options) - if total+gw <= maxWidthWithoutTail { - pos = g.End() - } - total += gw - if total > maxWidth { - if options.ControlSequences { - // Build result with trailing ANSI escape sequences preserved - var b strings.Builder - b.Grow(len(s) + len(tail)) // at most original + tail - b.WriteString(s[:pos]) - b.WriteString(tail) - rem := graphemes.FromString(s[pos:]) - rem.AnsiEscapeSequences = true - for rem.Next() { - v := rem.Value() - if len(v) > 0 && v[0] == 0x1B { - b.WriteString(v) - } - } - return b.String() - } - return s[:pos] + tail - } - } - // No truncation - return s -} - -// TruncateString truncates a string to the given maxWidth, and appends the -// given tail if the string is truncated. -// -// It ensures the total width, including the width of the tail, is less than or -// equal to maxWidth. -func TruncateString(s string, maxWidth int, tail string) string { - return DefaultOptions.TruncateString(s, maxWidth, tail) -} - -// TruncateBytes truncates a []byte to the given maxWidth, and appends the -// given tail if the []byte is truncated. -// -// It ensures the visible width, including the width of the tail, is less than or -// equal to maxWidth. -// -// When [Options.ControlSequences] is true, ANSI escape sequences that appear -// after the truncation point are preserved in the output. This ensures that -// escape sequences such as SGR resets are not lost, preventing color bleed -// in terminal output. 
-func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { - maxWidthWithoutTail := maxWidth - options.Bytes(tail) - - var pos, total int - g := graphemes.FromBytes(s) - g.AnsiEscapeSequences = options.ControlSequences - - for g.Next() { - gw := graphemeWidth(g.Value(), options) - if total+gw <= maxWidthWithoutTail { - pos = g.End() - } - total += gw - if total > maxWidth { - if options.ControlSequences { - // Build result with trailing ANSI escape sequences preserved - result := make([]byte, 0, len(s)+len(tail)) // at most original + tail - result = append(result, s[:pos]...) - result = append(result, tail...) - rem := graphemes.FromBytes(s[pos:]) - rem.AnsiEscapeSequences = true - for rem.Next() { - v := rem.Value() - if len(v) > 0 && v[0] == 0x1B { - result = append(result, v...) - } - } - return result - } - result := make([]byte, 0, pos+len(tail)) - result = append(result, s[:pos]...) - result = append(result, tail...) - return result - } - } - // No truncation - return s -} - -// TruncateBytes truncates a []byte to the given maxWidth, and appends the -// given tail if the []byte is truncated. -// -// It ensures the total width, including the width of the tail, is less than or -// equal to maxWidth. -func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { - return DefaultOptions.TruncateBytes(s, maxWidth, tail) -} - // graphemeWidth returns the display width of a grapheme cluster. // The passed string must be a single grapheme cluster. func graphemeWidth[T ~string | []byte](s T, options Options) int { - // Optimization: no need to look up properties - switch len(s) { - case 0: + if len(s) == 0 { return 0 - case 1: + } + + // C1 controls (0x80-0x9F) are zero-width when 8-bit control sequences + // are enabled. This must be checked before the single-byte optimization + // below, which would otherwise return width 1 for these bytes. 
+ if options.ControlSequences8Bit && s[0] >= 0x80 && s[0] <= 0x9F { + return 0 + } + + // Optimization: single-byte graphemes need no property lookup + if len(s) == 1 { return asciiWidth(s[0]) } diff --git a/width_test.go b/width_test.go index ce0db62..bcf5a9d 100644 --- a/width_test.go +++ b/width_test.go @@ -106,6 +106,8 @@ func TestStringWidth(t *testing.T) { } var controlSequences = Options{ControlSequences: true} +var controlSequences8Bit = Options{ControlSequences8Bit: true} +var controlSequencesBoth = Options{ControlSequences: true, ControlSequences8Bit: true} func TestAnsiEscapeSequences(t *testing.T) { tests := []struct { @@ -168,6 +170,174 @@ func TestAnsiEscapeSequences(t *testing.T) { } } +func TestAnsiEscapeSequences8Bit(t *testing.T) { + tests := []struct { + name string + input string + options Options + expected int + }{ + // 8-bit C1 CSI sequences should be zero width + {"C1 CSI red", "\x9B31m", controlSequences8Bit, 0}, + {"C1 CSI reset", "\x9B0m", controlSequences8Bit, 0}, + {"C1 CSI bold", "\x9B1m", controlSequences8Bit, 0}, + {"C1 CSI multi-param", "\x9B1;2;3m", controlSequences8Bit, 0}, + {"C1 CSI cursor up", "\x9BA", controlSequences8Bit, 0}, + + // 8-bit C1 OSC/DCS/SOS/APC with C1 ST terminator + {"C1 OSC with ST", "\x9D0;Title\x9C", controlSequences8Bit, 0}, + {"C1 OSC with BEL", "\x9D0;Title\x07", controlSequences8Bit, 0}, + {"C1 DCS with ST", "\x90qpayload\x9C", controlSequences8Bit, 0}, + {"C1 SOS with ST", "\x98hello\x9C", controlSequences8Bit, 0}, + {"C1 APC with ST", "\x9Fdata\x9C", controlSequences8Bit, 0}, + + // Standalone C1 controls (single byte, no body) + {"C1 IND", "\x84", controlSequences8Bit, 0}, + {"C1 NEL", "\x85", controlSequences8Bit, 0}, + + // 8-bit sequences mixed with visible text + {"C1 CSI red hello", "\x9B31mhello\x9B0m", controlSequences8Bit, 5}, + {"C1 CSI colored CJK", "\x9B31mδΈ­ζ–‡\x9B0m", controlSequences8Bit, 4}, + {"C1 CSI colored emoji", "\x9B31mπŸ˜€\x9B0m", controlSequences8Bit, 2}, + {"C1 CSI nested", 
"\x9B1m\x9B31mhi\x9B0m", controlSequences8Bit, 2}, + + // Without ControlSequences8Bit, C1 bytes have width per asciiWidth (1 for >= 0x80) + {"C1 CSI default options", "\x9B31m", defaultOptions, 4}, + + // 8-bit option should not regress plain text + {"plain ASCII with 8-bit option", "hello", controlSequences8Bit, 5}, + {"CJK with 8-bit option", "δΈ­ζ–‡", controlSequences8Bit, 4}, + {"emoji with 8-bit option", "πŸ˜€", controlSequences8Bit, 2}, + {"empty with 8-bit option", "", controlSequences8Bit, 0}, + + // Both options enabled + {"both: 7-bit SGR", "\x1b[31mhello\x1b[0m", controlSequencesBoth, 5}, + {"both: 8-bit CSI", "\x9B31mhello\x9B0m", controlSequencesBoth, 5}, + {"both: mixed 7 and 8-bit", "\x1b[31mhello\x9B0m", controlSequencesBoth, 5}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.options.String(tt.input) + if result != tt.expected { + t.Errorf("String(%q) = %d, want %d", tt.input, result, tt.expected) + } + + result = tt.options.Bytes([]byte(tt.input)) + if result != tt.expected { + t.Errorf("Bytes(%q) = %d, want %d", tt.input, result, tt.expected) + } + }) + } +} + +// TestAnsiEscapeSequencesIndependence verifies that the 7-bit and 8-bit options +// are strictly independent: enabling one must NOT cause the other's sequences +// to be treated as escape sequences. +func TestAnsiEscapeSequencesIndependence(t *testing.T) { + tests := []struct { + name string + input string + options Options + expected int + desc string + }{ + // 7-bit only: C1 bytes must NOT be treated as escape sequences. 
+ // \x9B31m is 4 visible chars (0x9B has width 1, '3' '1' 'm' each width 1) + { + name: "7-bit on, 8-bit input C1 CSI", + input: "\x9B31m", + options: controlSequences, + expected: 4, + desc: "C1 CSI should not be recognized when only 7-bit is enabled", + }, + { + name: "7-bit on, 8-bit input standalone C1", + input: "\x84", + options: controlSequences, + expected: 1, + desc: "Standalone C1 byte should have width 1 when only 7-bit is enabled", + }, + { + name: "7-bit on, 8-bit input C1 with text", + input: "\x9B31mhello\x9B0m", + options: controlSequences, + expected: 4 + 5 + 3, + desc: "C1 CSI sequences should contribute visible width when only 7-bit is enabled", + }, + + // 8-bit only: 7-bit ESC sequences must NOT be treated as escape sequences. + // \x1b[31m is: ESC (width 0) + '[' (1) + '3' (1) + '1' (1) + 'm' (1) = 4 + { + name: "8-bit on, 7-bit input SGR", + input: "\x1b[31m", + options: controlSequences8Bit, + expected: 4, + desc: "7-bit SGR should not be recognized when only 8-bit is enabled", + }, + { + name: "8-bit on, 7-bit input SGR with text", + input: "\x1b[31mhello\x1b[0m", + options: controlSequences8Bit, + expected: 4 + 5 + 3, + desc: "7-bit SGR should contribute visible width when only 8-bit is enabled", + }, + + // Both enabled: both kinds should be zero-width + { + name: "both on, 7-bit SGR", + input: "\x1b[31m", + options: controlSequencesBoth, + expected: 0, + desc: "7-bit SGR should be zero-width when both are enabled", + }, + { + name: "both on, 8-bit CSI", + input: "\x9B31m", + options: controlSequencesBoth, + expected: 0, + desc: "C1 CSI should be zero-width when both are enabled", + }, + { + name: "both on, mixed sequences with text", + input: "\x1b[31mhello\x9B0m", + options: controlSequencesBoth, + expected: 5, + desc: "Mixed 7-bit and 8-bit sequences should both be zero-width", + }, + + // Neither enabled: both kinds contribute visible width + { + name: "neither, 7-bit SGR", + input: "\x1b[31m", + options: defaultOptions, + expected: 
4, + desc: "7-bit SGR should contribute visible width when neither is enabled", + }, + { + name: "neither, 8-bit CSI", + input: "\x9B31m", + options: defaultOptions, + expected: 4, + desc: "C1 CSI should contribute visible width when neither is enabled", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.options.String(tt.input) + if result != tt.expected { + t.Errorf("String(%q) = %d, want %d (%s)", tt.input, result, tt.expected, tt.desc) + } + + result = tt.options.Bytes([]byte(tt.input)) + if result != tt.expected { + t.Errorf("Bytes(%q) = %d, want %d (%s)", tt.input, result, tt.expected, tt.desc) + } + }) + } +} + func TestRuneWidth(t *testing.T) { tests := []struct { name string @@ -900,6 +1070,19 @@ func TestGraphemesControlSequences(t *testing.T) { // Default options: sum of grapheme widths must still match String/Bytes {"default ANSI wrapped", "\x1b[31mhello\x1b[0m", defaultOptions}, {"default plain", "hello", defaultOptions}, + // 8-bit ControlSequences: C1 sequences are one zero-width grapheme each + {"8-bit C1 CSI wrapped", "\x9B31mhello\x9B0m", controlSequences8Bit}, + {"8-bit C1 CSI only", "\x9B0m", controlSequences8Bit}, + {"8-bit plain text", "hi", controlSequences8Bit}, + {"8-bit C1 CSI mid", "a\x9B31mb\x9B0mc", controlSequences8Bit}, + // Both options: both 7-bit and 8-bit sequences are zero-width graphemes + {"both: mixed", "\x1b[31mhello\x9B0m", controlSequencesBoth}, + {"both: 7-bit only input", "\x1b[31mhi\x1b[0m", controlSequencesBoth}, + {"both: 8-bit only input", "\x9B31mhi\x9B0m", controlSequencesBoth}, + // Independence: 7-bit on but 8-bit input β€” graphemes must still sum correctly + {"7-bit on, 8-bit input", "\x9B31mhello\x9B0m", controlSequences}, + // Independence: 8-bit on but 7-bit input + {"8-bit on, 7-bit input", "\x1b[31mhello\x1b[0m", controlSequences8Bit}, } for _, tt := range tests { @@ -1055,6 +1238,30 @@ func TestTruncateString(t *testing.T) { // Multiple colors: all trailing escapes 
preserved {"ControlSequences multi color", "a\x1b[31mb\x1b[32mc\x1b[33md\x1b[0m", 2, "...", controlSequences, "...\x1b[31m\x1b[32m\x1b[33m\x1b[0m"}, + // 8-bit ControlSequences8Bit is ignored by truncation entirely. The + // grapheme parser is not told about 8-bit, so C1 sequence parameters + // (e.g. "31m" after \x9B) are treated as visible characters. This is + // intentional: 8-bit C1 bytes (0x80-0x9F) overlap with UTF-8 multi-byte + // encoding, making them unsafe to manipulate during truncation. + {"8-bit plain no truncation", "hello", 5, "...", controlSequences8Bit, "hello"}, + {"8-bit C1 CSI wrapped truncate", "\x9B31mhello\x9B0m", 8, "...", controlSequences8Bit, "\x9B31mh..."}, + {"8-bit C1 CSI wrapped truncate narrow", "\x9B31mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B..."}, + {"8-bit C1 CSI in middle truncate", "hello\x9B31mworld", 5, "...", controlSequences8Bit, "he..."}, + {"8-bit C1 CSI CJK truncate", "\x9B31mδΈ­ζ–‡\x9B0m", 2, "...", controlSequences8Bit, "..."}, + {"8-bit C1 CSI no trailing escape", "\x9B31mhello", 4, "...", controlSequences8Bit, "\x9B..."}, + {"8-bit C1 stacked SGR", "\x9B31m\x9B42mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B..."}, + + // 7-bit only must NOT preserve trailing C1 sequences. + // With 7-bit only, \x9B is a regular character (width 1), so the input + // "hello\x9B0m" has visible width 8. Trailing \x9B0m is not preserved. + {"7-bit only ignores trailing C1", "hello\x9B0m", 5, "...", controlSequences, "he..."}, + + // Both enabled: only 7-bit trailing escapes are preserved; 8-bit is + // ignored by truncation, so C1 parameters are visible characters. 
+ {"both: mixed trailing escapes", "\x1b[31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x1b[31mh..."}, + {"both: 7-bit wrapped truncate", "\x1b[31mhello\x1b[0m", 4, "...", controlSequencesBoth, "\x1b[31mh...\x1b[0m"}, + {"both: 8-bit wrapped truncate", "\x9B31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x9B..."}, + // East Asian Width option {"ambiguous EAW fits", "β˜…", 2, "...", eawOptions, "β˜…"}, {"ambiguous EAW truncate", "β˜…", 1, "...", eawOptions, "..."}, @@ -1609,3 +1816,93 @@ func TestUnicode16IndicConjunctBreak(t *testing.T) { }) } } + +func TestReproduceFuzzTruncate(t *testing.T) { + // Regression test: \x1bX (ESC X = SOS) is segmented as one grapheme in the + // full input but as two separate graphemes (\x1b + X) in the truncated + // result, causing the preserved escape sequence to add visible width. + text := "00000000000\x1bX\x18" + options := []Options{ + {EastAsianWidth: false}, + {EastAsianWidth: true}, + {ControlSequences: true}, + {EastAsianWidth: true, ControlSequences: true}, + } + + for _, opt := range options { + ts := opt.TruncateString(text, 10, "...") + w := opt.String(ts) + if w > 10 { + t.Errorf("TruncateString() returned string longer than maxWidth for %q with opts %+v: %q (width %d)", text, opt, ts, w) + } + + tb := opt.TruncateBytes([]byte(text), 10, []byte("...")) + if !bytes.Equal(tb, []byte(ts)) { + t.Errorf("TruncateBytes() != TruncateString() for %q with opts %+v: %q != %q", text, opt, tb, ts) + } + } +} + +func TestTruncateIgnores8Bit(t *testing.T) { + // Truncation ignores ControlSequences8Bit entirely (see GoDoc). + // This means the truncation result, when measured with 8-bit-aware + // String(), may exceed maxWidth. This is the documented tradeoff: + // 8-bit C1 bytes (0x80-0x9F) overlap with UTF-8 multi-byte encoding, + // so manipulating them during truncation is unsafe. + // + // These tests verify that truncation is self-consistent: the result + // measured WITHOUT 8-bit should respect maxWidth. 
+ + cases := []struct { + name string + text string + }{ + { + // Byte recombination: the grapheme parser with 8-bit groups + // \x9f\xcf as one escape (APC + payload). Without 8-bit, \xcf + // and \x90 can recombine into U+03D0 (ϐ, width 1). + name: "byte recombination", + text: "000000000000000000000\x9f\xcf\x1a\x90", + }, + { + // SOS terminator mismatch: with 8-bit, \x9c is ST (terminates + // the 7-bit SOS started by \x1bX). Without 8-bit, \x9c is not + // recognized as ST, so SOS consumes more of the string. + name: "SOS terminator mismatch", + text: "00\x98\x1bX\x9c0000000000\x18", + }, + } + + options := []Options{ + {ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, + } + + for _, tc := range cases { + for _, opt := range options { + // Truncation ignores 8-bit, so measure with the same view + measureOpt := opt + measureOpt.ControlSequences8Bit = false + + ts := opt.TruncateString(tc.text, 10, "...") + w := measureOpt.String(ts) + if w > 10 { + t.Errorf("%s: TruncateString() width %d > 10 (measured without 8-bit) for %q with opts %+v: %q", + tc.name, w, tc.text, opt, ts) + } + + tb := opt.TruncateBytes([]byte(tc.text), 10, []byte("...")) + bw := measureOpt.Bytes(tb) + if bw > 10 { + t.Errorf("%s: TruncateBytes() width %d > 10 (measured without 8-bit) for %q with opts %+v: %q", + tc.name, bw, tc.text, opt, tb) + } + + if !bytes.Equal(tb, []byte(ts)) { + t.Errorf("%s: TruncateBytes() != TruncateString() for %q with opts %+v: %q != %q", + tc.name, tc.text, opt, tb, ts) + } + } + } +}