From 0553d8a84bae929e5752e72e0a8ddcdb05176c7e Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Mon, 16 Feb 2026 12:15:26 -0500 Subject: [PATCH 1/7] adopt 8-bit ANSI option --- graphemes.go | 2 ++ width.go | 54 +++++++++++++++++++++++++++++++++++++++------------ width_test.go | 26 +++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/graphemes.go b/graphemes.go index 2d70c46..14a5278 100644 --- a/graphemes.go +++ b/graphemes.go @@ -45,6 +45,7 @@ func StringGraphemes(s string) Graphemes[string] { func (options Options) StringGraphemes(s string) Graphemes[string] { g := graphemes.FromString(s) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit return Graphemes[string]{iter: g, options: options} } @@ -66,6 +67,7 @@ func BytesGraphemes(s []byte) Graphemes[[]byte] { func (options Options) BytesGraphemes(s []byte) Graphemes[[]byte] { g := graphemes.FromBytes(s) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit return Graphemes[[]byte]{iter: g, options: options} } diff --git a/width.go b/width.go index 8c183aa..1e74e39 100644 --- a/width.go +++ b/width.go @@ -15,20 +15,25 @@ type Options struct { // are treated as width 1. When true, they are width 2. EastAsianWidth bool - // ControlSequences specifies whether to ignore ECMA-48 escape sequences + // ControlSequences specifies whether to ignore 7-bit ECMA-48 escape sequences // when calculating the display width. When false (default), ANSI escape // sequences are treated as just a series of characters. When true, they are // treated as a single zero-width unit. - // - // Note that this option is about *sequences*. Individual control characters - // are already treated as zero-width. With this option, ANSI sequences such as - // "\x1b[31m" and "\x1b[0m" do not count towards the width of a string. 
ControlSequences bool + // ControlSequences8Bit specifies whether to ignore 8-bit ECMA-48 escape sequences + // when calculating the display width. When false (default), these are treated + // as just a series of characters. When true, they are treated as a single + // zero-width unit. + ControlSequences8Bit bool } // DefaultOptions is the default options for the display width // calculation, which is EastAsianWidth false and ControlSequences false. -var DefaultOptions = Options{EastAsianWidth: false, ControlSequences: false} +var DefaultOptions = Options{ + EastAsianWidth: false, + ControlSequences: false, + ControlSequences8Bit: false, +} // String calculates the display width of a string, // by iterating over grapheme clusters in the string @@ -55,6 +60,7 @@ func (options Options) String(s string) int { // Not ASCII, use grapheme parsing g := graphemes.FromString(s[pos:]) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit start := pos @@ -105,6 +111,7 @@ func (options Options) Bytes(s []byte) int { // Not ASCII, use grapheme parsing g := graphemes.FromBytes(s[pos:]) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit start := pos @@ -182,6 +189,7 @@ func (options Options) TruncateString(s string, maxWidth int, tail string) strin var pos, total int g := graphemes.FromString(s) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit for g.Next() { gw := graphemeWidth(g.Value(), options) @@ -190,17 +198,23 @@ func (options Options) TruncateString(s string, maxWidth int, tail string) strin } total += gw if total > maxWidth { - if options.ControlSequences { + if options.ControlSequences || options.ControlSequences8Bit { // Build result with trailing ANSI escape sequences preserved var b strings.Builder b.Grow(len(s) + len(tail)) // at most original + tail b.WriteString(s[:pos]) b.WriteString(tail) + rem 
:= graphemes.FromString(s[pos:]) - rem.AnsiEscapeSequences = true + rem.AnsiEscapeSequences = options.ControlSequences + rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit + for rem.Next() { v := rem.Value() - if len(v) > 0 && v[0] == 0x1B { + // Only preserve escapes that measure as zero-width + // on their own; some sequences (e.g. SOS) are only + // valid in their original context. + if len(v) > 0 && isEscapeLeader(v[0], options) && options.String(v) == 0 { b.WriteString(v) } } @@ -238,6 +252,7 @@ func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte var pos, total int g := graphemes.FromBytes(s) g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit for g.Next() { gw := graphemeWidth(g.Value(), options) @@ -246,16 +261,22 @@ func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte } total += gw if total > maxWidth { - if options.ControlSequences { + if options.ControlSequences || options.ControlSequences8Bit { // Build result with trailing ANSI escape sequences preserved result := make([]byte, 0, len(s)+len(tail)) // at most original + tail result = append(result, s[:pos]...) result = append(result, tail...) + rem := graphemes.FromBytes(s[pos:]) - rem.AnsiEscapeSequences = true + rem.AnsiEscapeSequences = options.ControlSequences + rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit + for rem.Next() { v := rem.Value() - if len(v) > 0 && v[0] == 0x1B { + // Only preserve escapes that measure as zero-width + // on their own; some sequences (e.g. SOS) are only + // valid in their original context. + if len(v) > 0 && isEscapeLeader(v[0], options) && options.Bytes(v) == 0 { result = append(result, v...) 
} } @@ -280,6 +301,15 @@ func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { return DefaultOptions.TruncateBytes(s, maxWidth, tail) } +// isEscapeLeader reports whether the byte is the leading byte of an +// escape sequence that is active for the given options: 7-bit ESC (0x1B) +// when ControlSequences is true, or 8-bit C1 (0x80-0x9F) when +// ControlSequences8Bit is true. +func isEscapeLeader(b byte, options Options) bool { + return (options.ControlSequences && b == 0x1B) || + (options.ControlSequences8Bit && b >= 0x80 && b <= 0x9F) +} + // graphemeWidth returns the display width of a grapheme cluster. // The passed string must be a single grapheme cluster. func graphemeWidth[T ~string | []byte](s T, options Options) int { diff --git a/width_test.go b/width_test.go index ce0db62..f3d49b1 100644 --- a/width_test.go +++ b/width_test.go @@ -1609,3 +1609,29 @@ func TestUnicode16IndicConjunctBreak(t *testing.T) { }) } } + +func TestReproduceFuzzTruncate(t *testing.T) { + // Regression test: \x1bX (ESC X = SOS) is segmented as one grapheme in the + // full input but as two separate graphemes (\x1b + X) in the truncated + // result, causing the preserved escape sequence to add visible width. 
+ text := "00000000000\x1bX\x18" + options := []Options{ + {EastAsianWidth: false}, + {EastAsianWidth: true}, + {ControlSequences: true}, + {EastAsianWidth: true, ControlSequences: true}, + } + + for _, opt := range options { + ts := opt.TruncateString(text, 10, "...") + w := opt.String(ts) + if w > 10 { + t.Errorf("TruncateString() returned string longer than maxWidth for %q with opts %+v: %q (width %d)", text, opt, ts, w) + } + + tb := opt.TruncateBytes([]byte(text), 10, []byte("...")) + if !bytes.Equal(tb, []byte(ts)) { + t.Errorf("TruncateBytes() != TruncateString() for %q with opts %+v: %q != %q", text, opt, tb, ts) + } + } +} From 84881c829c3ab7ce305301b8c4411ecb5ba074f6 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Mon, 16 Feb 2026 12:16:20 -0500 Subject: [PATCH 2/7] move truncate to file --- truncate.go | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++ width.go | 138 ------------------------------------------------- 2 files changed, 144 insertions(+), 138 deletions(-) create mode 100644 truncate.go diff --git a/truncate.go b/truncate.go new file mode 100644 index 0000000..6360c20 --- /dev/null +++ b/truncate.go @@ -0,0 +1,144 @@ +package displaywidth + +import ( + "strings" + + "github.com/clipperhouse/uax29/v2/graphemes" +) + +// TruncateString truncates a string to the given maxWidth, and appends the +// given tail if the string is truncated. +// +// It ensures the visible width, including the width of the tail, is less than or +// equal to maxWidth. +// +// When [Options.ControlSequences] is true, ANSI escape sequences that appear +// after the truncation point are preserved in the output. This ensures that +// escape sequences such as SGR resets are not lost, preventing color bleed +// in terminal output. 
+func (options Options) TruncateString(s string, maxWidth int, tail string) string { + maxWidthWithoutTail := maxWidth - options.String(tail) + + var pos, total int + g := graphemes.FromString(s) + g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit + + for g.Next() { + gw := graphemeWidth(g.Value(), options) + if total+gw <= maxWidthWithoutTail { + pos = g.End() + } + total += gw + if total > maxWidth { + if options.ControlSequences || options.ControlSequences8Bit { + // Build result with trailing ANSI escape sequences preserved + var b strings.Builder + b.Grow(len(s) + len(tail)) // at most original + tail + b.WriteString(s[:pos]) + b.WriteString(tail) + + rem := graphemes.FromString(s[pos:]) + rem.AnsiEscapeSequences = options.ControlSequences + rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit + + for rem.Next() { + v := rem.Value() + // Only preserve escapes that measure as zero-width + // on their own; some sequences (e.g. SOS) are only + // valid in their original context. + if len(v) > 0 && isEscapeLeader(v[0], options) && options.String(v) == 0 { + b.WriteString(v) + } + } + return b.String() + } + return s[:pos] + tail + } + } + // No truncation + return s +} + +// TruncateString truncates a string to the given maxWidth, and appends the +// given tail if the string is truncated. +// +// It ensures the total width, including the width of the tail, is less than or +// equal to maxWidth. +func TruncateString(s string, maxWidth int, tail string) string { + return DefaultOptions.TruncateString(s, maxWidth, tail) +} + +// TruncateBytes truncates a []byte to the given maxWidth, and appends the +// given tail if the []byte is truncated. +// +// It ensures the visible width, including the width of the tail, is less than or +// equal to maxWidth. +// +// When [Options.ControlSequences] is true, ANSI escape sequences that appear +// after the truncation point are preserved in the output. 
This ensures that +// escape sequences such as SGR resets are not lost, preventing color bleed +// in terminal output. +func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { + maxWidthWithoutTail := maxWidth - options.Bytes(tail) + + var pos, total int + g := graphemes.FromBytes(s) + g.AnsiEscapeSequences = options.ControlSequences + g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit + + for g.Next() { + gw := graphemeWidth(g.Value(), options) + if total+gw <= maxWidthWithoutTail { + pos = g.End() + } + total += gw + if total > maxWidth { + if options.ControlSequences || options.ControlSequences8Bit { + // Build result with trailing ANSI escape sequences preserved + result := make([]byte, 0, len(s)+len(tail)) // at most original + tail + result = append(result, s[:pos]...) + result = append(result, tail...) + + rem := graphemes.FromBytes(s[pos:]) + rem.AnsiEscapeSequences = options.ControlSequences + rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit + + for rem.Next() { + v := rem.Value() + // Only preserve escapes that measure as zero-width + // on their own; some sequences (e.g. SOS) are only + // valid in their original context. + if len(v) > 0 && isEscapeLeader(v[0], options) && options.Bytes(v) == 0 { + result = append(result, v...) + } + } + return result + } + result := make([]byte, 0, pos+len(tail)) + result = append(result, s[:pos]...) + result = append(result, tail...) + return result + } + } + // No truncation + return s +} + +// TruncateBytes truncates a []byte to the given maxWidth, and appends the +// given tail if the []byte is truncated. +// +// It ensures the total width, including the width of the tail, is less than or +// equal to maxWidth. 
+func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { + return DefaultOptions.TruncateBytes(s, maxWidth, tail) +} + +// isEscapeLeader reports whether the byte is the leading byte of an +// escape sequence that is active for the given options: 7-bit ESC (0x1B) +// when ControlSequences is true, or 8-bit C1 (0x80-0x9F) when +// ControlSequences8Bit is true. +func isEscapeLeader(b byte, options Options) bool { + return (options.ControlSequences && b == 0x1B) || + (options.ControlSequences8Bit && b >= 0x80 && b <= 0x9F) +} diff --git a/width.go b/width.go index 1e74e39..0955fd3 100644 --- a/width.go +++ b/width.go @@ -1,7 +1,6 @@ package displaywidth import ( - "strings" "unicode/utf8" "github.com/clipperhouse/uax29/v2/graphemes" @@ -173,143 +172,6 @@ func (options Options) Rune(r rune) int { const _Default property = 0 -// TruncateString truncates a string to the given maxWidth, and appends the -// given tail if the string is truncated. -// -// It ensures the visible width, including the width of the tail, is less than or -// equal to maxWidth. -// -// When [Options.ControlSequences] is true, ANSI escape sequences that appear -// after the truncation point are preserved in the output. This ensures that -// escape sequences such as SGR resets are not lost, preventing color bleed -// in terminal output. 
-func (options Options) TruncateString(s string, maxWidth int, tail string) string { - maxWidthWithoutTail := maxWidth - options.String(tail) - - var pos, total int - g := graphemes.FromString(s) - g.AnsiEscapeSequences = options.ControlSequences - g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit - - for g.Next() { - gw := graphemeWidth(g.Value(), options) - if total+gw <= maxWidthWithoutTail { - pos = g.End() - } - total += gw - if total > maxWidth { - if options.ControlSequences || options.ControlSequences8Bit { - // Build result with trailing ANSI escape sequences preserved - var b strings.Builder - b.Grow(len(s) + len(tail)) // at most original + tail - b.WriteString(s[:pos]) - b.WriteString(tail) - - rem := graphemes.FromString(s[pos:]) - rem.AnsiEscapeSequences = options.ControlSequences - rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit - - for rem.Next() { - v := rem.Value() - // Only preserve escapes that measure as zero-width - // on their own; some sequences (e.g. SOS) are only - // valid in their original context. - if len(v) > 0 && isEscapeLeader(v[0], options) && options.String(v) == 0 { - b.WriteString(v) - } - } - return b.String() - } - return s[:pos] + tail - } - } - // No truncation - return s -} - -// TruncateString truncates a string to the given maxWidth, and appends the -// given tail if the string is truncated. -// -// It ensures the total width, including the width of the tail, is less than or -// equal to maxWidth. -func TruncateString(s string, maxWidth int, tail string) string { - return DefaultOptions.TruncateString(s, maxWidth, tail) -} - -// TruncateBytes truncates a []byte to the given maxWidth, and appends the -// given tail if the []byte is truncated. -// -// It ensures the visible width, including the width of the tail, is less than or -// equal to maxWidth. -// -// When [Options.ControlSequences] is true, ANSI escape sequences that appear -// after the truncation point are preserved in the output. 
This ensures that -// escape sequences such as SGR resets are not lost, preventing color bleed -// in terminal output. -func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { - maxWidthWithoutTail := maxWidth - options.Bytes(tail) - - var pos, total int - g := graphemes.FromBytes(s) - g.AnsiEscapeSequences = options.ControlSequences - g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit - - for g.Next() { - gw := graphemeWidth(g.Value(), options) - if total+gw <= maxWidthWithoutTail { - pos = g.End() - } - total += gw - if total > maxWidth { - if options.ControlSequences || options.ControlSequences8Bit { - // Build result with trailing ANSI escape sequences preserved - result := make([]byte, 0, len(s)+len(tail)) // at most original + tail - result = append(result, s[:pos]...) - result = append(result, tail...) - - rem := graphemes.FromBytes(s[pos:]) - rem.AnsiEscapeSequences = options.ControlSequences - rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit - - for rem.Next() { - v := rem.Value() - // Only preserve escapes that measure as zero-width - // on their own; some sequences (e.g. SOS) are only - // valid in their original context. - if len(v) > 0 && isEscapeLeader(v[0], options) && options.Bytes(v) == 0 { - result = append(result, v...) - } - } - return result - } - result := make([]byte, 0, pos+len(tail)) - result = append(result, s[:pos]...) - result = append(result, tail...) - return result - } - } - // No truncation - return s -} - -// TruncateBytes truncates a []byte to the given maxWidth, and appends the -// given tail if the []byte is truncated. -// -// It ensures the total width, including the width of the tail, is less than or -// equal to maxWidth. 
-func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { - return DefaultOptions.TruncateBytes(s, maxWidth, tail) -} - -// isEscapeLeader reports whether the byte is the leading byte of an -// escape sequence that is active for the given options: 7-bit ESC (0x1B) -// when ControlSequences is true, or 8-bit C1 (0x80-0x9F) when -// ControlSequences8Bit is true. -func isEscapeLeader(b byte, options Options) bool { - return (options.ControlSequences && b == 0x1B) || - (options.ControlSequences8Bit && b >= 0x80 && b <= 0x9F) -} - // graphemeWidth returns the display width of a grapheme cluster. // The passed string must be a single grapheme cluster. func graphemeWidth[T ~string | []byte](s T, options Options) int { From 544782c05e48c9a5f2978663915e202f1f2900f1 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Mon, 16 Feb 2026 12:16:57 -0500 Subject: [PATCH 3/7] move options to file --- options.go | 29 +++++++++++++++++++++++++++++ width.go | 28 ---------------------------- 2 files changed, 29 insertions(+), 28 deletions(-) create mode 100644 options.go diff --git a/options.go b/options.go new file mode 100644 index 0000000..7ca7551 --- /dev/null +++ b/options.go @@ -0,0 +1,29 @@ +package displaywidth + +// Options allows you to specify the treatment of ambiguous East Asian +// characters and ANSI escape sequences. +type Options struct { + // EastAsianWidth specifies whether to treat ambiguous East Asian characters + // as width 1 or 2. When false (default), ambiguous East Asian characters + // are treated as width 1. When true, they are width 2. + EastAsianWidth bool + + // ControlSequences specifies whether to ignore 7-bit ECMA-48 escape sequences + // when calculating the display width. When false (default), ANSI escape + // sequences are treated as just a series of characters. When true, they are + // treated as a single zero-width unit. 
+ ControlSequences bool + // ControlSequences8Bit specifies whether to ignore 8-bit ECMA-48 escape sequences + // when calculating the display width. When false (default), these are treated + // as just a series of characters. When true, they are treated as a single + // zero-width unit. + ControlSequences8Bit bool +} + +// DefaultOptions is the default options for the display width +// calculation, which is EastAsianWidth false and ControlSequences false. +var DefaultOptions = Options{ + EastAsianWidth: false, + ControlSequences: false, + ControlSequences8Bit: false, +} diff --git a/width.go b/width.go index 0955fd3..8d1f649 100644 --- a/width.go +++ b/width.go @@ -6,34 +6,6 @@ import ( "github.com/clipperhouse/uax29/v2/graphemes" ) -// Options allows you to specify the treatment of ambiguous East Asian -// characters and ANSI escape sequences. -type Options struct { - // EastAsianWidth specifies whether to treat ambiguous East Asian characters - // as width 1 or 2. When false (default), ambiguous East Asian characters - // are treated as width 1. When true, they are width 2. - EastAsianWidth bool - - // ControlSequences specifies whether to ignore 7-bit ECMA-48 escape sequences - // when calculating the display width. When false (default), ANSI escape - // sequences are treated as just a series of characters. When true, they are - // treated as a single zero-width unit. - ControlSequences bool - // ControlSequences8Bit specifies whether to ignore 8-bit ECMA-48 escape sequences - // when calculating the display width. When false (default), these are treated - // as just a series of characters. When true, they are treated as a single - // zero-width unit. - ControlSequences8Bit bool -} - -// DefaultOptions is the default options for the display width -// calculation, which is EastAsianWidth false and ControlSequences false. 
-var DefaultOptions = Options{ - EastAsianWidth: false, - ControlSequences: false, - ControlSequences8Bit: false, -} - // String calculates the display width of a string, // by iterating over grapheme clusters in the string // and summing their widths. From 9b2013d6ca05f76d4235d036574a86b907fe9373 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Mon, 16 Feb 2026 12:35:02 -0500 Subject: [PATCH 4/7] tests & fixes --- fuzz_test.go | 27 ++++++- width.go | 16 +++- width_test.go | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 240 insertions(+), 5 deletions(-) diff --git a/fuzz_test.go b/fuzz_test.go index d13775f..08a3e69 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -102,7 +102,10 @@ func FuzzBytesAndString(f *testing.F) { {EastAsianWidth: false}, {EastAsianWidth: true}, {ControlSequences: true}, + {ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, } for _, option := range options { @@ -316,7 +319,10 @@ func FuzzTruncateStringAndBytes(f *testing.F) { {EastAsianWidth: false}, {EastAsianWidth: true}, {ControlSequences: true}, + {ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, } for _, option := range options { @@ -369,6 +375,21 @@ func FuzzControlSequences(f *testing.F) { f.Add([]byte("中文")) // plain CJK f.Add([]byte("😀")) // plain emoji + // Seed with 8-bit C1 escape sequences + f.Add([]byte("\x9B31m")) // C1 CSI red + f.Add([]byte("\x9B0m")) // C1 CSI reset + f.Add([]byte("\x9B1m")) // C1 CSI bold + f.Add([]byte("\x9B31mhello\x9B0m")) // C1 CSI red text + f.Add([]byte("\x9B1m\x9B31mhi\x9B0m")) // C1 nested SGR + f.Add([]byte("hello\x9B31mworld\x9B0m")) // C1 mid-string + f.Add([]byte("\x9B31m中文\x9B0m")) // C1 colored CJK + f.Add([]byte("\x9B31m😀\x9B0m")) // C1 colored 
emoji + f.Add([]byte("\x9D0;Title\x9C")) // C1 OSC with C1 ST + f.Add([]byte("\x9D0;Title\x07")) // C1 OSC with BEL + f.Add([]byte("\x90qpayload\x9C")) // C1 DCS with C1 ST + f.Add([]byte("\x84")) // standalone C1 + f.Add([]byte("\x1b[31mhello\x9B0m")) // mixed 7-bit and 8-bit + // Seed with multi-lingual text file, err := testdata.Sample() if err != nil { @@ -383,7 +404,11 @@ func FuzzControlSequences(f *testing.F) { {}, {EastAsianWidth: true}, {ControlSequences: true}, + {ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, + {EastAsianWidth: true, ControlSequences: true, ControlSequences8Bit: true}, } f.Fuzz(func(t *testing.T, text []byte) { @@ -432,7 +457,7 @@ func FuzzControlSequences(f *testing.F) { // Invariant: ControlSequences width <= default width // (escape sequences become 0 instead of their visible char widths) - if opt.ControlSequences { + if opt.ControlSequences || opt.ControlSequences8Bit { noIgnore := Options{EastAsianWidth: opt.EastAsianWidth} wDefault := noIgnore.Bytes(text) if wb > wDefault { diff --git a/width.go b/width.go index 8d1f649..f6e0ab7 100644 --- a/width.go +++ b/width.go @@ -147,11 +147,19 @@ const _Default property = 0 // graphemeWidth returns the display width of a grapheme cluster. // The passed string must be a single grapheme cluster. func graphemeWidth[T ~string | []byte](s T, options Options) int { - // Optimization: no need to look up properties - switch len(s) { - case 0: + if len(s) == 0 { return 0 - case 1: + } + + // C1 controls (0x80-0x9F) are zero-width when 8-bit control sequences + // are enabled. This must be checked before the single-byte optimization + // below, which would otherwise return width 1 for these bytes. 
+ if options.ControlSequences8Bit && s[0] >= 0x80 && s[0] <= 0x9F { + return 0 + } + + // Optimization: single-byte graphemes need no property lookup + if len(s) == 1 { return asciiWidth(s[0]) } diff --git a/width_test.go b/width_test.go index f3d49b1..6de617e 100644 --- a/width_test.go +++ b/width_test.go @@ -106,6 +106,8 @@ func TestStringWidth(t *testing.T) { } var controlSequences = Options{ControlSequences: true} +var controlSequences8Bit = Options{ControlSequences8Bit: true} +var controlSequencesBoth = Options{ControlSequences: true, ControlSequences8Bit: true} func TestAnsiEscapeSequences(t *testing.T) { tests := []struct { @@ -168,6 +170,174 @@ func TestAnsiEscapeSequences(t *testing.T) { } } +func TestAnsiEscapeSequences8Bit(t *testing.T) { + tests := []struct { + name string + input string + options Options + expected int + }{ + // 8-bit C1 CSI sequences should be zero width + {"C1 CSI red", "\x9B31m", controlSequences8Bit, 0}, + {"C1 CSI reset", "\x9B0m", controlSequences8Bit, 0}, + {"C1 CSI bold", "\x9B1m", controlSequences8Bit, 0}, + {"C1 CSI multi-param", "\x9B1;2;3m", controlSequences8Bit, 0}, + {"C1 CSI cursor up", "\x9BA", controlSequences8Bit, 0}, + + // 8-bit C1 OSC/DCS/SOS/APC with C1 ST terminator + {"C1 OSC with ST", "\x9D0;Title\x9C", controlSequences8Bit, 0}, + {"C1 OSC with BEL", "\x9D0;Title\x07", controlSequences8Bit, 0}, + {"C1 DCS with ST", "\x90qpayload\x9C", controlSequences8Bit, 0}, + {"C1 SOS with ST", "\x98hello\x9C", controlSequences8Bit, 0}, + {"C1 APC with ST", "\x9Fdata\x9C", controlSequences8Bit, 0}, + + // Standalone C1 controls (single byte, no body) + {"C1 IND", "\x84", controlSequences8Bit, 0}, + {"C1 NEL", "\x85", controlSequences8Bit, 0}, + + // 8-bit sequences mixed with visible text + {"C1 CSI red hello", "\x9B31mhello\x9B0m", controlSequences8Bit, 5}, + {"C1 CSI colored CJK", "\x9B31m中文\x9B0m", controlSequences8Bit, 4}, + {"C1 CSI colored emoji", "\x9B31m😀\x9B0m", controlSequences8Bit, 2}, + {"C1 CSI nested", 
"\x9B1m\x9B31mhi\x9B0m", controlSequences8Bit, 2}, + + // Without ControlSequences8Bit, C1 bytes have width per asciiWidth (1 for >= 0x80) + {"C1 CSI default options", "\x9B31m", defaultOptions, 4}, + + // 8-bit option should not regress plain text + {"plain ASCII with 8-bit option", "hello", controlSequences8Bit, 5}, + {"CJK with 8-bit option", "中文", controlSequences8Bit, 4}, + {"emoji with 8-bit option", "😀", controlSequences8Bit, 2}, + {"empty with 8-bit option", "", controlSequences8Bit, 0}, + + // Both options enabled + {"both: 7-bit SGR", "\x1b[31mhello\x1b[0m", controlSequencesBoth, 5}, + {"both: 8-bit CSI", "\x9B31mhello\x9B0m", controlSequencesBoth, 5}, + {"both: mixed 7 and 8-bit", "\x1b[31mhello\x9B0m", controlSequencesBoth, 5}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.options.String(tt.input) + if result != tt.expected { + t.Errorf("String(%q) = %d, want %d", tt.input, result, tt.expected) + } + + result = tt.options.Bytes([]byte(tt.input)) + if result != tt.expected { + t.Errorf("Bytes(%q) = %d, want %d", tt.input, result, tt.expected) + } + }) + } +} + +// TestAnsiEscapeSequencesIndependence verifies that the 7-bit and 8-bit options +// are strictly independent: enabling one must NOT cause the other's sequences +// to be treated as escape sequences. +func TestAnsiEscapeSequencesIndependence(t *testing.T) { + tests := []struct { + name string + input string + options Options + expected int + desc string + }{ + // 7-bit only: C1 bytes must NOT be treated as escape sequences. 
+ // \x9B31m is 4 visible chars (0x9B has width 1, '3' '1' 'm' each width 1) + { + name: "7-bit on, 8-bit input C1 CSI", + input: "\x9B31m", + options: controlSequences, + expected: 4, + desc: "C1 CSI should not be recognized when only 7-bit is enabled", + }, + { + name: "7-bit on, 8-bit input standalone C1", + input: "\x84", + options: controlSequences, + expected: 1, + desc: "Standalone C1 byte should have width 1 when only 7-bit is enabled", + }, + { + name: "7-bit on, 8-bit input C1 with text", + input: "\x9B31mhello\x9B0m", + options: controlSequences, + expected: 4 + 5 + 3, + desc: "C1 CSI sequences should contribute visible width when only 7-bit is enabled", + }, + + // 8-bit only: 7-bit ESC sequences must NOT be treated as escape sequences. + // \x1b[31m is: ESC (width 0) + '[' (1) + '3' (1) + '1' (1) + 'm' (1) = 4 + { + name: "8-bit on, 7-bit input SGR", + input: "\x1b[31m", + options: controlSequences8Bit, + expected: 4, + desc: "7-bit SGR should not be recognized when only 8-bit is enabled", + }, + { + name: "8-bit on, 7-bit input SGR with text", + input: "\x1b[31mhello\x1b[0m", + options: controlSequences8Bit, + expected: 4 + 5 + 3, + desc: "7-bit SGR should contribute visible width when only 8-bit is enabled", + }, + + // Both enabled: both kinds should be zero-width + { + name: "both on, 7-bit SGR", + input: "\x1b[31m", + options: controlSequencesBoth, + expected: 0, + desc: "7-bit SGR should be zero-width when both are enabled", + }, + { + name: "both on, 8-bit CSI", + input: "\x9B31m", + options: controlSequencesBoth, + expected: 0, + desc: "C1 CSI should be zero-width when both are enabled", + }, + { + name: "both on, mixed sequences with text", + input: "\x1b[31mhello\x9B0m", + options: controlSequencesBoth, + expected: 5, + desc: "Mixed 7-bit and 8-bit sequences should both be zero-width", + }, + + // Neither enabled: both kinds contribute visible width + { + name: "neither, 7-bit SGR", + input: "\x1b[31m", + options: defaultOptions, + expected: 
4, + desc: "7-bit SGR should contribute visible width when neither is enabled", + }, + { + name: "neither, 8-bit CSI", + input: "\x9B31m", + options: defaultOptions, + expected: 4, + desc: "C1 CSI should contribute visible width when neither is enabled", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.options.String(tt.input) + if result != tt.expected { + t.Errorf("String(%q) = %d, want %d (%s)", tt.input, result, tt.expected, tt.desc) + } + + result = tt.options.Bytes([]byte(tt.input)) + if result != tt.expected { + t.Errorf("Bytes(%q) = %d, want %d (%s)", tt.input, result, tt.expected, tt.desc) + } + }) + } +} + func TestRuneWidth(t *testing.T) { tests := []struct { name string @@ -900,6 +1070,19 @@ func TestGraphemesControlSequences(t *testing.T) { // Default options: sum of grapheme widths must still match String/Bytes {"default ANSI wrapped", "\x1b[31mhello\x1b[0m", defaultOptions}, {"default plain", "hello", defaultOptions}, + // 8-bit ControlSequences: C1 sequences are one zero-width grapheme each + {"8-bit C1 CSI wrapped", "\x9B31mhello\x9B0m", controlSequences8Bit}, + {"8-bit C1 CSI only", "\x9B0m", controlSequences8Bit}, + {"8-bit plain text", "hi", controlSequences8Bit}, + {"8-bit C1 CSI mid", "a\x9B31mb\x9B0mc", controlSequences8Bit}, + // Both options: both 7-bit and 8-bit sequences are zero-width graphemes + {"both: mixed", "\x1b[31mhello\x9B0m", controlSequencesBoth}, + {"both: 7-bit only input", "\x1b[31mhi\x1b[0m", controlSequencesBoth}, + {"both: 8-bit only input", "\x9B31mhi\x9B0m", controlSequencesBoth}, + // Independence: 7-bit on but 8-bit input — graphemes must still sum correctly + {"7-bit on, 8-bit input", "\x9B31mhello\x9B0m", controlSequences}, + // Independence: 8-bit on but 7-bit input + {"8-bit on, 7-bit input", "\x1b[31mhello\x1b[0m", controlSequences8Bit}, } for _, tt := range tests { @@ -1055,6 +1238,25 @@ func TestTruncateString(t *testing.T) { // Multiple colors: all trailing escapes 
preserved {"ControlSequences multi color", "a\x1b[31mb\x1b[32mc\x1b[33md\x1b[0m", 2, "...", controlSequences, "...\x1b[31m\x1b[32m\x1b[33m\x1b[0m"}, + // 8-bit ControlSequences truncation: same behavior as 7-bit but with C1 sequences + {"8-bit plain no truncation", "hello", 5, "...", controlSequences8Bit, "hello"}, + {"8-bit C1 CSI wrapped no truncation", "\x9B31mhello\x9B0m", 8, "...", controlSequences8Bit, "\x9B31mhello\x9B0m"}, + {"8-bit C1 CSI wrapped truncate", "\x9B31mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B31mh...\x9B0m"}, + {"8-bit C1 CSI in middle truncate", "hello\x9B31mworld", 5, "...", controlSequences8Bit, "he...\x9B31m"}, + {"8-bit C1 CSI CJK truncate", "\x9B31m中文\x9B0m", 2, "...", controlSequences8Bit, "...\x9B31m\x9B0m"}, + {"8-bit C1 CSI no trailing escape", "\x9B31mhello", 4, "...", controlSequences8Bit, "\x9B31mh..."}, + {"8-bit C1 stacked SGR", "\x9B31m\x9B42mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B31m\x9B42mh...\x9B0m"}, + + // 7-bit only must NOT preserve trailing C1 sequences. + // With 7-bit only, \x9B is a regular character (width 1), so the input + // "hello\x9B0m" has visible width 8. Trailing \x9B0m is not preserved. 
+ {"7-bit only ignores trailing C1", "hello\x9B0m", 5, "...", controlSequences, "he..."}, + + // Both enabled: preserves both 7-bit and 8-bit trailing escapes + {"both: mixed trailing escapes", "\x1b[31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x1b[31mh...\x9B0m"}, + {"both: 7-bit wrapped truncate", "\x1b[31mhello\x1b[0m", 4, "...", controlSequencesBoth, "\x1b[31mh...\x1b[0m"}, + {"both: 8-bit wrapped truncate", "\x9B31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x9B31mh...\x9B0m"}, + // East Asian Width option {"ambiguous EAW fits", "★", 2, "...", eawOptions, "★"}, {"ambiguous EAW truncate", "★", 1, "...", eawOptions, "..."}, From f3581b46c5d39317b9e6aed6c20fafd46152a108 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Mon, 16 Feb 2026 13:53:27 -0500 Subject: [PATCH 5/7] Remove 8-bit from Truncate, explained in comments Confusing at best, incorrect / dangerous at worst --- fuzz_test.go | 61 ++++++----------------------------- truncate.go | 71 +++++++++++++++++++++------------------- width_test.go | 89 +++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 127 insertions(+), 94 deletions(-) diff --git a/fuzz_test.go b/fuzz_test.go index 08a3e69..34bcda9 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -285,38 +285,10 @@ func FuzzTruncateStringAndBytes(f *testing.F) { f.Add("\xff\xfe\xfd") // invalid UTF-8 f.Fuzz(func(t *testing.T, text string) { - // Test with default options - ts := TruncateString(text, 10, "...") - - // Invariant: truncated string should be less than or equal to maxWidth - if String(ts) > 10 { - t.Errorf("TruncateString() returned string longer than maxWidth for %q: %q", text, ts) - } - - // Invariant: truncated string should be less than or equal to maxWidth - if len(ts) > len(text) { - t.Errorf("TruncateString() returned string longer than original for %q: %q", text, ts) - } - - tb := TruncateBytes([]byte(text), 10, []byte("...")) - - // Invariant: truncated bytes should be less than or equal to maxWidth - if Bytes(tb) > 
10 { - t.Errorf("TruncateBytes() returned bytes longer than maxWidth for %q: %q", text, tb) - } - - // Invariant: truncated bytes should be less than or equal to original - if len(tb) > len(text) { - t.Errorf("TruncateBytes() returned bytes longer than original for %q: %q", text, tb) - } - - if !bytes.Equal(tb, []byte(ts)) { - t.Errorf("TruncateBytes() returned bytes different from TruncateString() for %q: %q != %q", text, tb, ts) - } - - // Test with different options + // Exercise truncation to discover panics and infinite loops. + // Width invariant testing is in proper unit tests. options := []Options{ - {EastAsianWidth: false}, + {}, {EastAsianWidth: true}, {ControlSequences: true}, {ControlSequences8Bit: true}, @@ -327,15 +299,11 @@ func FuzzTruncateStringAndBytes(f *testing.F) { for _, option := range options { ts := option.TruncateString(text, 10, "...") - - // Invariant: truncated string should be less than or equal to maxWidth - if option.String(ts) > 10 { - t.Errorf("TruncateString() returned string longer than maxWidth for %q: %q", text, ts) - } - tb := option.TruncateBytes([]byte(text), 10, []byte("...")) + + // Invariant: String and Bytes paths must agree if !bytes.Equal(tb, []byte(ts)) { - t.Errorf("TruncateBytes() returned bytes different from TruncateString() for %q: %q != %q", text, tb, ts) + t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", option, text, tb, ts) } } }) @@ -465,23 +433,14 @@ func FuzzControlSequences(f *testing.F) { } } - // Invariant: truncation respects maxWidth (accounting for the tail, - // which is always appended and may itself exceed maxWidth) + // Exercise truncation to discover panics and infinite loops. + // Width invariant testing is in proper unit tests. tail := "..." 
- tailWidth := opt.String(tail) for _, maxWidth := range []int{0, 1, 3, 5, 10, 20} { ts := opt.TruncateString(string(text), maxWidth, tail) - tsWidth := opt.String(ts) - limit := maxWidth - if tailWidth > limit { - limit = tailWidth - } - if tsWidth > limit { - t.Errorf("TruncateString() width %d > max(maxWidth, tailWidth) %d with %+v for %q -> %q", - tsWidth, limit, opt, text, ts) - } - tb := opt.TruncateBytes(text, maxWidth, []byte(tail)) + + // Invariant: String and Bytes paths must agree if !bytes.Equal(tb, []byte(ts)) { t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", opt, text, tb, ts) diff --git a/truncate.go b/truncate.go index 6360c20..b3e696f 100644 --- a/truncate.go +++ b/truncate.go @@ -12,17 +12,25 @@ import ( // It ensures the visible width, including the width of the tail, is less than or // equal to maxWidth. // -// When [Options.ControlSequences] is true, ANSI escape sequences that appear -// after the truncation point are preserved in the output. This ensures that -// escape sequences such as SGR resets are not lost, preventing color bleed -// in terminal output. +// When [Options.ControlSequences] is true, 7-bit ANSI escape sequences that +// appear after the truncation point are preserved in the output. This ensures +// that escape sequences such as SGR resets are not lost, preventing color +// bleed in terminal output. +// +// [Options.ControlSequences8Bit] is ignored by truncation. 8-bit C1 byte values +// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so manipulating them +// during truncation can shift byte boundaries and form unintended visible +// characters. Use [Options.String] or [Options.Bytes] for 8-bit-aware width +// measurement. func (options Options) TruncateString(s string, maxWidth int, tail string) string { + // We deliberately ignore ControlSequences8Bit for truncation, see above. 
+ options.ControlSequences8Bit = false + maxWidthWithoutTail := maxWidth - options.String(tail) var pos, total int g := graphemes.FromString(s) g.AnsiEscapeSequences = options.ControlSequences - g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit for g.Next() { gw := graphemeWidth(g.Value(), options) @@ -31,8 +39,8 @@ func (options Options) TruncateString(s string, maxWidth int, tail string) strin } total += gw if total > maxWidth { - if options.ControlSequences || options.ControlSequences8Bit { - // Build result with trailing ANSI escape sequences preserved + if options.ControlSequences { + // Build result with trailing 7-bit ANSI escape sequences preserved var b strings.Builder b.Grow(len(s) + len(tail)) // at most original + tail b.WriteString(s[:pos]) @@ -40,14 +48,13 @@ func (options Options) TruncateString(s string, maxWidth int, tail string) strin rem := graphemes.FromString(s[pos:]) rem.AnsiEscapeSequences = options.ControlSequences - rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit for rem.Next() { v := rem.Value() - // Only preserve escapes that measure as zero-width - // on their own; some sequences (e.g. SOS) are only - // valid in their original context. - if len(v) > 0 && isEscapeLeader(v[0], options) && options.String(v) == 0 { + // Only preserve 7-bit escapes (ESC = 0x1B) that measure + // as zero-width on their own; some sequences (e.g. SOS) + // are only valid in their original context. + if len(v) > 0 && v[0] == 0x1B && options.String(v) == 0 { b.WriteString(v) } } @@ -75,17 +82,25 @@ func TruncateString(s string, maxWidth int, tail string) string { // It ensures the visible width, including the width of the tail, is less than or // equal to maxWidth. // -// When [Options.ControlSequences] is true, ANSI escape sequences that appear -// after the truncation point are preserved in the output. This ensures that -// escape sequences such as SGR resets are not lost, preventing color bleed -// in terminal output. 
+// When [Options.ControlSequences] is true, 7-bit ANSI escape sequences that +// appear after the truncation point are preserved in the output. This ensures +// that escape sequences such as SGR resets are not lost, preventing color +// bleed in terminal output. +// +// [Options.ControlSequences8Bit] is ignored by truncation. 8-bit C1 byte values +// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so manipulating them +// during truncation can shift byte boundaries and form unintended visible +// characters. Use [Options.String] or [Options.Bytes] for 8-bit-aware width +// measurement. func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { + // We deliberately ignore ControlSequences8Bit for truncation, see above. + options.ControlSequences8Bit = false + maxWidthWithoutTail := maxWidth - options.Bytes(tail) var pos, total int g := graphemes.FromBytes(s) g.AnsiEscapeSequences = options.ControlSequences - g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit for g.Next() { gw := graphemeWidth(g.Value(), options) @@ -94,22 +109,21 @@ func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte } total += gw if total > maxWidth { - if options.ControlSequences || options.ControlSequences8Bit { - // Build result with trailing ANSI escape sequences preserved + if options.ControlSequences { + // Build result with trailing 7-bit ANSI escape sequences preserved result := make([]byte, 0, len(s)+len(tail)) // at most original + tail result = append(result, s[:pos]...) result = append(result, tail...) rem := graphemes.FromBytes(s[pos:]) rem.AnsiEscapeSequences = options.ControlSequences - rem.AnsiEscapeSequences8Bit = options.ControlSequences8Bit for rem.Next() { v := rem.Value() - // Only preserve escapes that measure as zero-width - // on their own; some sequences (e.g. SOS) are only - // valid in their original context. 
- if len(v) > 0 && isEscapeLeader(v[0], options) && options.Bytes(v) == 0 { + // Only preserve 7-bit escapes (ESC = 0x1B) that measure + // as zero-width on their own; some sequences (e.g. SOS) + // are only valid in their original context. + if len(v) > 0 && v[0] == 0x1B && options.Bytes(v) == 0 { result = append(result, v...) } } @@ -133,12 +147,3 @@ func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { return DefaultOptions.TruncateBytes(s, maxWidth, tail) } - -// isEscapeLeader reports whether the byte is the leading byte of an -// escape sequence that is active for the given options: 7-bit ESC (0x1B) -// when ControlSequences is true, or 8-bit C1 (0x80-0x9F) when -// ControlSequences8Bit is true. -func isEscapeLeader(b byte, options Options) bool { - return (options.ControlSequences && b == 0x1B) || - (options.ControlSequences8Bit && b >= 0x80 && b <= 0x9F) -} diff --git a/width_test.go b/width_test.go index 6de617e..bcf5a9d 100644 --- a/width_test.go +++ b/width_test.go @@ -1238,24 +1238,29 @@ func TestTruncateString(t *testing.T) { // Multiple colors: all trailing escapes preserved {"ControlSequences multi color", "a\x1b[31mb\x1b[32mc\x1b[33md\x1b[0m", 2, "...", controlSequences, "...\x1b[31m\x1b[32m\x1b[33m\x1b[0m"}, - // 8-bit ControlSequences truncation: same behavior as 7-bit but with C1 sequences + // 8-bit ControlSequences8Bit is ignored by truncation entirely. The + // grapheme parser is not told about 8-bit, so C1 sequence parameters + // (e.g. "31m" after \x9B) are treated as visible characters. This is + // intentional: 8-bit C1 bytes (0x80-0x9F) overlap with UTF-8 multi-byte + // encoding, making them unsafe to manipulate during truncation. 
{"8-bit plain no truncation", "hello", 5, "...", controlSequences8Bit, "hello"}, - {"8-bit C1 CSI wrapped no truncation", "\x9B31mhello\x9B0m", 8, "...", controlSequences8Bit, "\x9B31mhello\x9B0m"}, - {"8-bit C1 CSI wrapped truncate", "\x9B31mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B31mh...\x9B0m"}, - {"8-bit C1 CSI in middle truncate", "hello\x9B31mworld", 5, "...", controlSequences8Bit, "he...\x9B31m"}, - {"8-bit C1 CSI CJK truncate", "\x9B31m中文\x9B0m", 2, "...", controlSequences8Bit, "...\x9B31m\x9B0m"}, - {"8-bit C1 CSI no trailing escape", "\x9B31mhello", 4, "...", controlSequences8Bit, "\x9B31mh..."}, - {"8-bit C1 stacked SGR", "\x9B31m\x9B42mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B31m\x9B42mh...\x9B0m"}, + {"8-bit C1 CSI wrapped truncate", "\x9B31mhello\x9B0m", 8, "...", controlSequences8Bit, "\x9B31mh..."}, + {"8-bit C1 CSI wrapped truncate narrow", "\x9B31mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B..."}, + {"8-bit C1 CSI in middle truncate", "hello\x9B31mworld", 5, "...", controlSequences8Bit, "he..."}, + {"8-bit C1 CSI CJK truncate", "\x9B31m中文\x9B0m", 2, "...", controlSequences8Bit, "..."}, + {"8-bit C1 CSI no trailing escape", "\x9B31mhello", 4, "...", controlSequences8Bit, "\x9B..."}, + {"8-bit C1 stacked SGR", "\x9B31m\x9B42mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B..."}, // 7-bit only must NOT preserve trailing C1 sequences. // With 7-bit only, \x9B is a regular character (width 1), so the input // "hello\x9B0m" has visible width 8. Trailing \x9B0m is not preserved. {"7-bit only ignores trailing C1", "hello\x9B0m", 5, "...", controlSequences, "he..."}, - // Both enabled: preserves both 7-bit and 8-bit trailing escapes - {"both: mixed trailing escapes", "\x1b[31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x1b[31mh...\x9B0m"}, + // Both enabled: only 7-bit trailing escapes are preserved; 8-bit is + // ignored by truncation, so C1 parameters are visible characters. 
+ {"both: mixed trailing escapes", "\x1b[31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x1b[31mh..."}, {"both: 7-bit wrapped truncate", "\x1b[31mhello\x1b[0m", 4, "...", controlSequencesBoth, "\x1b[31mh...\x1b[0m"}, - {"both: 8-bit wrapped truncate", "\x9B31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x9B31mh...\x9B0m"}, + {"both: 8-bit wrapped truncate", "\x9B31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x9B..."}, // East Asian Width option {"ambiguous EAW fits", "★", 2, "...", eawOptions, "★"}, @@ -1837,3 +1842,67 @@ func TestReproduceFuzzTruncate(t *testing.T) { } } } + +func TestTruncateIgnores8Bit(t *testing.T) { + // Truncation ignores ControlSequences8Bit entirely (see GoDoc). + // This means the truncation result, when measured with 8-bit-aware + // String(), may exceed maxWidth. This is the documented tradeoff: + // 8-bit C1 bytes (0x80-0x9F) overlap with UTF-8 multi-byte encoding, + // so manipulating them during truncation is unsafe. + // + // These tests verify that truncation is self-consistent: the result + // measured WITHOUT 8-bit should respect maxWidth. + + cases := []struct { + name string + text string + }{ + { + // Byte recombination: the grapheme parser with 8-bit groups + // \x9f\xcf as one escape (APC + payload). Without 8-bit, \xcf + // and \x90 can recombine into U+03D0 (ϐ, width 1). + name: "byte recombination", + text: "000000000000000000000\x9f\xcf\x1a\x90", + }, + { + // SOS terminator mismatch: with 8-bit, \x9c is ST (terminates + // the 7-bit SOS started by \x1bX). Without 8-bit, \x9c is not + // recognized as ST, so SOS consumes more of the string. 
+ name: "SOS terminator mismatch", + text: "00\x98\x1bX\x9c0000000000\x18", + }, + } + + options := []Options{ + {ControlSequences8Bit: true}, + {ControlSequences: true, ControlSequences8Bit: true}, + {EastAsianWidth: true, ControlSequences8Bit: true}, + } + + for _, tc := range cases { + for _, opt := range options { + // Truncation ignores 8-bit, so measure with the same view + measureOpt := opt + measureOpt.ControlSequences8Bit = false + + ts := opt.TruncateString(tc.text, 10, "...") + w := measureOpt.String(ts) + if w > 10 { + t.Errorf("%s: TruncateString() width %d > 10 (measured without 8-bit) for %q with opts %+v: %q", + tc.name, w, tc.text, opt, ts) + } + + tb := opt.TruncateBytes([]byte(tc.text), 10, []byte("...")) + bw := measureOpt.Bytes(tb) + if bw > 10 { + t.Errorf("%s: TruncateBytes() width %d > 10 (measured without 8-bit) for %q with opts %+v: %q", + tc.name, bw, tc.text, opt, tb) + } + + if !bytes.Equal(tb, []byte(ts)) { + t.Errorf("%s: TruncateBytes() != TruncateString() for %q with opts %+v: %q != %q", + tc.name, tc.text, opt, tb, ts) + } + } + } +} From e3356fbf198075618519f9d9a62506323ae14c06 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Mon, 16 Feb 2026 19:51:47 -0500 Subject: [PATCH 6/7] comment --- options.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/options.go b/options.go index 7ca7551..b63b585 100644 --- a/options.go +++ b/options.go @@ -21,7 +21,8 @@ type Options struct { } // DefaultOptions is the default options for the display width -// calculation, which is EastAsianWidth false and ControlSequences false. +// calculation, which is EastAsianWidth false, ControlSequences false, and +// ControlSequences8Bit false. 
var DefaultOptions = Options{ EastAsianWidth: false, ControlSequences: false, From 3672014dedbb26368645285071ce88dd9bc16013 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Mon, 16 Feb 2026 22:02:36 -0500 Subject: [PATCH 7/7] README and AGENTS --- AGENTS.md | 12 ++++++++++++ README.md | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index ce8d7c0..0a01467 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,6 +28,18 @@ Retrieve and consider the comments on the PR, which may have come from GitHub Co Offer to optionally post a brief summary of the review to the PR, via the gh CLI tool. +## Tagged Go releases + +If I ask you whether we are ready to release, this means a tagged Go release on the main branch. Go releases are git tagged with a version number. + +Review the changes since the last release, i.e. the previous git tag. Ensure that the changes are complete and correct. Identify new features, bug fixes, and performance improvements. + +Identify breaking changes, especially API changes. + +Ensure good test coverage. Look for performance changes, especially performance regressions, by running benchmarks against the previous release. + +Ensure that the documentation in READMEs and GoDocs is complete, correct and consistent. + ## Comparisons to go-runewidth We originally attempted to make this package compatible with go-runewidth. diff --git a/README.md b/README.md index 17a3981..ca19e6b 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,16 @@ when calculating the display width. When `false` (default), ANSI escape sequences are treated as just a series of characters. When `true`, they are treated as a single zero-width unit. +#### ControlSequences8Bit + +`ControlSequences8Bit` specifies whether to ignore 8-bit ECMA-48 escape sequences +when calculating the display width. When `false` (default), these are treated +as just a series of characters. When `true`, they are treated as a single +zero-width unit. 
+ +Note: this option is ignored by the `Truncate` methods. 8-bit C1 bytes (0x80-0x9F) overlap with UTF-8 +multi-byte encoding, so truncating around them can form unintended visible characters. + #### EastAsianWidth `EastAsianWidth` defines how