Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ Retrieve and consider the comments on the PR, which may have come from GitHub Co

Offer to optionally post a brief summary of the review to the PR, via the gh CLI tool.

## Tagged Go releases

If I ask you whether we are ready to release, this means a tagged Go release on the main branch. Go releases are git tagged with a version number.

Review the changes since the last release, i.e. the previous git tag. Ensure that the changes are complete and correct. Identify new features, bug fixes, and performance improvements.

Identify breaking changes, especially API changes.

Ensure good test coverage. Look for performance changes, especially performance regressions, by running benchmarks against the previous release.

Ensure that the documentation in READMEs and GoDocs is complete, correct, and consistent.

## Comparisons to go-runewidth

We originally attempted to make this package compatible with go-runewidth.
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ when calculating the display width. When `false` (default), ANSI escape
sequences are treated as just a series of characters. When `true`, they are
treated as a single zero-width unit.

#### ControlSequences8Bit

`ControlSequences8Bit` specifies whether to ignore 8-bit ECMA-48 escape sequences
when calculating the display width. When `false` (default), these are treated
as just a series of characters. When `true`, they are treated as a single
zero-width unit.

Note: this option is ignored by the `Truncate` methods. 8-bit C1 bytes
(0x80-0x9F) overlap with UTF-8 multi-byte encoding, so cutting and
concatenating text around them can form unintended visible characters.

#### EastAsianWidth

`EastAsianWidth` defines how
Expand Down
88 changes: 36 additions & 52 deletions fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ func FuzzBytesAndString(f *testing.F) {
{EastAsianWidth: false},
{EastAsianWidth: true},
{ControlSequences: true},
{ControlSequences8Bit: true},
{ControlSequences: true, ControlSequences8Bit: true},
{EastAsianWidth: true, ControlSequences: true},
{EastAsianWidth: true, ControlSequences8Bit: true},
}

for _, option := range options {
Expand Down Expand Up @@ -282,54 +285,25 @@ func FuzzTruncateStringAndBytes(f *testing.F) {
f.Add("\xff\xfe\xfd") // invalid UTF-8

f.Fuzz(func(t *testing.T, text string) {
// Test with default options
ts := TruncateString(text, 10, "...")

// Invariant: truncated string should be less than or equal to maxWidth
if String(ts) > 10 {
t.Errorf("TruncateString() returned string longer than maxWidth for %q: %q", text, ts)
}

// Invariant: truncated string should be less than or equal to maxWidth
if len(ts) > len(text) {
t.Errorf("TruncateString() returned string longer than original for %q: %q", text, ts)
}

tb := TruncateBytes([]byte(text), 10, []byte("..."))

// Invariant: truncated bytes should be less than or equal to maxWidth
if Bytes(tb) > 10 {
t.Errorf("TruncateBytes() returned bytes longer than maxWidth for %q: %q", text, tb)
}

// Invariant: truncated bytes should be less than or equal to original
if len(tb) > len(text) {
t.Errorf("TruncateBytes() returned bytes longer than original for %q: %q", text, tb)
}

if !bytes.Equal(tb, []byte(ts)) {
t.Errorf("TruncateBytes() returned bytes different from TruncateString() for %q: %q != %q", text, tb, ts)
}

// Test with different options
// Exercise truncation to discover panics and infinite loops.
// Width invariant testing is in proper unit tests.
options := []Options{
{EastAsianWidth: false},
{},
{EastAsianWidth: true},
{ControlSequences: true},
{ControlSequences8Bit: true},
{ControlSequences: true, ControlSequences8Bit: true},
{EastAsianWidth: true, ControlSequences: true},
{EastAsianWidth: true, ControlSequences8Bit: true},
}

for _, option := range options {
ts := option.TruncateString(text, 10, "...")

// Invariant: truncated string should be less than or equal to maxWidth
if option.String(ts) > 10 {
t.Errorf("TruncateString() returned string longer than maxWidth for %q: %q", text, ts)
}

tb := option.TruncateBytes([]byte(text), 10, []byte("..."))

// Invariant: String and Bytes paths must agree
if !bytes.Equal(tb, []byte(ts)) {
t.Errorf("TruncateBytes() returned bytes different from TruncateString() for %q: %q != %q", text, tb, ts)
t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", option, text, tb, ts)
}
}
})
Expand Down Expand Up @@ -369,6 +343,21 @@ func FuzzControlSequences(f *testing.F) {
f.Add([]byte("中文")) // plain CJK
f.Add([]byte("😀")) // plain emoji

// Seed with 8-bit C1 escape sequences
f.Add([]byte("\x9B31m")) // C1 CSI red
f.Add([]byte("\x9B0m")) // C1 CSI reset
f.Add([]byte("\x9B1m")) // C1 CSI bold
f.Add([]byte("\x9B31mhello\x9B0m")) // C1 CSI red text
f.Add([]byte("\x9B1m\x9B31mhi\x9B0m")) // C1 nested SGR
f.Add([]byte("hello\x9B31mworld\x9B0m")) // C1 mid-string
f.Add([]byte("\x9B31m中文\x9B0m")) // C1 colored CJK
f.Add([]byte("\x9B31m😀\x9B0m")) // C1 colored emoji
f.Add([]byte("\x9D0;Title\x9C")) // C1 OSC with C1 ST
f.Add([]byte("\x9D0;Title\x07")) // C1 OSC with BEL
f.Add([]byte("\x90qpayload\x9C")) // C1 DCS with C1 ST
f.Add([]byte("\x84")) // standalone C1
f.Add([]byte("\x1b[31mhello\x9B0m")) // mixed 7-bit and 8-bit

// Seed with multi-lingual text
file, err := testdata.Sample()
if err != nil {
Expand All @@ -383,7 +372,11 @@ func FuzzControlSequences(f *testing.F) {
{},
{EastAsianWidth: true},
{ControlSequences: true},
{ControlSequences8Bit: true},
{ControlSequences: true, ControlSequences8Bit: true},
{EastAsianWidth: true, ControlSequences: true},
{EastAsianWidth: true, ControlSequences8Bit: true},
{EastAsianWidth: true, ControlSequences: true, ControlSequences8Bit: true},
}

f.Fuzz(func(t *testing.T, text []byte) {
Expand Down Expand Up @@ -432,31 +425,22 @@ func FuzzControlSequences(f *testing.F) {

// Invariant: ControlSequences width <= default width
// (escape sequences become 0 instead of their visible char widths)
if opt.ControlSequences {
if opt.ControlSequences || opt.ControlSequences8Bit {
noIgnore := Options{EastAsianWidth: opt.EastAsianWidth}
wDefault := noIgnore.Bytes(text)
if wb > wDefault {
t.Errorf("ControlSequences width %d > default width %d with %+v for %q", wb, wDefault, opt, text)
}
}

// Invariant: truncation respects maxWidth (accounting for the tail,
// which is always appended and may itself exceed maxWidth)
// Exercise truncation to discover panics and infinite loops.
// Width invariant testing is in proper unit tests.
tail := "..."
tailWidth := opt.String(tail)
for _, maxWidth := range []int{0, 1, 3, 5, 10, 20} {
ts := opt.TruncateString(string(text), maxWidth, tail)
tsWidth := opt.String(ts)
limit := maxWidth
if tailWidth > limit {
limit = tailWidth
}
if tsWidth > limit {
t.Errorf("TruncateString() width %d > max(maxWidth, tailWidth) %d with %+v for %q -> %q",
tsWidth, limit, opt, text, ts)
}

tb := opt.TruncateBytes(text, maxWidth, []byte(tail))

// Invariant: String and Bytes paths must agree
if !bytes.Equal(tb, []byte(ts)) {
t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q",
opt, text, tb, ts)
Expand Down
2 changes: 2 additions & 0 deletions graphemes.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ func StringGraphemes(s string) Graphemes[string] {
func (options Options) StringGraphemes(s string) Graphemes[string] {
	iter := graphemes.FromString(s)

	// Forward both escape-sequence settings to the underlying grapheme
	// iterator: the 7-bit and 8-bit options are independent switches.
	iter.AnsiEscapeSequences = options.ControlSequences
	iter.AnsiEscapeSequences8Bit = options.ControlSequences8Bit

	// Keep the options alongside the iterator in the returned value.
	return Graphemes[string]{
		iter:    iter,
		options: options,
	}
}
Expand All @@ -66,6 +67,7 @@ func BytesGraphemes(s []byte) Graphemes[[]byte] {
func (options Options) BytesGraphemes(s []byte) Graphemes[[]byte] {
	iter := graphemes.FromBytes(s)

	// Forward both escape-sequence settings to the underlying grapheme
	// iterator: the 7-bit and 8-bit options are independent switches.
	iter.AnsiEscapeSequences = options.ControlSequences
	iter.AnsiEscapeSequences8Bit = options.ControlSequences8Bit

	// Keep the options alongside the iterator in the returned value.
	return Graphemes[[]byte]{
		iter:    iter,
		options: options,
	}
}
30 changes: 30 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package displaywidth

// Options configures how display width is computed: how ambiguous East Asian
// characters are measured, and whether ECMA-48 (ANSI) escape sequences are
// ignored. Every field defaults to false.
type Options struct {
	// EastAsianWidth selects the width of ambiguous East Asian characters.
	// False (the default) counts them as width 1; true counts them as
	// width 2.
	EastAsianWidth bool

	// ControlSequences controls whether 7-bit ECMA-48 (ANSI) escape
	// sequences are ignored when calculating display width. False (the
	// default) treats an escape sequence as just a series of characters;
	// true treats it as a single zero-width unit.
	ControlSequences bool

	// ControlSequences8Bit controls whether 8-bit ECMA-48 escape sequences
	// are ignored when calculating display width. False (the default)
	// treats them as just a series of characters; true treats each one as
	// a single zero-width unit.
	//
	// [Options.TruncateString] and [Options.TruncateBytes] ignore this
	// option.
	ControlSequences8Bit bool
}

// DefaultOptions holds the settings used by the package-level functions.
// It is the zero value of Options: EastAsianWidth, ControlSequences and
// ControlSequences8Bit are all false.
var DefaultOptions = Options{}
149 changes: 149 additions & 0 deletions truncate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package displaywidth

import (
"strings"

"github.com/clipperhouse/uax29/v2/graphemes"
)

// TruncateString shortens s so that it fits within maxWidth display cells,
// appending tail when, and only when, something was cut off. A string that
// already fits is returned unchanged.
//
// The width of tail counts against maxWidth: graphemes are kept only while
// they leave room for it. The tail is always appended on truncation, so if
// tail alone is wider than maxWidth, the result is wider than maxWidth too.
//
// With [Options.ControlSequences] enabled, 7-bit ANSI escape sequences found
// after the cut are re-emitted after the tail. This keeps sequences such as
// an SGR reset in the output, so that colors do not bleed into whatever the
// terminal prints next.
//
// [Options.ControlSequences8Bit] has no effect here. The 8-bit C1 bytes
// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so cutting and
// rejoining text around them can shift byte boundaries and form unintended
// visible characters. For 8-bit-aware width measurement, use
// [Options.String] or [Options.Bytes].
func (options Options) TruncateString(s string, maxWidth int, tail string) string {
	// Truncation never honors the 8-bit option; clear it on our local copy.
	options.ControlSequences8Bit = false

	// Width left for the text once room for the tail has been reserved.
	budget := maxWidth - options.String(tail)

	iter := graphemes.FromString(s)
	iter.AnsiEscapeSequences = options.ControlSequences

	cut := 0   // end offset of the latest grapheme that still fit the budget
	width := 0 // running width of every grapheme visited so far

	for iter.Next() {
		w := graphemeWidth(iter.Value(), options)
		if width+w <= budget {
			cut = iter.End()
		}
		width += w

		if width <= maxWidth {
			continue
		}

		// s is wider than maxWidth: cut it and append the tail.
		if !options.ControlSequences {
			return s[:cut] + tail
		}

		var out strings.Builder
		out.Grow(len(s) + len(tail)) // upper bound: original plus tail
		out.WriteString(s[:cut])
		out.WriteString(tail)

		// Re-scan what was cut off and carry its escape sequences over.
		rest := graphemes.FromString(s[cut:])
		rest.AnsiEscapeSequences = true

		for rest.Next() {
			seq := rest.Value()
			if len(seq) == 0 || seq[0] != 0x1B {
				// Not a 7-bit escape (ESC = 0x1B); drop it.
				continue
			}
			// Keep only escapes that measure as zero-width on their own;
			// some sequences (e.g. SOS) are only valid in their original
			// context.
			if options.String(seq) == 0 {
				out.WriteString(seq)
			}
		}
		return out.String()
	}

	// Everything fit; nothing to truncate.
	return s
}

// TruncateString shortens s to fit within maxWidth display cells using
// DefaultOptions, appending tail only when s had to be cut.
//
// The width of tail counts toward maxWidth. See [Options.TruncateString]
// for the full behavior.
func TruncateString(s string, maxWidth int, tail string) string {
	opts := DefaultOptions
	return opts.TruncateString(s, maxWidth, tail)
}

// TruncateBytes shortens s so that it fits within maxWidth display cells,
// appending tail when, and only when, something was cut off.
//
// Input that already fits is returned as-is: the result is the same slice
// as s, not a copy. A truncated result is always a newly allocated slice.
//
// The width of tail counts against maxWidth: graphemes are kept only while
// they leave room for it. The tail is always appended on truncation, so if
// tail alone is wider than maxWidth, the result is wider than maxWidth too.
//
// With [Options.ControlSequences] enabled, 7-bit ANSI escape sequences found
// after the cut are re-emitted after the tail. This keeps sequences such as
// an SGR reset in the output, so that colors do not bleed into whatever the
// terminal prints next.
//
// [Options.ControlSequences8Bit] has no effect here. The 8-bit C1 bytes
// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so cutting and
// rejoining text around them can shift byte boundaries and form unintended
// visible characters. For 8-bit-aware width measurement, use
// [Options.String] or [Options.Bytes].
func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte {
	// Truncation never honors the 8-bit option; clear it on our local copy.
	options.ControlSequences8Bit = false

	// Width left for the text once room for the tail has been reserved.
	budget := maxWidth - options.Bytes(tail)

	iter := graphemes.FromBytes(s)
	iter.AnsiEscapeSequences = options.ControlSequences

	cut := 0   // end offset of the latest grapheme that still fit the budget
	width := 0 // running width of every grapheme visited so far

	for iter.Next() {
		w := graphemeWidth(iter.Value(), options)
		if width+w <= budget {
			cut = iter.End()
		}
		width += w

		if width <= maxWidth {
			continue
		}

		// s is wider than maxWidth: cut it and append the tail.
		if !options.ControlSequences {
			out := make([]byte, 0, cut+len(tail))
			out = append(out, s[:cut]...)
			return append(out, tail...)
		}

		out := make([]byte, 0, len(s)+len(tail)) // upper bound: original plus tail
		out = append(out, s[:cut]...)
		out = append(out, tail...)

		// Re-scan what was cut off and carry its escape sequences over.
		rest := graphemes.FromBytes(s[cut:])
		rest.AnsiEscapeSequences = true

		for rest.Next() {
			seq := rest.Value()
			if len(seq) == 0 || seq[0] != 0x1B {
				// Not a 7-bit escape (ESC = 0x1B); drop it.
				continue
			}
			// Keep only escapes that measure as zero-width on their own;
			// some sequences (e.g. SOS) are only valid in their original
			// context.
			if options.Bytes(seq) == 0 {
				out = append(out, seq...)
			}
		}
		return out
	}

	// Everything fit; nothing to truncate.
	return s
}

// TruncateBytes shortens s to fit within maxWidth display cells using
// DefaultOptions, appending tail only when s had to be cut.
//
// The width of tail counts toward maxWidth. See [Options.TruncateBytes]
// for the full behavior.
func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte {
	opts := DefaultOptions
	return opts.TruncateBytes(s, maxWidth, tail)
}
Loading