From 1ace54fe687bfaaa5e32b02abb4eb664569c7396 Mon Sep 17 00:00:00 2001 From: Sindre Sorhus Date: Tue, 23 Dec 2025 14:31:36 +0100 Subject: [PATCH 1/2] Fix width calculation for minimally-qualified emoji sequences Fixes #67 --- index.js | 29 +++++++++++++++++++++++++++-- test.js | 20 ++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index aad1b05..59c2ab1 100644 --- a/index.js +++ b/index.js @@ -7,7 +7,8 @@ Logic: - Width rules: 1. Skip non-printing clusters (Default_Ignorable, Control, pure Mark, lone Surrogates). Tabs are ignored by design. 2. RGI emoji clusters (\p{RGI_Emoji}) are double-width. - 3. Otherwise use East Asian Width of the cluster’s first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). + 3. Minimally-qualified/unqualified emoji clusters (ZWJ sequences with 2+ Extended_Pictographic, or keycap sequences) are double-width. + 4. Otherwise use East Asian Width of the cluster's first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). 
*/ const segmenter = new Intl.Segmenter(); @@ -21,6 +22,30 @@ const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p // RGI emoji sequences const rgiEmojiRegex = /^\p{RGI_Emoji}$/v; +// Detect minimally-qualified/unqualified emoji clusters (missing VS16) +// - ZWJ sequences with 2+ Extended_Pictographic (e.g., ❤‍🔥, 🏳‍🌈, ⛹‍♂) +// - Keycap sequences (e.g., #⃣, 0⃣) +const zwjRegex = /\u200D/; +const validKeycapRegex = /^[\d#*].*\u20E3/; +const extendedPictographicRegex = /\p{Extended_Pictographic}/gu; + +function isDoubleWidthEmojiCluster(segment) { + // Keycap sequences with valid base (0-9, #, *) + if (validKeycapRegex.test(segment)) { + return true; + } + + // ZWJ sequences with 2+ Extended_Pictographic + if (zwjRegex.test(segment)) { + const matches = segment.match(extendedPictographicRegex); + if (matches && matches.length >= 2) { + return true; + } + } + + return false; +} + function baseVisible(segment) { return segment.replace(leadingNonPrintingRegex, ''); } @@ -72,7 +97,7 @@ export default function stringWidth(input, options = {}) { } // Emoji width logic - if (rgiEmojiRegex.test(segment)) { + if (rgiEmojiRegex.test(segment) || isDoubleWidthEmojiCluster(segment)) { width += 2; continue; } diff --git a/test.js b/test.js index 2b90be5..a88a638 100644 --- a/test.js +++ b/test.js @@ -249,3 +249,23 @@ test('digit zero as plain text (not emoji)', macro, '0', 1); test('digit one as plain text', macro, '1', 1); test('asterisk as plain text', macro, '*', 1); test('hash as plain text', macro, '#', 1); + +// Minimally-qualified/unqualified emoji sequences +// These are emoji sequences missing VS16 but should still be width 2 +test('heart on fire (MQ)', macro, '\u2764\u200D\u{1F525}', 2); // ❤‍🔥 +test('rainbow flag (MQ)', macro, '\u{1F3F3}\u200D\u{1F308}', 2); // 🏳‍🌈 +test('transgender flag (MQ)', macro, '\u{1F3F3}\u200D\u26A7', 2); // 🏳‍⚧ +test('broken chain (MQ)', macro, '\u26D3\u200D\u{1F4A5}', 2); // ⛓‍💥 
+test('eye in speech bubble (MQ)', macro, '\u{1F441}\u200D\u{1F5E8}', 2); // 👁‍🗨 +test('man bouncing ball (MQ)', macro, '\u26F9\u200D\u2642', 2); // ⛹‍♂ +test('woman bouncing ball (MQ)', macro, '\u26F9\u200D\u2640', 2); // ⛹‍♀ +test('man detective (MQ)', macro, '\u{1F575}\u200D\u2642', 2); // 🕵‍♂ +test('woman detective (MQ)', macro, '\u{1F575}\u200D\u2640', 2); // 🕵‍♀ + +// Unqualified keycap sequences (missing VS16) +test('keycap # (UQ)', macro, '#\u20E3', 2); // #⃣ +test('keycap 0 (UQ)', macro, '0\u20E3', 2); // 0⃣ +test('keycap * (UQ)', macro, '*\u20E3', 2); // *⃣ + +// Ensure invalid keycap sequences don't match +test('phone + keycap (invalid)', macro, '\u260E\uFE0F\u20E3', 1); // Not a valid keycap base From cdefd92379079ff1110c22319d299dcd42047275 Mon Sep 17 00:00:00 2001 From: Sindre Sorhus Date: Fri, 26 Dec 2025 17:54:18 +0100 Subject: [PATCH 2/2] Simplify emoji cluster detection --- index.js | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/index.js b/index.js index 59c2ab1..a93d1ed 100644 --- a/index.js +++ b/index.js @@ -22,25 +22,24 @@ const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p // RGI emoji sequences const rgiEmojiRegex = /^\p{RGI_Emoji}$/v; -// Detect minimally-qualified/unqualified emoji clusters (missing VS16) -// - ZWJ sequences with 2+ Extended_Pictographic (e.g., ❤‍🔥, 🏳‍🌈, ⛹‍♂) -// - Keycap sequences (e.g., #⃣, 0⃣) -const zwjRegex = /\u200D/; -const validKeycapRegex = /^[\d#*].*\u20E3/; +// Detect minimally-qualified/unqualified emoji sequences (missing VS16 but still render as double-width) +const unqualifiedKeycapRegex = /^[\d#*]\u20E3$/; const extendedPictographicRegex = /\p{Extended_Pictographic}/gu; -function isDoubleWidthEmojiCluster(segment) { - // Keycap sequences with valid base (0-9, #, *) - if (validKeycapRegex.test(segment)) { +function isDoubleWidthNonRgiEmojiSequence(segment) { + // Real emoji clusters are < 30 chars; 
guard against pathological input + if (segment.length > 50) { + return false; + } + + if (unqualifiedKeycapRegex.test(segment)) { return true; } // ZWJ sequences with 2+ Extended_Pictographic - if (zwjRegex.test(segment)) { - const matches = segment.match(extendedPictographicRegex); - if (matches && matches.length >= 2) { - return true; - } + if (segment.includes('\u200D')) { + const pictographics = segment.match(extendedPictographicRegex); + return pictographics !== null && pictographics.length >= 2; } return false; @@ -97,7 +96,7 @@ export default function stringWidth(input, options = {}) { } // Emoji width logic - if (rgiEmojiRegex.test(segment) || isDoubleWidthEmojiCluster(segment)) { + if (rgiEmojiRegex.test(segment) || isDoubleWidthNonRgiEmojiSequence(segment)) { width += 2; continue; }