diff --git a/index.js b/index.js index aad1b05..a93d1ed 100644 --- a/index.js +++ b/index.js @@ -7,7 +7,8 @@ Logic: - Width rules: 1. Skip non-printing clusters (Default_Ignorable, Control, pure Mark, lone Surrogates). Tabs are ignored by design. 2. RGI emoji clusters (\p{RGI_Emoji}) are double-width. - 3. Otherwise use East Asian Width of the cluster’s first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). + 3. Minimally-qualified/unqualified emoji clusters (ZWJ sequences with 2+ Extended_Pictographic, or keycap sequences) are double-width. + 4. Otherwise use East Asian Width of the cluster's first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). */ const segmenter = new Intl.Segmenter(); @@ -21,6 +22,29 @@ const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p // RGI emoji sequences const rgiEmojiRegex = /^\p{RGI_Emoji}$/v; +// Detect minimally-qualified/unqualified emoji sequences (missing VS16 but still render as double-width) +const unqualifiedKeycapRegex = /^[\d#*]\u20E3$/; +const extendedPictographicRegex = /\p{Extended_Pictographic}/gu; + +function isDoubleWidthNonRgiEmojiSequence(segment) { + // Real emoji clusters are < 30 chars; guard against pathological input + if (segment.length > 50) { + return false; + } + + if (unqualifiedKeycapRegex.test(segment)) { + return true; + } + + // ZWJ sequences with 2+ Extended_Pictographic + if (segment.includes('\u200D')) { + const pictographics = segment.match(extendedPictographicRegex); + return pictographics !== null && pictographics.length >= 2; + } + + return false; +} + function baseVisible(segment) { return segment.replace(leadingNonPrintingRegex, ''); } @@ -72,7 +96,7 @@ export default function stringWidth(input, options = {}) { } // Emoji width logic - if (rgiEmojiRegex.test(segment)) { + if 
(rgiEmojiRegex.test(segment) || isDoubleWidthNonRgiEmojiSequence(segment)) { width += 2; continue; } diff --git a/test.js b/test.js index 2b90be5..a88a638 100644 --- a/test.js +++ b/test.js @@ -249,3 +249,23 @@ test('digit zero as plain text (not emoji)', macro, '0', 1); test('digit one as plain text', macro, '1', 1); test('asterisk as plain text', macro, '*', 1); test('hash as plain text', macro, '#', 1); + +// Minimally-qualified/unqualified emoji sequences +// These are emoji sequences missing VS16 but should still be width 2 +test('heart on fire (MQ)', macro, '\u2764\u200D\u{1F525}', 2); // ❤‍🔥 +test('rainbow flag (MQ)', macro, '\u{1F3F3}\u200D\u{1F308}', 2); // 🏳‍🌈 +test('transgender flag (MQ)', macro, '\u{1F3F3}\u200D\u26A7', 2); // 🏳‍⚧ +test('broken chain (MQ)', macro, '\u26D3\u200D\u{1F4A5}', 2); // ⛓‍💥 +test('eye in speech bubble (MQ)', macro, '\u{1F441}\u200D\u{1F5E8}', 2); // 👁‍🗨 +test('man bouncing ball (MQ)', macro, '\u26F9\u200D\u2642', 2); // ⛹‍♂ +test('woman bouncing ball (MQ)', macro, '\u26F9\u200D\u2640', 2); // ⛹‍♀ +test('man detective (MQ)', macro, '\u{1F575}\u200D\u2642', 2); // 🕵‍♂ +test('woman detective (MQ)', macro, '\u{1F575}\u200D\u2640', 2); // 🕵‍♀ + +// Unqualified keycap sequences (missing VS16) +test('keycap # (UQ)', macro, '#\u20E3', 2); // #⃣ +test('keycap 0 (UQ)', macro, '0\u20E3', 2); // 0⃣ +test('keycap * (UQ)', macro, '*\u20E3', 2); // *⃣ + +// Ensure invalid keycap sequences don't match +test('phone + keycap (invalid)', macro, '\u260E\uFE0F\u20E3', 1); // Not a valid keycap base