Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 13 additions & 17 deletions src/grapheme.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,42 +50,39 @@ export function* graphemeSegments(input) {
// do nothing on empty string
if (cp == null) return;

/** Current cursor position. */
let cursor = cp <= BMP_MAX ? 1 : 2;

/** Total length of the input string. */
let len = input.length;

let index = 0;
let cursor = 0;

/** Category of codepoint immediately preceding cursor */
let catBefore = cat(cp);

/** @type {GraphemeCategoryNum} Category of codepoint immediately following cursor. */
let catAfter = 0;

/** The number of RIS codepoints preceding `cursor`. */
let risCount = 0;
/** The number of RI codepoints preceding `cursor`. */
let riCount = 0;

/**
* Emoji state for GB11: tracks if we've seen Extended_Pictographic followed by Extend* ZWJ
* Only relevant when catBefore === ZWJ && catAfter === Extended_Pictographic
*/
let emoji = false;

/** InCB=Consonant - segment started with Indic consonant */
let consonant = false;

/** InCB=Linker - seen a linker after consonant */
let linker = false;
/** State for Indic scripts */
let consonant = false, linker = false;

let index = 0;

/** Beginning category of a segment */
/** Memoize the beginning category of the segment */
let _catBegin = catBefore;

/** Memoize the beginning code point of the segment. */
let _hd = cp;

while (cursor < len) {
cursor += cp <= BMP_MAX ? 1 : 2;

cp = /** @type {number} */ (input.codePointAt(cursor));
catAfter = cat(cp);

Expand Down Expand Up @@ -117,8 +114,8 @@ export function* graphemeSegments(input) {
}
// GB12, GB13: RI × RI (odd count means no break)
else if (catBefore === 10 && catAfter === 10) {
// risCount is count BEFORE current RI, so odd means this is 2nd, 4th, etc.
boundary = risCount++ % 2 === 1;
// riCount is count BEFORE current RI, so odd means this is 2nd, 4th, etc.
boundary = riCount++ % 2 === 1;
}
// GB6: L × (L | V | LV | LVT)
else if (catBefore === 5) {
Expand Down Expand Up @@ -150,7 +147,7 @@ export function* graphemeSegments(input) {

// Reset segment state
emoji = false;
risCount = 0;
riCount = 0;
index = cursor;
_catBegin = catAfter;
_hd = cp;
Expand Down Expand Up @@ -181,7 +178,6 @@ export function* graphemeSegments(input) {
}
}

cursor += cp <= BMP_MAX ? 1 : 2;
catBefore = catAfter;
}

Expand Down