From 64e0c59b0f075081c8d86c4ee33cdf94aca4f9cb Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Fri, 9 Jan 2026 04:05:09 +0900 Subject: [PATCH] Make GraphemeCategory enum tree-shakable --- scripts/unicode.js | 97 +++++++++++++++++++++++-------------- src/_grapheme_categories.js | 80 ++++++++++++++++++++++++++++++ src/_grapheme_data.js | 86 +------------------------------- src/grapheme.js | 8 ++- 4 files changed, 145 insertions(+), 126 deletions(-) create mode 100644 src/_grapheme_categories.js diff --git a/scripts/unicode.js b/scripts/unicode.js index 3b7832d..e2ae024 100755 --- a/scripts/unicode.js +++ b/scripts/unicode.js @@ -430,28 +430,63 @@ let printTableRaw = (f, name, table, format) => { /** * @param {WriteStream} f - * @param {CategorizedUnicodeRange[]} breakTable - * @param {string[]} breakCats + * @param {CategorizedUnicodeRange[]} ranges + * @param {string[]} cats + * @param {string} catsModule * @param {string} name * @returns */ -let printBreakModule = (f, breakTable, breakCats, name) => { - let cats = ['Any', ...breakCats.toSorted()]; - +let printDataModule = (f, ranges, cats, catsModule, name) => { let capitalName = capitalize(name); let typeName = `${capitalName}Category`; - let keyTypeName = `${typeName}Key`; let numTypeName = `${typeName}Num`; let rangeTypeName = `${typeName}Range`; + /** @type {Record<string, number>} */ + let inversed = {}; + cats.forEach((cat, idx) => { + inversed[cat] = idx; + }); + f.write(preamble); f.write(` import { decodeUnicodeData } from './core.js'; /** + * @typedef {import('./${catsModule}').${numTypeName}} ${numTypeName} * @typedef {import('./core.js').UnicodeDataEncoding} UnicodeDataEncoding + * @typedef {import('./core.js').CategorizedUnicodeRange<${numTypeName}>} ${rangeTypeName} */ +`, + ); + f.write(` +/** + * @type {${rangeTypeName}[]} + */ +export const ${name}_ranges = decodeUnicodeData( + /** @type {UnicodeDataEncoding} */ + ('${encodeUnicodeData(ranges.map(range => [range[0], range[1], 0]))}'), + '${ranges.map(range => 
inversed[range[2]].toString(36)).join('')}', +); +`, + ); +}; + +/** + * @param {WriteStream} f + * @param {string[]} cats + * @param {string} name + * @returns + */ +let printCategoryModule = (f, cats, name) => { + let capitalName = capitalize(name); + let typeName = `${capitalName}Category`; + let keyTypeName = `${typeName}Key`; + let numTypeName = `${typeName}Num`; + + f.write(preamble); + f.write(` /** `, ); @@ -471,13 +506,6 @@ import { decodeUnicodeData } from './core.js'; f.write(` /** - * @typedef {import('./core.js').CategorizedUnicodeRange<${numTypeName}>} ${rangeTypeName} - */ -`, - ); - - f.write(` -/** * @typedef {( `, ); @@ -489,33 +517,15 @@ import { decodeUnicodeData } from './core.js'; f.write(` /** - * Grapheme category enum - * - * Note: - * The object isn't actually frozen - * because using \`Object.freeze\` increases 800 bytes on Brotli compression. - * - * @type {Readonly<Record<${keyTypeName}, ${numTypeName}>>} + * ${capitalName}_Break property values */ -export const ${typeName} = { +export const ${typeName} = /** @type {const} */ ({ `.trimStart(), ); for (let cat of cats) { f.write(` ${cat}: ${inversed[cat]},\n`); } - f.write('};\n'); - - f.write(` -/** - * @type {${rangeTypeName}[]} - */ -export const ${name}_ranges = decodeUnicodeData( - /** @type {UnicodeDataEncoding} */ - ('${encodeUnicodeData(breakTable.map(row => [row[0], row[1], 0]))}'), - '${breakTable.map(row => inversed[row[2]].toString(36)).join('')}', -); -`, - ); + f.write('});\n'); }; /** @@ -865,12 +875,27 @@ let graphemeTableOptimized = graphemeTable.filter(([from, to, cat]) => { return true; }); +let graphemeCategories = + ['Any', ...Object.keys(graphemeCats).concat(['Extended_Pictographic']).toSorted()]; + +let graphemCatsModule = '_grapheme_categories.js' + +await emitSrc( + graphemCatsModule, + async f => printCategoryModule( + f, + graphemeCategories, + 'grapheme', + ), +); + await emitSrc( '_grapheme_data.js', - async f => printBreakModule( + async f => printDataModule( f, graphemeTableOptimized, 
Object.keys(graphemeCats).concat(['Extended_Pictographic']), + graphemeCategories, + graphemCatsModule, 'grapheme', ), ); diff --git a/src/_grapheme_categories.js b/src/_grapheme_categories.js new file mode 100644 index 0000000..390985e --- /dev/null +++ b/src/_grapheme_categories.js @@ -0,0 +1,80 @@ +// The following code was generated by "scripts/unicode.js", +// DO NOT EDIT DIRECTLY. +// +// @ts-check + +/** + * @typedef {0} GC_Any + * @typedef {1} GC_CR + * @typedef {2} GC_Control + * @typedef {3} GC_Extend + * @typedef {4} GC_Extended_Pictographic + * @typedef {5} GC_L + * @typedef {6} GC_LF + * @typedef {7} GC_LV + * @typedef {8} GC_LVT + * @typedef {9} GC_Prepend + * @typedef {10} GC_Regional_Indicator + * @typedef {11} GC_SpacingMark + * @typedef {12} GC_T + * @typedef {13} GC_V + * @typedef {14} GC_ZWJ + * @typedef {( + * | GC_Any + * | GC_CR + * | GC_Control + * | GC_Extend + * | GC_Extended_Pictographic + * | GC_L + * | GC_LF + * | GC_LV + * | GC_LVT + * | GC_Prepend + * | GC_Regional_Indicator + * | GC_SpacingMark + * | GC_T + * | GC_V + * | GC_ZWJ + * )} GraphemeCategoryNum + */ + +/** + * @typedef {( + * | 'Any' + * | 'CR' + * | 'Control' + * | 'Extend' + * | 'Extended_Pictographic' + * | 'L' + * | 'LF' + * | 'LV' + * | 'LVT' + * | 'Prepend' + * | 'Regional_Indicator' + * | 'SpacingMark' + * | 'T' + * | 'V' + * | 'ZWJ' + * )} GraphemeCategoryKey + */ + +/** + * Grapheme_Break property values + */ +export const GraphemeCategory = /** @type {const} */ ({ + Any: 0, + CR: 1, + Control: 2, + Extend: 3, + Extended_Pictographic: 4, + L: 5, + LF: 6, + LV: 7, + LVT: 8, + Prepend: 9, + Regional_Indicator: 10, + SpacingMark: 11, + T: 12, + V: 13, + ZWJ: 14, +}); diff --git a/src/_grapheme_data.js b/src/_grapheme_data.js index 76cf556..2cdc81d 100644 --- a/src/_grapheme_data.js +++ b/src/_grapheme_data.js @@ -6,95 +6,11 @@ import { decodeUnicodeData } from './core.js'; /** + * @typedef {import('./_grapheme_categories.js').GraphemeCategoryNum} GraphemeCategoryNum 
 * @typedef {import('./core.js').UnicodeDataEncoding} UnicodeDataEncoding - */ - -/** - * @typedef {0} GC_Any - * @typedef {1} GC_CR - * @typedef {2} GC_Control - * @typedef {3} GC_Extend - * @typedef {4} GC_Extended_Pictographic - * @typedef {5} GC_L - * @typedef {6} GC_LF - * @typedef {7} GC_LV - * @typedef {8} GC_LVT - * @typedef {9} GC_Prepend - * @typedef {10} GC_Regional_Indicator - * @typedef {11} GC_SpacingMark - * @typedef {12} GC_T - * @typedef {13} GC_V - * @typedef {14} GC_ZWJ - * @typedef {( - * | GC_Any - * | GC_CR - * | GC_Control - * | GC_Extend - * | GC_Extended_Pictographic - * | GC_L - * | GC_LF - * | GC_LV - * | GC_LVT - * | GC_Prepend - * | GC_Regional_Indicator - * | GC_SpacingMark - * | GC_T - * | GC_V - * | GC_ZWJ - * )} GraphemeCategoryNum - */ - -/** * @typedef {import('./core.js').CategorizedUnicodeRange<GraphemeCategoryNum>} GraphemeCategoryRange */ -/** - * @typedef {( - * | 'Any' - * | 'CR' - * | 'Control' - * | 'Extend' - * | 'Extended_Pictographic' - * | 'L' - * | 'LF' - * | 'LV' - * | 'LVT' - * | 'Prepend' - * | 'Regional_Indicator' - * | 'SpacingMark' - * | 'T' - * | 'V' - * | 'ZWJ' - * )} GraphemeCategoryKey - */ - -/** - * Grapheme category enum - * - * Note: - * The object isn't actually frozen - * because using `Object.freeze` increases 800 bytes on Brotli compression. 
- * - * @type {Readonly<Record<GraphemeCategoryKey, GraphemeCategoryNum>>} - */ -export const GraphemeCategory = { - Any: 0, - CR: 1, - Control: 2, - Extend: 3, - Extended_Pictographic: 4, - L: 5, - LF: 6, - LV: 7, - LVT: 8, - Prepend: 9, - Regional_Indicator: 10, - SpacingMark: 11, - T: 12, - V: 13, - ZWJ: 14, -}; - /** * @type {GraphemeCategoryRange[]} */ diff --git a/src/grapheme.js b/src/grapheme.js index bcee94a..c00b562 100644 --- a/src/grapheme.js +++ b/src/grapheme.js @@ -14,12 +14,12 @@ // @ts-check import { findUnicodeRangeIndex } from './core.js'; -import { GraphemeCategory, grapheme_ranges } from './_grapheme_data.js'; +import { grapheme_ranges } from './_grapheme_data.js'; import { consonant_ranges } from './_incb_data.js'; +export { GraphemeCategory } from './_grapheme_categories.js'; + /** - * @typedef {import('./_grapheme_data.js').GC_Any} GC_Any - * * @typedef {import('./_grapheme_data.js').GraphemeCategoryNum} GraphemeCategoryNum * @typedef {import('./_grapheme_data.js').GraphemeCategoryRange} GraphemeCategoryRange * @@ -31,8 +31,6 @@ import { consonant_ranges } from './_incb_data.js'; * @typedef {import('./core.js').Segmenter} GraphemeSegmenter */ -export { GraphemeCategory }; - const BMP_MAX = 0xFFFF; /**