Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 61 additions & 36 deletions scripts/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -430,28 +430,63 @@ let printTableRaw = (f, name, table, format) => {

/**
* @param {WriteStream} f
* @param {CategorizedUnicodeRange[]} breakTable
* @param {string[]} breakCats
* @param {CategorizedUnicodeRange[]} ranges
* @param {string[]} cats
* @param {string} catsModule
* @param {string} name
* @returns
*/
let printBreakModule = (f, breakTable, breakCats, name) => {
let cats = ['Any', ...breakCats.toSorted()];

/**
 * Writes a generated data module that exports `${name}_ranges`.
 *
 * The emitted source imports `decodeUnicodeData` from `./core.js`, re-declares the
 * `<Name>CategoryNum` typedef from `catsModule`, and hands the decoder two strings:
 * the `encodeUnicodeData` result for the range bounds (third slot of each row set to 0)
 * and one base-36 digit per range giving that range's category position in `cats`.
 *
 * @param {WriteStream} f Stream that receives the module source.
 * @param {CategorizedUnicodeRange[]} ranges Rows read as `[from, to, categoryKey]`.
 * @param {string[]} cats Category keys; a key's position in this list is its numeric id.
 * @param {string} catsModule File name of the categories module, as imported from the emitted file.
 * @param {string} name Prefix used for the exported constant and (capitalized) for type names.
 * @returns {void}
 * @throws {Error} If a range names a category that is not listed in `cats`.
 */
let printDataModule = (f, ranges, cats, catsModule, name) => {
  let typeName = `${capitalize(name)}Category`;
  let numTypeName = `${typeName}Num`;
  let rangeTypeName = `${typeName}Range`;

  /** @type {Map<string, number>} */
  let indexByCat = new Map(cats.map((cat, idx) => [cat, idx]));

  // Resolve every category before the first write, so a bad table fails with a
  // readable message instead of leaving a half-written module behind.
  // NOTE(review): digits are concatenated without a separator, so ids >= 36 would take
  // two characters; presumably decodeUnicodeData expects one per range - confirm
  // before growing `cats` past 36 entries.
  let catDigits = ranges
    .map(range => {
      let idx = indexByCat.get(range[2]);
      if (idx === undefined) {
        throw new Error(
          `Unknown ${name} category "${range[2]}"; expected one of: ${cats.join(', ')}`,
        );
      }
      return idx.toString(36);
    })
    .join('');

  // The category slot is zeroed here because categories travel in `catDigits`.
  let encodedBounds = encodeUnicodeData(ranges.map(range => [range[0], range[1], 0]));

  f.write(preamble);
  f.write(`
import { decodeUnicodeData } from './core.js';

/**
* @typedef {import('./${catsModule}').${numTypeName}} ${numTypeName}
* @typedef {import('./core.js').UnicodeDataEncoding} UnicodeDataEncoding
* @typedef {import('./core.js').CategorizedUnicodeRange<${numTypeName}>} ${rangeTypeName}
*/
`,
  );

  f.write(`
/**
* @type {${rangeTypeName}[]}
*/
export const ${name}_ranges = decodeUnicodeData(
/** @type {UnicodeDataEncoding} */
('${encodedBounds}'),
'${catDigits}',
);
`,
  );
};

/**
* @param {WriteStream} f
* @param {string[]} cats
* @param {string} name
* @returns
*/
let printCategoryModule = (f, cats, name) => {
let capitalName = capitalize(name);
let typeName = `${capitalName}Category`;
let keyTypeName = `${typeName}Key`;
let numTypeName = `${typeName}Num`;

f.write(preamble);
f.write(`
/**
`,
);
Expand All @@ -471,13 +506,6 @@ import { decodeUnicodeData } from './core.js';

f.write(`
/**
* @typedef {import('./core.js').CategorizedUnicodeRange<${numTypeName}>} ${rangeTypeName}
*/
`,
);

f.write(`
/**
* @typedef {(
`,
);
Expand All @@ -489,33 +517,15 @@ import { decodeUnicodeData } from './core.js';

f.write(`
/**
* Grapheme category enum
*
* Note:
* The object isn't actually frozen
* because using \`Object.freeze\` increases 800 bytes on Brotli compression.
*
* @type {Readonly<Record<${keyTypeName}, ${numTypeName}>>}
* ${capitalName}_Break property values
*/
export const ${typeName} = {
export const ${typeName} = /** @type {const} */ ({
`.trimStart(),
);
for (let cat of cats) {
f.write(` ${cat}: ${inversed[cat]},\n`);
}
f.write('};\n');

f.write(`
/**
* @type {${rangeTypeName}[]}
*/
export const ${name}_ranges = decodeUnicodeData(
/** @type {UnicodeDataEncoding} */
('${encodeUnicodeData(breakTable.map(row => [row[0], row[1], 0]))}'),
'${breakTable.map(row => inversed[row[2]].toString(36)).join('')}',
);
`,
);
f.write('});\n');
};

/**
Expand Down Expand Up @@ -865,12 +875,27 @@ let graphemeTableOptimized = graphemeTable.filter(([from, to, cat]) => {
return true;
});

// Category keys for the generated grapheme modules: 'Any' is pinned first and the
// remaining keys follow in sorted order. The data module encodes each range's
// category as its position in this list.
let graphemeCategories = [
  'Any',
  ...[...Object.keys(graphemeCats), 'Extended_Pictographic'].toSorted(),
];

// NOTE(review): the identifier is misspelled ("graphem" -> "grapheme"). It is also read
// by the '_grapheme_data.js' emit call further down, so rename both sites together.
let graphemCatsModule = '_grapheme_categories.js';

// Emit the categories module (typedefs plus the GraphemeCategory enum object).
await emitSrc(
  graphemCatsModule,
  async f => printCategoryModule(f, graphemeCategories, 'grapheme'),
);

await emitSrc(
'_grapheme_data.js',
async f => printBreakModule(
async f => printDataModule(
f,
graphemeTableOptimized,
Object.keys(graphemeCats).concat(['Extended_Pictographic']),
graphemeCategories,
graphemCatsModule,
'grapheme',
),
);
Expand Down
80 changes: 80 additions & 0 deletions src/_grapheme_categories.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// The following code was generated by "scripts/unicode.js",
// DO NOT EDIT DIRECTLY.
//
// @ts-check

/**
* @typedef {0} GC_Any
* @typedef {1} GC_CR
* @typedef {2} GC_Control
* @typedef {3} GC_Extend
* @typedef {4} GC_Extended_Pictographic
* @typedef {5} GC_L
* @typedef {6} GC_LF
* @typedef {7} GC_LV
* @typedef {8} GC_LVT
* @typedef {9} GC_Prepend
* @typedef {10} GC_Regional_Indicator
* @typedef {11} GC_SpacingMark
* @typedef {12} GC_T
* @typedef {13} GC_V
* @typedef {14} GC_ZWJ
* @typedef {(
* | GC_Any
* | GC_CR
* | GC_Control
* | GC_Extend
* | GC_Extended_Pictographic
* | GC_L
* | GC_LF
* | GC_LV
* | GC_LVT
* | GC_Prepend
* | GC_Regional_Indicator
* | GC_SpacingMark
* | GC_T
* | GC_V
* | GC_ZWJ
* )} GraphemeCategoryNum
*/

/**
* @typedef {(
* | 'Any'
* | 'CR'
* | 'Control'
* | 'Extend'
* | 'Extended_Pictographic'
* | 'L'
* | 'LF'
* | 'LV'
* | 'LVT'
* | 'Prepend'
* | 'Regional_Indicator'
* | 'SpacingMark'
* | 'T'
* | 'V'
* | 'ZWJ'
* )} GraphemeCategoryKey
*/

/**
* Grapheme_Break property values, keyed by category name.
*
* Each value equals the matching `GC_*` literal typedef declared earlier in this file,
* and the keys are exactly the members of `GraphemeCategoryKey`.
*
* This file is generated by "scripts/unicode.js"; change the generator rather than
* editing this object by hand.
*/
export const GraphemeCategory = /** @type {const} */ ({
Any: 0,
CR: 1,
Control: 2,
Extend: 3,
Extended_Pictographic: 4,
L: 5,
LF: 6,
LV: 7,
LVT: 8,
Prepend: 9,
Regional_Indicator: 10,
SpacingMark: 11,
T: 12,
V: 13,
ZWJ: 14,
});
86 changes: 1 addition & 85 deletions src/_grapheme_data.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 3 additions & 5 deletions src/grapheme.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
// @ts-check

import { findUnicodeRangeIndex } from './core.js';
import { GraphemeCategory, grapheme_ranges } from './_grapheme_data.js';
import { grapheme_ranges } from './_grapheme_data.js';
import { consonant_ranges } from './_incb_data.js';

export { GraphemeCategory } from './_grapheme_categories.js';

/**
* @typedef {import('./_grapheme_data.js').GC_Any} GC_Any
*
* @typedef {import('./_grapheme_data.js').GraphemeCategoryNum} GraphemeCategoryNum
* @typedef {import('./_grapheme_data.js').GraphemeCategoryRange} GraphemeCategoryRange
*
Expand All @@ -31,8 +31,6 @@ import { consonant_ranges } from './_incb_data.js';
* @typedef {import('./core.js').Segmenter<GraphemeSegmentExtra>} GraphemeSegmenter
*/

export { GraphemeCategory };

const BMP_MAX = 0xFFFF;

/**
Expand Down