// Review notes for this patch. They sit ahead of the first `diff --git` header, where patch
// tools skip leading text.
//
// What the patch changes
// 1. plugins/xlsx-extractor/ref/test-headers.xlsx: new binary fixture.
// 2. plugins/xlsx-extractor/src/header.normalization.spec.ts: new 129-line spec. Before each test
//    it registers ExcelExtractor() on the test listener. The test uploads the fixture, then races
//    a wait on "job:failed" (argument 1) against a wait on "sheet:counts-updated" (argument 3) and
//    throws "Job should not fail" when the failure side wins. Otherwise it expects exactly one
//    workbook with one sheet whose fields are, in order: Code, Amount_DOLLAR_, Amount_DOLLAR__1,
//    Rate_PERCENT_, empty, empty_1 (label equal to key, type "string", empty description), and
//    two records: ABC / 100 / 300 / 5% and DEF / 200 / 400 / 3%, with both "empty" columns
//    undefined. The space created in beforeAll is deleted in afterAll.
// 3. plugins/xlsx-extractor/src/utils.ts: inside prependNonUniqueHeaderColumns the header value
//    (or 'empty' when the value is falsy) has one '*' removed and is then passed through a new
//    module-private normalizeKey before the duplicate-count suffix logic runs.
// 4. utils/extractor/src/index.ts: normalizeKey gains the same final replacement step.
//
// normalizeKey (same body in both files): trim, '%' -> '_PERCENT_', '$' -> '_DOLLAR_', then every
// remaining character outside a-z, A-Z, 0-9 -> '_'.
//
// NOTE(review):
// - replace('*', '') uses a string pattern, so only the first '*' is dropped; any later '*' is
//   turned into '_' by normalizeKey. Confirm that is intended.
// - A header that is only whitespace, or only '*', is truthy and therefore skips the 'empty'
//   fallback, then normalizes to ''. The `cleanValue &&` guard is then false; the else branch is
//   outside this hunk, so verify which key it produces for such headers.
// - The final character class is ASCII-only, so non-ASCII letters and digits also become '_'.
// - The spec registers a "**" listener that console.logs every event topic, and FlatfileEvent is
//   imported only for that callback. Looks like a debugging aid; confirm it should be committed.
// - The two normalizeKey bodies are identical copies in separate packages and must be kept in
//   sync by hand.
// - utils.ts is left without a trailing newline ("\ No newline at end of file").
// - The context line `const result: Record = {}` carries no type arguments; presumably they were
//   lost when this patch text was captured. Verify against the real file before applying.
diff --git a/plugins/xlsx-extractor/ref/test-headers.xlsx b/plugins/xlsx-extractor/ref/test-headers.xlsx new file mode 100644 index 000000000..ef0706087 Binary files /dev/null and b/plugins/xlsx-extractor/ref/test-headers.xlsx differ diff --git a/plugins/xlsx-extractor/src/header.normalization.spec.ts b/plugins/xlsx-extractor/src/header.normalization.spec.ts new file mode 100644 index 000000000..edac974d1 --- /dev/null +++ b/plugins/xlsx-extractor/src/header.normalization.spec.ts @@ -0,0 +1,129 @@ +import api from '@flatfile/api' +import { + setupListener, + setupSpace, + getEnvironmentId, +} from '@flatfile/utils-testing' +import { ExcelExtractor } from '.' +import fs from 'fs' +import path from 'path' +import { FlatfileEvent } from '@flatfile/listener' + +describe('xlsx-extractor plugin', () => { + + const listener = setupListener() + let spaceId: string + + beforeAll(async () => { + const space = await setupSpace() + spaceId = space.id + }) + afterAll(async () => { + await api.spaces.delete(spaceId) + }) + + beforeEach(async () => { + listener.use(ExcelExtractor()) + }) + + it('Upload file with headers that require normalization', async () => { + + listener.on("**", async (event: FlatfileEvent) => { + console.log(event.topic) + }) + + await api.files.upload(fs.createReadStream(path.join(__dirname,'../ref/test-headers.xlsx')), { + environmentId: getEnvironmentId(), + spaceId, + }) + + const failure = async () => { + await listener.waitFor("job:failed", 1) + return false + } + const success = async () => { + await listener.waitFor("sheet:counts-updated", 3) + return true + } + + const ok = await Promise.race([failure(), success()]) + if(!ok) { + throw new Error("Job should not fail") + } else { + const { data: workbooks } = await api.workbooks.list({spaceId}) + expect(workbooks.length).toBe(1) + const { data: sheets } = await api.sheets.list({workbookId: workbooks[0].id}) + expect(sheets.length).toBe(1) + const EXPECTED_FIELDS = [{ + "description": "", + "key": 
"Code", + "label": "Code", + "type": "string", + }, + { + "description": "", + "key": "Amount_DOLLAR_", + "label": "Amount_DOLLAR_", + "type": "string", + }, + { + "description": "", + "key": "Amount_DOLLAR__1", + "label": "Amount_DOLLAR__1", + "type": "string", + }, + { + "description": "", + "key": "Rate_PERCENT_", + "label": "Rate_PERCENT_", + "type": "string", + }, + { + "description": "", + "key": "empty", + "label": "empty", + "type": "string", + }, + { + "description": "", + "key": "empty_1", + "label": "empty_1", + "type": "string", + }] + + expect(sheets[0].config.fields).toEqual(EXPECTED_FIELDS) + + const { data: { records } } = await api.records.get(sheets[0].id) + expect(records.length).toBe(2) + const data = records.map((record) => + EXPECTED_FIELDS.reduce((acc, field) => { + acc[field.key] = record.values[field.key].value + return acc + }, {}) + ) + expect(data).toEqual([ + { + "Amount_DOLLAR_": "100", + "Amount_DOLLAR__1": "300", + "Code": "ABC", + "Rate_PERCENT_": "5%", + "empty": undefined, + "empty_1": undefined, + }, + { + "Amount_DOLLAR_": "200", + "Amount_DOLLAR__1": "400", + "Code": "DEF", + "Rate_PERCENT_": "3%", + "empty": undefined, + "empty_1": undefined, + }, + ]) + + } + + }) + +}) + + diff --git a/plugins/xlsx-extractor/src/utils.ts b/plugins/xlsx-extractor/src/utils.ts index 1a25dda1e..7e9d4ecec 100644 --- a/plugins/xlsx-extractor/src/utils.ts +++ b/plugins/xlsx-extractor/src/utils.ts @@ -5,8 +5,8 @@ export function prependNonUniqueHeaderColumns( const result: Record = {} for (const [key, value] of Object.entries(record)) { - const newValue = value ? 
value : 'empty' - const cleanValue = newValue.replace('*', '') + const newValue = value || 'empty' + const cleanValue = normalizeKey(newValue.replace('*', '')) if (cleanValue && counts[cleanValue]) { result[key] = `${cleanValue}_${counts[cleanValue]}` @@ -19,3 +19,7 @@ export function prependNonUniqueHeaderColumns( return result } + +function normalizeKey(key: string): string { + return key.trim().replace(/%/g, '_PERCENT_').replace(/\$/g, '_DOLLAR_').replace(/[^a-zA-Z0-9]/g, "_") +} \ No newline at end of file diff --git a/utils/extractor/src/index.ts b/utils/extractor/src/index.ts index 979ad8528..1104301a3 100644 --- a/utils/extractor/src/index.ts +++ b/utils/extractor/src/index.ts @@ -211,7 +211,7 @@ function getSheetConfig( } function normalizeKey(key: string): string { - return key.trim().replace(/%/g, '_PERCENT_').replace(/\$/g, '_DOLLAR_') + return key.trim().replace(/%/g, '_PERCENT_').replace(/\$/g, '_DOLLAR_').replace(/[^a-zA-Z0-9]/g, "_") } function normalizeRecordKeys(record: Flatfile.RecordData): Flatfile.RecordData {