diff --git a/package-lock.json b/package-lock.json index d53dd5f..d0e500f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,7 +24,7 @@ "llparse-test-fixture": "^5.0.1", "mocha": "^9.2.2", "ts-node": "^9.0.0", - "typescript": "^4.0.3" + "typescript": "^5.0.3" } }, "node_modules/@eslint-community/eslint-utils": { @@ -403,20 +403,6 @@ "typescript": ">=4.2.0" } }, - "node_modules/@typescript-eslint/eslint-plugin/node_modules/typescript": { - "version": "5.5.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.4.tgz", - "integrity": "sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==", - "dev": true, - "peer": true, - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, "node_modules/@typescript-eslint/parser": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.0.0.tgz", @@ -498,20 +484,6 @@ "typescript": ">=4.2.0" } }, - "node_modules/@typescript-eslint/type-utils/node_modules/typescript": { - "version": "5.5.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.4.tgz", - "integrity": "sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==", - "dev": true, - "peer": true, - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, "node_modules/@typescript-eslint/types": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.0.0.tgz", @@ -601,20 +573,6 @@ "typescript": ">=4.2.0" } }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/typescript": { - "version": "5.5.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.4.tgz", - "integrity": "sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==", - "dev": true, - "peer": true, - "bin": { - "tsc": "bin/tsc", - 
"tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, "node_modules/@typescript-eslint/utils": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.0.0.tgz", @@ -2760,16 +2718,17 @@ } }, "node_modules/typescript": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.0.3.tgz", - "integrity": "sha512-tEu6DGxGgRJPb/mVPIZ48e69xCn2yRmCgYmDugAVwmJ6o+0u1RI18eO7E7WBTLYLaEVVOhwQmcdhQHweux/WPg==", + "version": "5.8.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", + "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, + "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" }, "engines": { - "node": ">=4.2.0" + "node": ">=14.17" } }, "node_modules/undici-types": { diff --git a/package.json b/package.json index 1d2a907..e08f3b3 100644 --- a/package.json +++ b/package.json @@ -43,10 +43,10 @@ "llparse-test-fixture": "^5.0.1", "mocha": "^9.2.2", "ts-node": "^9.0.0", - "typescript": "^4.0.3" + "typescript": "^5.0.3" }, "dependencies": { "debug": "^4.2.0", "llparse-frontend": "^3.0.0" } -} \ No newline at end of file +} diff --git a/src/implementation/c/compilation.ts b/src/implementation/c/compilation.ts index 34942b0..24bbf79 100644 --- a/src/implementation/c/compilation.ts +++ b/src/implementation/c/compilation.ts @@ -85,16 +85,9 @@ export class Compilation { const hex: string[] = []; for (let j = i; j < limit; j++) { const value = buffer[j]; + assert(value !== undefined); - const ch = String.fromCharCode(value); - // `'`, `\` - if (value === 0x27 || value === 0x5c) { - hex.push(`'\\${ch}'`); - } else if (value >= 0x20 && value <= 0x7e) { - hex.push(`'${ch}'`); - } else { - hex.push(`0x${value.toString(16)}`); - } + hex.push(this.toChar(value)); } let line = ' ' + hex.join(', '); if (limit !== buffer.length) { @@ -331,4 +324,16 @@ export class 
Compilation { }); return res; } + + public toChar(value: number): string { + const ch = String.fromCharCode(value); + // `'`, `\` + if (value === 0x27 || value === 0x5c) { + return `'\\${ch}'`; + } else if (value >= 0x20 && value <= 0x7e) { + return `'${ch}'`; + } else { + return `0x${value.toString(16)}`; + } + } } diff --git a/src/implementation/c/index.ts b/src/implementation/c/index.ts index e7b5000..7728a6c 100644 --- a/src/implementation/c/index.ts +++ b/src/implementation/c/index.ts @@ -49,6 +49,11 @@ export class CCompiler { out.push('#endif /* __SSE4_2__ */'); out.push(''); + out.push('#ifdef __wasm__'); + out.push(' #include <wasm_simd128.h>'); + out.push('#endif /* __wasm__ */'); + out.push(''); + out.push('#ifdef _MSC_VER'); out.push(' #define ALIGN(n) _declspec(align(n))'); out.push('#else /* !_MSC_VER */'); diff --git a/src/implementation/c/node/table-lookup.ts b/src/implementation/c/node/table-lookup.ts index e71cacc..6aea501 100644 --- a/src/implementation/c/node/table-lookup.ts +++ b/src/implementation/c/node/table-lookup.ts @@ -11,6 +11,7 @@ const SSE_RANGES_LEN = 16; // _mm_cmpestri takes 128bit input const SSE_RANGES_PAD = 16; const MAX_SSE_CALLS = 2; +const MAX_WASM_RANGES = 32; const SSE_ALIGNMENT = 16; interface ITable { @@ -34,7 +35,10 @@ export class TableLookup extends Node { // Try to vectorize nodes matching characters and looping to themselves // NOTE: `switch` below triggers when there is not enough characters in the // stream for vectorized processing. 
- this.buildSSE(out); + if (this.canVectorize()) { + this.buildSSE(out); + this.buildWASM(out); + } const current = transform.build(ctx, `*${ctx.posArg()}`); out.push(`switch (${table.name}[(uint8_t) ${current}]) {`); @@ -63,9 +67,7 @@ export class TableLookup extends Node { out.push('}'); } - private buildSSE(out: string[]): boolean { - const ctx = this.compilation; - + private canVectorize(): boolean { // Transformation is not supported atm if (this.ref.transform && this.ref.transform.ref.name !== 'id') { return false; @@ -83,8 +85,14 @@ export class TableLookup extends Node { return false; } + assert.strictEqual(edge.noAdvance, false); + + return true; + } + + private buildRanges(edge: frontend.node.TableLookup["edges"][0]): number[] { // NOTE: keys are sorted - let ranges: number[] = []; + const ranges: number[] = []; let first: number | undefined; let last: number | undefined; for (const key of edge.keys) { @@ -104,6 +112,16 @@ export class TableLookup extends Node { if (first !== undefined && last !== undefined) { ranges.push(first, last); } + return ranges; + } + + private buildSSE(out: string[]): boolean { + const ctx = this.compilation; + + const edge = this.ref.edges[0]; + assert(edge !== undefined); + + const ranges = this.buildRanges(edge); if (ranges.length === 0) { return false; @@ -118,7 +136,6 @@ export class TableLookup extends Node { out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`); out.push(' __m128i ranges;'); out.push(' __m128i input;'); - out.push(' int avail;'); out.push(' int match_len;'); out.push(''); out.push(' /* Load input */'); @@ -145,7 +162,6 @@ export class TableLookup extends Node { out.push(` ${ctx.posArg()} += match_len;`); const tmp: string[] = []; - assert.strictEqual(edge.noAdvance, false); this.tailTo(tmp, { noAdvance: true, node: edge.node, @@ -167,6 +183,89 @@ export class TableLookup extends Node { return true; } + // Emit a `__wasm_simd128__`-guarded fast path that consumes the leading run (up to 16 bytes) of input matching the first edge's key ranges; returns false when nothing was emitted + private buildWASM(out: string[]): boolean { + const ctx = this.compilation; + + const edge = 
this.ref.edges[0]; + assert(edge !== undefined); + + const ranges = this.buildRanges(edge); + + if (ranges.length === 0) { + return false; + } + + // Way too many calls would be required + if (ranges.length > MAX_WASM_RANGES) { + return false; + } + + out.push('#ifdef __wasm_simd128__'); + out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`); + out.push(' v128_t input;'); + out.push(' v128_t total;'); + out.push(' v128_t single;'); + out.push(' int match_len;'); + out.push(''); + out.push(' /* Load input */'); + out.push(` input = wasm_v128_load(${ctx.posArg()});`); + + out.push(' /* Find first character that does not match `ranges` */'); + function v128(value: number): string { + return `wasm_u8x16_const_splat(${ctx.toChar(value)})`; + } + + for (let off = 0; off < ranges.length; off += 2) { + const start = ranges[off]; + const end = ranges[off + 1]; + assert(start !== undefined); + assert(end !== undefined); + + const varName = off === 0 ? 'total' : 'single'; + + // Same character, equality is sufficient (and faster) + if (start === end) { + out.push(` ${varName} = wasm_i8x16_eq(input, ${v128(start)});`); + } else { + // Input bytes are unsigned: signed i8x16 ordering would never match a range crossing 0x7f/0x80 + out.push(` ${varName} = wasm_v128_and(`); + out.push(` wasm_u8x16_ge(input, ${v128(start)}),`); + out.push(` wasm_u8x16_le(input, ${v128(end)})`); + out.push(' );'); + } + + if (off !== 0) { + out.push(' total = wasm_v128_or(total, single);'); + } + } + out.push(' total = wasm_v128_not(total);'); + // Bit 16 is a sentinel: the bitmask is 0 when all 16 lanes match and __builtin_ctz(0) is undefined, so cap match_len at 16 + out.push(' match_len = __builtin_ctz(wasm_i8x16_bitmask(total) | 0x10000);'); + out.push(' if (match_len != 0) {'); + out.push(` ${ctx.posArg()} += match_len;`); + + const tmp: string[] = []; + this.tailTo(tmp, { + noAdvance: true, + node: edge.node, + }); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + + { + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + } + out.push('}'); + + out.push('#endif /* __wasm_simd128__ */'); + + return true; + } + private buildTable(): ITable { const table: number[] = new 
Array(MAX_CHAR + 1).fill(0);