Skip to content

Commit be6df57

Browse files
committed
Refactoring in support of memoization
1 parent aa866e8 commit be6df57

File tree

2 files changed

+76
-53
lines changed

2 files changed

+76
-53
lines changed

packages/wasm/src/index.js

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import * as w from '@wasmgroundup/emit';
77
const {instr} = w;
88
import {pexprs} from 'ohm-js';
99

10+
const WASM_PAGE_SIZE = 64 * 1024;
11+
1012
function assert(cond, msg) {
1113
if (!cond) {
1214
throw new Error(msg ?? 'assertion failed');
@@ -45,7 +47,7 @@ function getDebugLabel(exp) {
4547
Offers a higher-level interface for generating WebAssembly code and
4648
constructing a module.
4749
*/
48-
export class Assembler {
50+
class Assembler {
4951
constructor() {
5052
this._globals = new Map();
5153

@@ -147,6 +149,11 @@ export class Assembler {
147149
this.emit(w.instr.end);
148150
}
149151

152+
// Emits an `i32.ne` comparison (via i32Ne) immediately followed by an `if`
// block, forwarding `bt` and `bodyThunk` unchanged to `this.if`.
// NOTE(review): because the first instruction is `i32.ne`, this presumably
// expects the two i32 operands being compared to already be on the stack, and
// the body runs when they differ -- confirm against callers.
ifFalse(bt, bodyThunk) {
153+
this.i32Ne();
154+
this.if(bt, bodyThunk);
155+
}
156+
150157
// Emits a `br` instruction whose label index is built from `depth`.
br(depth) {
151158
this.emit(instr.br, w.labelidx(depth));
152159
}
@@ -167,6 +174,10 @@ export class Assembler {
167174
this.emit(instr.i32.load8_u, w.memarg(Assembler.ALIGN_1_BYTE, offset));
168175
}
169176

177+
// Emits a single `i32.mul` instruction.
i32Mul() {
178+
this.emit(instr.i32.mul);
179+
}
180+
170181
// Emits a single `i32.ne` instruction.
i32Ne() {
171182
this.emit(instr.i32.ne);
172183
}
@@ -309,7 +320,7 @@ export class Assembler {
309320
// Emits code that advances the 'cst' global by one node: it reads 'cst',
// duplicates the value, adds the node size and stores the sum back in 'cst'.
// NOTE(review): presumably the duplicated (pre-increment) value stays on the
// stack as the address of the newly allocated node -- depends on `dup`, which
// is not visible here.
cstNodeAlloc() {
310321
this.globalGet('cst');
311322
this.dup();
312-
this.i32Const(8);
323+
this.i32Const(Assembler.CST_NODE_SIZE_BYTES);
313324
this.i32Add();
314325
this.globalSet('cst');
315326
}
@@ -352,9 +363,11 @@ export class Assembler {
352363
}
353364
Assembler.ALIGN_1_BYTE = 0;
354365
Assembler.ALIGN_4_BYTES = 2;
366+
Assembler.CST_NODE_SIZE_BYTES = 8;
367+
Assembler.MEMO_REC_SIZE_BYTES = 16;
355368
Assembler.STACK_FRAME_SIZE_BYTES = 8;
356369

357-
export class Compiler {
370+
class Compiler {
358371
constructor(grammar) {
359372
this.importDecls = [
360373
{
@@ -447,6 +460,7 @@ export class Compiler {
447460

448461
buildModule(functionDecls) {
449462
const {importDecls} = this;
463+
// TODO: Compress types!
450464
const types = [...importDecls, ...functionDecls].map(f =>
451465
w.functype(f.paramTypes, f.resultTypes),
452466
);
@@ -533,8 +547,9 @@ export class Compiler {
533547

534548
emitMatchBody() {
535549
const {asm} = this;
536-
const getInputLen = () => [instr.local.get, w.localidx(0)];
537-
const getCurrPos = () => [instr.global.get, w.globalidx(0)];
550+
asm.addLocal('inputLen', w.valtype.i32);
551+
asm.addLocal('ret', w.valtype.i32);
552+
asm.addLocal('tmp', w.valtype.i32);
538553

539554
asm.i32Const(0);
540555
asm.globalSet('pos');
@@ -546,16 +561,21 @@ export class Compiler {
546561
asm.globalSet('cst');
547562

548563
asm.i32Const(0); // offset
549-
asm.i32Const(64 * 1024); // maxLen
564+
asm.i32Const(WASM_PAGE_SIZE); // maxLen
550565
asm.emit(instr.call, w.funcidx(0)); // fillInputBuffer
551566
asm.emit(instr.local.set, w.localidx(0)); // set inputLen
552567

553-
asm.emit(instr.call, this.ruleEvalFuncIdx(this.grammar.defaultStartRule));
568+
// TODO: This should probably be a seq of [Apply, end] just like in the JS version.
569+
// Note that in the CST tests, the depth of all nodes will increase by 1.
570+
this.emitPExpr(new pexprs.Apply(this.grammar.defaultStartRule));
571+
asm.localGet('ret');
554572
asm.ifElse(
555573
w.blocktype.i32,
556574
() => {
557575
// match succeeded -- return currPos == inputLen
558-
asm.emit(getInputLen(), getCurrPos(), instr.i32.eq);
576+
asm.localGet('inputLen');
577+
asm.globalGet('pos');
578+
asm.emit(instr.i32.eq);
559579
},
560580
() => {
561581
asm.i32Const(0);
@@ -573,21 +593,13 @@ export class Compiler {
573593
const name = [...this.ruleIdxByName.keys()][i];
574594
ruleDecls.push(this.compileRule(name, this.ruleBody(name)));
575595
}
596+
this.asm.addFunction('match', [], [w.valtype.i32], () => this.emitMatchBody());
576597

577-
return [
578-
{
579-
name: 'match',
580-
paramTypes: [],
581-
resultTypes: [w.valtype.i32],
582-
locals: [w.locals(1, w.valtype.i32)],
583-
body: this.asm.doEmit(() => this.emitMatchBody()),
584-
},
585-
...ruleDecls,
586-
];
598+
return [this.asm._functionDecls.at(-1), ...ruleDecls];
587599
}
588600

589601
// Contract: emitPExpr always means we're going deeper in the PExpr tree.
590-
emitPExpr(exp, {skipBacktracking} = {}) {
602+
emitPExpr(exp, {skipBacktracking, saveCst} = {}) {
591603
const {asm} = this;
592604
const isLookahead = exp.constructor === pexprs.Lookahead || exp.constructor === pexprs.Not;
593605
const emitBacktracking = !skipBacktracking && !isLookahead;
@@ -800,8 +812,7 @@ export class Compiler {
800812
// Compare next char
801813
asm.i32Const(c.charCodeAt(0));
802814
asm.currCharCode();
803-
asm.i32Ne();
804-
asm.if(w.blocktype.empty, () => {
815+
asm.ifFalse(w.blocktype.empty, () => {
805816
asm.i32Const(0);
806817
asm.localSet('ret');
807818
asm.break(1);
@@ -817,10 +828,10 @@ export class Compiler {
817828
// - 2nd page is for input buffer (max 64k for now).
818829
// - Pages 3-18 (incl.) for memo table (4 entries per char, 4 bytes each).
819830
// - Remainder (>18) is for CST (growing upwards).
820-
Compiler.INPUT_BUFFER_OFFSET = 64 * 1024; // Offset of the input buffer in memory.
821-
Compiler.STACK_START_OFFSET = 64 * 1024; // Starting offset of the stack.
822-
Compiler.MEMO_START_OFFSET = 2 * (64 * 1024); // Starting offset of memo records.
823-
Compiler.CST_START_OFFSET = 18 * (64 * 1024); // Starting offset of CST records.
831+
Compiler.STACK_START_OFFSET = WASM_PAGE_SIZE; // Starting offset of the stack.
832+
Compiler.INPUT_BUFFER_OFFSET = WASM_PAGE_SIZE; // Offset of the input buffer in memory.
833+
Compiler.MEMO_START_OFFSET = 2 * WASM_PAGE_SIZE; // Starting offset of memo records.
834+
Compiler.CST_START_OFFSET = 18 * WASM_PAGE_SIZE; // Starting offset of CST records.
824835

825836
export class WasmMatcher {
826837
constructor(grammar) {
@@ -881,3 +892,9 @@ export class WasmMatcher {
881892
return written;
882893
}
883894
}
895+
896+
/**
 * Memory-layout constants re-exported for the test suite, so tests do not
 * have to hard-code sizes and offsets used by the generated module.
 *
 * - CST_NODE_SIZE_BYTES: value of Assembler.CST_NODE_SIZE_BYTES.
 * - CST_START_OFFSET: value of Compiler.CST_START_OFFSET.
 * - MEMO_REC_SIZE_BYTES: value of Assembler.MEMO_REC_SIZE_BYTES.
 */
export const ConstantsForTesting = {
  CST_NODE_SIZE_BYTES: Assembler.CST_NODE_SIZE_BYTES,
  CST_START_OFFSET: Compiler.CST_START_OFFSET,
  // MEMO_REC_SIZE_BYTES is declared on Assembler (next to CST_NODE_SIZE_BYTES),
  // not on Compiler; reading it from Compiler would export `undefined`.
  MEMO_REC_SIZE_BYTES: Assembler.MEMO_REC_SIZE_BYTES,
};

packages/wasm/test/test-wasm.js

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import test from 'ava';
22
import * as ohm from 'ohm-js';
33
import {performance} from 'perf_hooks';
44

5-
import {WasmMatcher} from '../src/index.js';
5+
import {ConstantsForTesting as Constants, WasmMatcher} from '../src/index.js';
66

77
const matchWithInput = (m, str) => (m.setInput(str), m.match());
88

@@ -67,9 +67,10 @@ test('basic cst', async t => {
6767
t.is(matchWithInput(matcher, input), 1);
6868
t.deepEqual(rawCst(matcher), [
6969
[0, 2],
70-
[1, 1],
71-
[1, 1],
70+
[1, 2],
71+
[2, 1],
7272
[2, 1],
73+
[3, 1],
7374
]);
7475

7576
matcher = await WasmMatcher.forGrammar(ohm.grammar('G { start = "a" | b\nb = "b" }'));
@@ -78,6 +79,7 @@ test('basic cst', async t => {
7879
t.deepEqual(rawCst(matcher), [
7980
[0, 1],
8081
[1, 1],
82+
[2, 1],
8183
]);
8284

8385
input = 'b';
@@ -86,6 +88,7 @@ test('basic cst', async t => {
8688
[0, 1],
8789
[1, 1],
8890
[2, 1],
91+
[3, 1],
8992
]);
9093
});
9194

@@ -97,29 +100,14 @@ test('cst with lookahead', async t => {
97100
[0, 1],
98101
[1, 1],
99102
[2, 1],
103+
[3, 1],
100104
]);
101105

102106
matcher = await WasmMatcher.forGrammar(ohm.grammar('G {x = (~space any)*}'));
103107
input = 'abc';
104108
t.is(matchWithInput(matcher, input), 1);
105109
t.deepEqual(rawCst(matcher), [
106-
[0, 3], // - rep
107-
[1, 1], // - seq
108-
[2, 1], // - any
109-
[3, 1], // - (child)
110-
[1, 1], // - seq
111-
[2, 1], // - any
112-
[3, 1], // - (child)
113-
[1, 1], // - seq
114-
[2, 1], // - any
115-
[3, 1], // - (child)
116-
]);
117-
118-
matcher = await WasmMatcher.forGrammar(ohm.grammar('G {x = (~space any)+ spaces any+}'));
119-
input = '/ab xy';
120-
t.is(matchWithInput(matcher, input), 1);
121-
t.deepEqual(rawCst(matcher), [
122-
[0, 6], // - seq
110+
[0, 3], // - apply
123111
[1, 3], // - rep
124112
[2, 1], // - seq
125113
[3, 1], // - any
@@ -130,15 +118,33 @@ test('cst with lookahead', async t => {
130118
[2, 1], // - seq
131119
[3, 1], // - any
132120
[4, 1], // - (child)
133-
[1, 1], // - spaces
134-
[2, 1], // - rep
135-
[3, 1], // - space
121+
]);
122+
123+
matcher = await WasmMatcher.forGrammar(ohm.grammar('G {x = (~space any)+ spaces any+}'));
124+
input = '/ab xy';
125+
t.is(matchWithInput(matcher, input), 1);
126+
t.deepEqual(rawCst(matcher), [
127+
[0, 6], // - apply
128+
[1, 6], // - seq
129+
[2, 3], // - rep
130+
[3, 1], // - seq
131+
[4, 1], // - any
132+
[5, 1], // - (child)
133+
[3, 1], // - seq
134+
[4, 1], // - any
135+
[5, 1], // - (child)
136+
[3, 1], // - seq
137+
[4, 1], // - any
138+
[5, 1], // - (child)
139+
[2, 1], // - spaces
140+
[3, 1], // - rep
141+
[4, 1], // - space
142+
[5, 1], // - (child)
143+
[2, 2], // - rep
144+
[3, 1], // - any
145+
[4, 1], // - (child)
146+
[3, 1], // - any
136147
[4, 1], // - (child)
137-
[1, 2], // - rep
138-
[2, 1], // - any
139-
[3, 1], // - (child)
140-
[2, 1], // - any
141-
[3, 1], // - (child)
142148
]);
143149
});
144150

0 commit comments

Comments
 (0)