From 5879b94811462eaee9af3256f7e3f2d00c6b43fe Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 17 Sep 2023 23:38:40 +0900 Subject: [PATCH 001/126] parser wip --- src/parser/index.ts | 29 ++++------ src/parser/token-stream.ts | 111 +++++++++++++++++++++++++++++++++++++ src/parser/token.ts | 40 +++++++++++++ 3 files changed, 163 insertions(+), 17 deletions(-) create mode 100644 src/parser/token-stream.ts create mode 100644 src/parser/token.ts diff --git a/src/parser/index.ts b/src/parser/index.ts index 0c34f647..13f09839 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,5 +1,5 @@ import { AiScriptSyntaxError } from '../error.js'; -import * as parser from './parser.js'; +import { TokenStream } from './token-stream.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; @@ -56,22 +56,7 @@ export class Parser { public parse(input: string): Ast.Node[] { let nodes: Cst.Node[]; - // generate a node tree - try { - // apply preprocessor - const code = parser.parse(input, { startRule: 'Preprocess' }); - // apply main parser - nodes = parser.parse(code, { startRule: 'Main' }); - } catch (e) { - if (e.location) { - if (e.expected) { - throw new AiScriptSyntaxError(`Parsing error. (Line ${e.location.start.line}:${e.location.start.column})`, e); - } else { - throw new AiScriptSyntaxError(`${e.message} (Line ${e.location.start.line}:${e.location.start.column})`, e); - } - } - throw e; - } + nodes = parse(input); // validate the node tree for (const plugin of this.plugins.validate) { @@ -86,3 +71,13 @@ export class Parser { return nodes as Ast.Node[]; } } + +function parse(source: string): Cst.Node[] { + const stream = new TokenStream(source); + //stream.read(); + //stream.current; + + // TODO + + return []; +} diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts new file mode 100644 index 00000000..322d834e --- /dev/null +++ b/src/parser/token-stream.ts @@ -0,0 +1,111 @@ +import { AiScriptSyntaxError } from '../error.js'; +import { TOKEN, Token, TokenKind } from './token.js'; + +const spacingChars = [' ', '\t', '\r', '\n']; + +export class TokenStream { + private source: string; + private token?: Token; + private index: number; + private char?: string; + + public constructor(source: string) { + this.source = source; + this.index = 0; + this.loadChar(); + } + + private get isEof(): boolean { + return (this.index >= this.source.length); + } + + private loadChar(): void { + if (this.isEof) { + this.char = undefined; + } + this.char = this.source[this.index]; + } + + private nextChar(): void { + if (!this.isEof) { + this.index++; + } + this.loadChar(); + } + + /** readメソッドで読み取ったトークンを取得します。 */ + public get current(): Token { + if (this.token == null) { + throw new Error('invalid operation: token is not read yet'); + } + return this.token; + } + + private readWord(): boolean { + // TODO + return false; + } + + private readDigits(): boolean { + // TODO + return false; + } + + /** トークンを読み取ります。 */ + public read(): void { + while (true) { + // EOF terminated + if (this.char == null) { + this.token = TOKEN(TokenKind.EOF); + break; + } + // skip spasing + if (spacingChars.includes(this.char)) { + this.nextChar(); + continue; + } + let match = true; + switch (this.char) { + case '@': { + this.token = TOKEN(TokenKind.At); + this.nextChar(); + break; + } + case '(': { + this.token = TOKEN(TokenKind.OpenParen); + this.nextChar(); + break; + } + case ')': { + this.token = TOKEN(TokenKind.CloseParen); + this.nextChar(); + break; + } + case '{': { + this.token = TOKEN(TokenKind.OpenBrace); + this.nextChar(); + break; + } + case '}': { + this.token = TOKEN(TokenKind.CloseBrace); + this.nextChar(); + break; + } + // TODO + default: { + match = false; + } + } + if (!match) { + if (this.readDigits()) { + break; + } + if (this.readWord()) { + break; + } + throw new AiScriptSyntaxError(`invalid character: "${this.char}"`); + } + break; + } + } +} diff --git a/src/parser/token.ts b/src/parser/token.ts new file mode 100644 index 00000000..3ad81733 --- /dev/null +++ b/src/parser/token.ts @@ -0,0 +1,40 @@ +export enum TokenKind { + EOF, + Identifier, + Literal, + + OpenParen, + CloseParen, + + OpenBrace, + CloseBrace, + + At, +} + +export class Token { + constructor( + public kind: TokenKind, + public literal?: Literal, + ) { } +} + +export function TOKEN(kind: TokenKind) { + return new Token(kind); +} + +export type Literal = NumberLiteral | StringLiteral; + +export class NumberLiteral { + kind = 'NumberLiteral' as const; + constructor( + public value: number + ) { } +} + +export class StringLiteral { + kind = 'StringLiteral' as const; + constructor( + public value: string + ) { } +} From 4b291fa0b18e85c9db18352de5c12a8ac5d720ca Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 17 Sep 2023 23:47:26 +0900 Subject: [PATCH 002/126] npm scripts --- package.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index 05886791..95101dd5 100644 --- a/package.json +++ b/package.json @@ -26,8 +26,7 @@ "ts": "npm run ts-esm && npm run ts-dts", "ts-esm": "tsc --outDir built/esm", "ts-dts": "tsc --outDir built/dts --declaration true --emitDeclarationOnly true --declarationMap true", - "build": "npm run peg && npm run ts", - "build-debug": "npm run peg-debug && tsc", + "build": "npm run ts", "api": "npx api-extractor run --local --verbose", "api-prod": "npx api-extractor run --verbose", "lint": "eslint . --ext .js,.jsx,.ts,.tsx", From c33c4fb4d888fef9d4d0426bf887135f7fce7f62 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 17 Sep 2023 23:50:43 +0900 Subject: [PATCH 003/126] lint --- src/parser/token-stream.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 322d834e..6b6a89c5 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -1,5 +1,6 @@ import { AiScriptSyntaxError } from '../error.js'; -import { TOKEN, Token, TokenKind } from './token.js'; +import { TOKEN, TokenKind } from './token.js'; +import type { Token } from './token.js'; const spacingChars = [' ', '\t', '\r', '\n']; From c2d395d11b6ecbc3a1d4850930b19ff8b78e6ef2 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 10:27:19 +0900 Subject: [PATCH 004/126] token stream wip --- src/parser/token-stream.ts | 45 ++++++++++++++++++++++++++++++++++---- src/parser/token.ts | 34 ++++++++++------------------ 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 6b6a89c5..37d97e6d 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -3,6 +3,8 @@ import { TOKEN, TokenKind } from './token.js'; import type { Token } from './token.js'; const spacingChars = [' ', '\t', '\r', '\n']; +const digit = /^[0-9]$/; +const wordChar = /^[A-Za-z0-9_]$/; export class TokenStream { private source: string; @@ -43,13 +45,48 @@ export class TokenStream { } private readWord(): boolean { - // TODO - return false; + // read a word + let word = ''; + while (this.char != null && wordChar.test(this.char)) { + word += this.char; + this.nextChar(); + } + if (word.length === 0) { + return false; + } + // check word kind + switch (word) { + case 'null': { + this.token = TOKEN(TokenKind.NullKeyword); + break; + } + case 'true': { + this.token = TOKEN(TokenKind.TrueKeyword); + break; + } + case 'false': { + this.token = TOKEN(TokenKind.FalseKeyword); + break; + } + default: { + this.token = TOKEN(TokenKind.Identifier, word); + break; + } + } + return true; } private readDigits(): boolean { - // TODO - return false; + let digits = ''; + while (this.char != null && digit.test(this.char)) { + digits += this.char; + this.nextChar(); + } + if (digits.length === 0) { + return false; + } + this.token = TOKEN(TokenKind.NumberLiteral, digits); + return true; } /** トークンを読み取ります。 */ diff --git a/src/parser/token.ts b/src/parser/token.ts index 3ad81733..5e3dfa59 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -1,40 +1,30 @@ export enum TokenKind { EOF, Identifier, - Literal, + + // literal + NumberLiteral, + StringLiteral, + + // keyword + NullKeyword, + TrueKeyword, + FalseKeyword, OpenParen, CloseParen, - OpenBrace, CloseBrace, - At, } export class Token { constructor( public kind: TokenKind, - public literal?: Literal, + public value?: string, ) { } } -export function TOKEN(kind: TokenKind) { - return new Token(kind); -} - -export type Literal = NumberLiteral | StringLiteral; - -export class NumberLiteral { - kind = 'NumberLiteral' as const; - constructor( - public value: number - ) { } -} - -export class StringLiteral { - kind = 'StringLiteral' as const; - constructor( - public value: string - ) { } +export function TOKEN(kind: TokenKind, value?: Token['value']) { + return new Token(kind, value); } From 23210e152947f2d68f4b0a48cdaced1481ab765d Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 10:42:00 +0900 Subject: [PATCH 005/126] token stream wip --- src/parser/token-stream.ts | 62 +++++++++++++++++++++++++++++++++++--- src/parser/token.ts | 14 +++++++++ 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 37d97e6d..4cf837b1 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -68,6 +68,58 @@ export class TokenStream { this.token = TOKEN(TokenKind.FalseKeyword); break; } + case 'each': { + this.token = TOKEN(TokenKind.EachKeyword); + break; + } + case 'for': { + this.token = TOKEN(TokenKind.ForKeyword); + break; + } + case 'loop': { + this.token = TOKEN(TokenKind.LoopKeyword); + break; + } + case 'break': { + this.token = TOKEN(TokenKind.BreakKeyword); + break; + } + case 'continue': { + this.token = TOKEN(TokenKind.ContinueKeyword); + break; + } + case 'match': { + this.token = TOKEN(TokenKind.MatchKeyword); + break; + } + case 'elif': { + this.token = TOKEN(TokenKind.ElifKeyword); + break; + } + case 'else': { + this.token = TOKEN(TokenKind.ElseKeyword); + break; + } + case 'return': { + this.token = TOKEN(TokenKind.ReturnKeyword); + break; + } + case 'eval': { + this.token = TOKEN(TokenKind.EvalKeyword); + break; + } + case 'var': { + this.token = TOKEN(TokenKind.VarKeyword); + break; + } + case 'let': { + this.token = TOKEN(TokenKind.LetKeyword); + break; + } + case 'exists': { + this.token = TOKEN(TokenKind.ExistsKeyword); + break; + } default: { this.token = TOKEN(TokenKind.Identifier, word); break; @@ -104,11 +156,6 @@ export class TokenStream { } let match = true; switch (this.char) { - case '@': { - this.token = TOKEN(TokenKind.At); - this.nextChar(); - break; - } case '(': { this.token = TOKEN(TokenKind.OpenParen); this.nextChar(); @@ -129,6 +176,11 @@ export class TokenStream { this.nextChar(); break; } + case '@': { + this.token = TOKEN(TokenKind.At); + this.nextChar(); + break; + } // TODO default: { match = false; diff --git a/src/parser/token.ts b/src/parser/token.ts index 5e3dfa59..1ec2e650 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -10,6 +10,20 @@ export enum TokenKind { NullKeyword, TrueKeyword, FalseKeyword, + EachKeyword, + ForKeyword, + LoopKeyword, + BreakKeyword, + ContinueKeyword, + MatchKeyword, + IfKeyword, + ElifKeyword, + ElseKeyword, + ReturnKeyword, + EvalKeyword, + VarKeyword, + LetKeyword, + ExistsKeyword, OpenParen, CloseParen, From d75ac4fbf96a66b4e7065e01a9abfc6636d25682 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 11:19:01 +0900 Subject: [PATCH 006/126] add tokens --- src/parser/token.ts | 65 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/src/parser/token.ts b/src/parser/token.ts index 1ec2e650..ad08591b 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -25,11 +25,74 @@ export enum TokenKind { LetKeyword, ExistsKeyword, + /** "!" */ + Not, + /** "!=" */ + NotEq, + /** "#" */ + Sharp, + /** "###" */ + Sharp3, + /** "%" */ + Percent, + /** "&&" */ + And2, + /** "(" */ OpenParen, + /** ")" */ CloseParen, + /** "*" */ + Asterisk, + /** "+" */ + Plus, + /** "+=" */ + PlusEq, + /** "," */ + Comma, + /** "-" */ + Minus, + /** "-=" */ + MinusEq, + /** "." */ + Dot, + /** "/" */ + Slash, + /** ":" */ + Colon, + /** "::" */ + Colon2, + /** ";" */ + SemiColon, + /** "<" */ + Lt, + /** "<=" */ + LtEq, + /** "<:" */ + Out, + /** "=" */ + Eq, + /** "==" */ + Eq2, + /** "=>" */ + Arrow, + /** ">" */ + Gt, + /** ">=" */ + GtEq, + /** "@" */ + At, + /** "[" */ + OpenBracket, + /** "]" */ + CloseBracket, + /** "^" */ + Hat, + /** "{" */ OpenBrace, + /** "||" */ + Or2, + /** "}" */ CloseBrace, - At, } export class Token { From bd27468b646a1c6eded008a6c33ebb589e49e122 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 11:37:56 +0900 Subject: [PATCH 007/126] token stream wip --- src/parser/token-stream.ts | 117 +++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 6 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 4cf837b1..9049d22f 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -25,8 +25,9 @@ export class TokenStream { private loadChar(): void { if (this.isEof) { this.char = undefined; + } else { + this.char = this.source[this.index]; } - this.char = this.source[this.index]; } private nextChar(): void { @@ -156,6 +157,31 @@ export class TokenStream { } let match = true; switch (this.char) { + case '!': { + this.token = TOKEN(TokenKind.Not); + this.nextChar(); + break; + } + case '#': { + this.token = TOKEN(TokenKind.Sharp); + this.nextChar(); + break; + } + case '%': { + this.token = TOKEN(TokenKind.Percent); + this.nextChar(); + break; + } + case '&': { + this.nextChar(); + if (this.char == '&') { + this.token = TOKEN(TokenKind.And2); + this.nextChar(); + } else { + match = false; + } + break; + } case '(': { this.token = TOKEN(TokenKind.OpenParen); this.nextChar(); @@ -166,13 +192,58 @@ export class TokenStream { this.nextChar(); break; } - case '{': { - this.token = TOKEN(TokenKind.OpenBrace); + case '*': { + this.token = TOKEN(TokenKind.Asterisk); this.nextChar(); break; } - case '}': { - this.token = TOKEN(TokenKind.CloseBrace); + case '+': { + this.token = TOKEN(TokenKind.Plus); + this.nextChar(); + break; + } + case ',': { + this.token = TOKEN(TokenKind.Comma); + this.nextChar(); + break; + } + case '-': { + this.token = TOKEN(TokenKind.Minus); + this.nextChar(); + break; + } + case '.': { + this.token = TOKEN(TokenKind.Dot); + this.nextChar(); + break; + } + case '/': { + this.token = TOKEN(TokenKind.Slash); + this.nextChar(); + break; + } + case ':': { + this.token = TOKEN(TokenKind.Colon); + this.nextChar(); + break; + } + case ';': { + this.token = TOKEN(TokenKind.SemiColon); + this.nextChar(); + break; + } + case '<': { + this.token = TOKEN(TokenKind.Lt); + this.nextChar(); + break; + } + case '=': { + this.token = TOKEN(TokenKind.Eq); + this.nextChar(); + break; + } + case '>': { + this.token = TOKEN(TokenKind.Gt); this.nextChar(); break; } @@ -181,7 +252,41 @@ export class TokenStream { this.nextChar(); break; } - // TODO + case '[': { + this.token = TOKEN(TokenKind.OpenBracket); + this.nextChar(); + break; + } + case ']': { + this.token = TOKEN(TokenKind.CloseBracket); + this.nextChar(); + break; + } + case '^': { + this.token = TOKEN(TokenKind.Hat); + this.nextChar(); + break; + } + case '{': { + this.token = TOKEN(TokenKind.OpenBrace); + this.nextChar(); + break; + } + case '|': { + this.nextChar(); + if (this.char == '|') { + this.token = TOKEN(TokenKind.Or2); + this.nextChar(); + } else { + match = false; + } + break; + } + case '}': { + this.token = TOKEN(TokenKind.CloseBrace); + this.nextChar(); + break; + } default: { match = false; } From 27864a1e1daddc75236aea0c204ea3388bbda49f Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 11:41:27 +0900 Subject: [PATCH 008/126] lint --- src/parser/token-stream.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 9049d22f..f459d46f 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -174,7 +174,7 @@ export class TokenStream { } case '&': { this.nextChar(); - if (this.char == '&') { + if (this.char === '&') { this.token = TOKEN(TokenKind.And2); this.nextChar(); } else { @@ -274,7 +274,7 @@ export class TokenStream { } case '|': { this.nextChar(); - if (this.char == '|') { + if (this.char === '|') { this.token = TOKEN(TokenKind.Or2); this.nextChar(); } else { From 880d5f245fcdcfa1646364e6cfc05362895a2b31 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 12:15:31 +0900 Subject: [PATCH 009/126] token stream wip --- src/parser/token-stream.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index f459d46f..d2c90082 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -93,6 +93,10 @@ export class TokenStream { this.token = TOKEN(TokenKind.MatchKeyword); break; } + case 'if': { + this.token = TOKEN(TokenKind.IfKeyword); + break; + } case 'elif': { this.token = TOKEN(TokenKind.ElifKeyword); break; From c70ed01dc6baf0012b89b837ce7f0575ec965f5a Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 12:15:39 +0900 Subject: [PATCH 010/126] add test --- test/parser.ts | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 test/parser.ts diff --git a/test/parser.ts b/test/parser.ts new file mode 100644 index 00000000..65fee050 --- /dev/null +++ b/test/parser.ts @@ -0,0 +1,47 @@ +import * as assert from 'assert'; +import { TokenStream } from '../src/parser/token-stream'; +import { TOKEN, TokenKind } from '../src/parser/token'; + +describe('TokenStream', () => { + function next(stream: TokenStream, kind: TokenKind, value?: string) { + stream.read(); + assert.deepStrictEqual(stream.current, TOKEN(kind, value)); + } + + test.concurrent('eof', async () => { + const source = ''; + const stream = new TokenStream(source); + next(stream, TokenKind.EOF); + next(stream, TokenKind.EOF); + }); + test.concurrent('keyword', async () => { + const source = 'if'; + const stream = new TokenStream(source); + next(stream, TokenKind.IfKeyword); + next(stream, TokenKind.EOF); + }); + test.concurrent('identifier', async () => { + const source = 'xyz'; + const stream = new TokenStream(source); + next(stream, TokenKind.Identifier, 'xyz'); + next(stream, TokenKind.EOF); + }); + test.concurrent('words', async () => { + const source = 'abc xyz'; + const stream = new TokenStream(source); + next(stream, TokenKind.Identifier, 'abc'); + next(stream, TokenKind.Identifier, 'xyz'); + next(stream, TokenKind.EOF); + }); + test.concurrent('stream', async () => { + const source = '@abc() { }'; + const stream = new TokenStream(source); + next(stream, TokenKind.At); + next(stream, TokenKind.Identifier, 'abc'); + next(stream, TokenKind.OpenParen); + next(stream, TokenKind.CloseParen); + next(stream, TokenKind.OpenBrace); + next(stream, TokenKind.CloseBrace); + next(stream, TokenKind.EOF); + }); +}); From fe61723cf78b6551cf30a20648c7fefad5f761a3 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 12:24:44 +0900 Subject: [PATCH 011/126] test --- test/parser.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/parser.ts b/test/parser.ts index 65fee050..4f4b0125 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -8,6 +8,16 @@ describe('TokenStream', () => { assert.deepStrictEqual(stream.current, TOKEN(kind, value)); } + test.concurrent('can get a token after reading', async () => { + const source = ''; + const stream = new TokenStream(source); + try { + stream.current; + assert.fail(); + } catch (e) { } + stream.read(); + stream.current; + }); test.concurrent('eof', async () => { const source = ''; const stream = new TokenStream(source); From 4c3cad7d22e159b23960d48c2f064862a022d15a Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 12:40:00 +0900 Subject: [PATCH 012/126] test --- test/parser.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/parser.ts b/test/parser.ts index 4f4b0125..d824998f 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -36,6 +36,14 @@ describe('TokenStream', () => { next(stream, TokenKind.Identifier, 'xyz'); next(stream, TokenKind.EOF); }); + test.concurrent('invalid token', async () => { + const source = '$'; + const stream = new TokenStream(source); + try { + stream.read(); + assert.fail(); + } catch (e) { } + }); test.concurrent('words', async () => { const source = 'abc xyz'; const stream = new TokenStream(source); From b0e39862e3fe062dcf0faa1dcac638a34b3fa004 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 13:30:28 +0900 Subject: [PATCH 013/126] token stream wip --- src/parser/token-stream.ts | 106 ++++++++++++++++++++++++++++--------- src/parser/token.ts | 2 + 2 files changed, 82 insertions(+), 26 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index d2c90082..24bd6988 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -162,133 +162,187 @@ export class TokenStream { let match = true; switch (this.char) { case '!': { - this.token = TOKEN(TokenKind.Not); this.nextChar(); + if ((this.char as string) === '=') { + this.nextChar(); + this.token = TOKEN(TokenKind.NotEq); + } else { + this.token = TOKEN(TokenKind.Not); + } break; } case '#': { - this.token = TOKEN(TokenKind.Sharp); this.nextChar(); + if ((this.char as string) === '#') { + this.nextChar(); + if ((this.char as string) === '#') { + this.nextChar(); + this.token = TOKEN(TokenKind.Sharp3); + } else { + match = false; + } + } else if ((this.char as string) === '[') { + this.nextChar(); + this.token = TOKEN(TokenKind.OpenSharpBracket); + } else { + this.token = TOKEN(TokenKind.Sharp); + } break; } case '%': { - this.token = TOKEN(TokenKind.Percent); this.nextChar(); + this.token = TOKEN(TokenKind.Percent); break; } case '&': { this.nextChar(); - if (this.char === '&') { - this.token = TOKEN(TokenKind.And2); + if ((this.char as string) === '&') { this.nextChar(); + this.token = TOKEN(TokenKind.And2); } else { match = false; } break; } case '(': { - this.token = TOKEN(TokenKind.OpenParen); this.nextChar(); + this.token = TOKEN(TokenKind.OpenParen); break; } case ')': { - this.token = TOKEN(TokenKind.CloseParen); this.nextChar(); + this.token = TOKEN(TokenKind.CloseParen); break; } case '*': { - this.token = TOKEN(TokenKind.Asterisk); this.nextChar(); + this.token = TOKEN(TokenKind.Asterisk); break; } case '+': { - this.token = TOKEN(TokenKind.Plus); this.nextChar(); + if ((this.char as string) === '=') { + this.nextChar(); + this.token = TOKEN(TokenKind.PlusEq); + } else { + this.token = TOKEN(TokenKind.Plus); + } break; } case ',': { - this.token = TOKEN(TokenKind.Comma); this.nextChar(); + this.token = TOKEN(TokenKind.Comma); break; } case '-': { - this.token = TOKEN(TokenKind.Minus); this.nextChar(); + if ((this.char as string) === '=') { + this.nextChar(); + this.token = TOKEN(TokenKind.MinusEq); + } else { + this.token = TOKEN(TokenKind.Minus); + } break; } case '.': { - this.token = TOKEN(TokenKind.Dot); this.nextChar(); + this.token = TOKEN(TokenKind.Dot); break; } case '/': { - this.token = TOKEN(TokenKind.Slash); this.nextChar(); + this.token = TOKEN(TokenKind.Slash); break; } case ':': { - this.token = TOKEN(TokenKind.Colon); this.nextChar(); + if ((this.char as string) === ':') { + this.nextChar(); + this.token = TOKEN(TokenKind.Colon2); + } else { + this.token = TOKEN(TokenKind.Colon); + } break; } case ';': { - this.token = TOKEN(TokenKind.SemiColon); this.nextChar(); + this.token = TOKEN(TokenKind.SemiColon); break; } case '<': { - this.token = TOKEN(TokenKind.Lt); this.nextChar(); + if ((this.char as string) === '=') { + this.nextChar(); + this.token = TOKEN(TokenKind.LtEq); + } else if ((this.char as string) === ':') { + this.nextChar(); + this.token = TOKEN(TokenKind.Out); + } else { + this.token = TOKEN(TokenKind.Lt); + } break; } case '=': { - this.token = TOKEN(TokenKind.Eq); this.nextChar(); + if ((this.char as string) === '=') { + this.nextChar(); + this.token = TOKEN(TokenKind.Eq2); + } else if ((this.char as string) === '>') { + this.nextChar(); + this.token = TOKEN(TokenKind.Arrow); + } else { + this.token = TOKEN(TokenKind.Eq); + } break; } case '>': { - this.token = TOKEN(TokenKind.Gt); this.nextChar(); + if ((this.char as string) === '=') { + this.nextChar(); + this.token = TOKEN(TokenKind.GtEq); + } else { + this.token = TOKEN(TokenKind.Gt); + } break; } case '@': { - this.token = TOKEN(TokenKind.At); this.nextChar(); + this.token = TOKEN(TokenKind.At); break; } case '[': { - this.token = TOKEN(TokenKind.OpenBracket); this.nextChar(); + this.token = TOKEN(TokenKind.OpenBracket); break; } case ']': { - this.token = TOKEN(TokenKind.CloseBracket); this.nextChar(); + this.token = TOKEN(TokenKind.CloseBracket); break; } case '^': { - this.token = TOKEN(TokenKind.Hat); this.nextChar(); + this.token = TOKEN(TokenKind.Hat); break; } case '{': { - this.token = TOKEN(TokenKind.OpenBrace); this.nextChar(); + this.token = TOKEN(TokenKind.OpenBrace); break; } case '|': { this.nextChar(); - if (this.char === '|') { - this.token = TOKEN(TokenKind.Or2); + if ((this.char as string) === '|') { this.nextChar(); + this.token = TOKEN(TokenKind.Or2); } else { match = false; } break; } case '}': { - this.token = TOKEN(TokenKind.CloseBrace); this.nextChar(); + this.token = TOKEN(TokenKind.CloseBrace); break; } default: { diff --git a/src/parser/token.ts b/src/parser/token.ts index ad08591b..650082db 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -31,6 +31,8 @@ export enum TokenKind { NotEq, /** "#" */ Sharp, + /** "#[" */ + OpenSharpBracket, /** "###" */ Sharp3, /** "%" */ From 14c7694a72e21e90e31b866a5d6582cd174b2046 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 14:02:32 +0900 Subject: [PATCH 014/126] parse comment --- src/parser/token-stream.ts | 42 +++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 24bd6988..531272c2 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -146,6 +146,36 @@ export class TokenStream { return true; } + private skipCommentLine() { + while (true) { + if (this.char == null) { + break; + } + if (this.char == '\n') { + this.nextChar(); + break; + } + this.nextChar(); + } + } + + private skipCommentRange() { + while (true) { + if (this.char == null) { + break; + } + if (this.char == '*') { + this.nextChar(); + if ((this.char as string) == '/') { + this.nextChar(); + break; + } + continue; + } + this.nextChar(); + } + } + /** トークンを読み取ります。 */ public read(): void { while (true) { @@ -251,7 +281,17 @@ export class TokenStream { } case '/': { this.nextChar(); - this.token = TOKEN(TokenKind.Slash); + if ((this.char as string) === '*') { + this.nextChar(); + this.skipCommentRange(); + continue; + } else if ((this.char as string) === '/') { + this.nextChar(); + this.skipCommentLine(); + continue; + } else { + this.token = TOKEN(TokenKind.Slash); + } break; } case ':': { From 7848a8cc82788f4c55d57d1c2efc6b9ce4909e0f Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 14:11:07 +0900 Subject: [PATCH 015/126] refactor --- src/parser/token-stream.ts | 262 ++++++++++++++++++------------------- 1 file changed, 131 insertions(+), 131 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 531272c2..f898a281 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -45,137 +45,6 @@ export class TokenStream { return this.token; } - private readWord(): boolean { - // read a word - let word = ''; - while (this.char != null && wordChar.test(this.char)) { - word += this.char; - this.nextChar(); - } - if (word.length === 0) { - return false; - } - // check word kind - switch (word) { - case 'null': { - this.token = TOKEN(TokenKind.NullKeyword); - break; - } - case 'true': { - this.token = TOKEN(TokenKind.TrueKeyword); - break; - } - case 'false': { - this.token = TOKEN(TokenKind.FalseKeyword); - break; - } - case 'each': { - this.token = TOKEN(TokenKind.EachKeyword); - break; - } - case 'for': { - this.token = TOKEN(TokenKind.ForKeyword); - break; - } - case 'loop': { - this.token = TOKEN(TokenKind.LoopKeyword); - break; - } - case 'break': { - this.token = TOKEN(TokenKind.BreakKeyword); - break; - } - case 'continue': { - this.token = TOKEN(TokenKind.ContinueKeyword); - break; - } - case 'match': { - this.token = TOKEN(TokenKind.MatchKeyword); - break; - } - case 'if': { - this.token = TOKEN(TokenKind.IfKeyword); - break; - } - case 'elif': { - this.token = TOKEN(TokenKind.ElifKeyword); - break; - } - case 'else': { - this.token = TOKEN(TokenKind.ElseKeyword); - break; - } - case 'return': { - this.token = TOKEN(TokenKind.ReturnKeyword); - break; - } - case 'eval': { - this.token = TOKEN(TokenKind.EvalKeyword); - break; - } - case 'var': { - this.token = TOKEN(TokenKind.VarKeyword); - break; - } - case 'let': { - this.token = TOKEN(TokenKind.LetKeyword); - break; - } - case 'exists': { - this.token = TOKEN(TokenKind.ExistsKeyword); - break; - } - default: { - this.token = TOKEN(TokenKind.Identifier, word); - break; - } - } - return true; - } - - private readDigits(): boolean { - let digits = ''; - while (this.char != null && digit.test(this.char)) { - digits += this.char; - this.nextChar(); - } - if (digits.length === 0) { - return false; - } - this.token = TOKEN(TokenKind.NumberLiteral, digits); - return true; - } - - private skipCommentLine() { - while (true) { - if (this.char == null) { - break; - } - if (this.char == '\n') { - this.nextChar(); - break; - } - this.nextChar(); - } - } - - private skipCommentRange() { - while (true) { - if (this.char == null) { - break; - } - if (this.char == '*') { - this.nextChar(); - if ((this.char as string) == '/') { - this.nextChar(); - break; - } - continue; - } - this.nextChar(); - } - } - /** トークンを読み取ります。 */ public read(): void { while (true) { @@ -401,4 +270,135 @@ export class TokenStream { break; } } + + private readWord(): boolean { + // read a word + let word = ''; + while (this.char != null && wordChar.test(this.char)) { + word += this.char; + this.nextChar(); + } + if (word.length === 0) { + return false; + } + // check word kind + switch (word) { + case 'null': { + this.token = TOKEN(TokenKind.NullKeyword); + break; + } + case 'true': { + this.token = TOKEN(TokenKind.TrueKeyword); + break; + } + case 'false': { + this.token = TOKEN(TokenKind.FalseKeyword); + break; + } + case 'each': { + this.token = TOKEN(TokenKind.EachKeyword); + break; + } + case 'for': { + this.token = TOKEN(TokenKind.ForKeyword); + break; + } + case 'loop': { + this.token = TOKEN(TokenKind.LoopKeyword); + break; + } + case 'break': { + this.token = TOKEN(TokenKind.BreakKeyword); + break; + } + case 'continue': { + this.token = TOKEN(TokenKind.ContinueKeyword); + break; + } + case 'match': { + this.token = TOKEN(TokenKind.MatchKeyword); + break; + } + case 'if': { + this.token = TOKEN(TokenKind.IfKeyword); + break; + } + case 'elif': { + this.token = TOKEN(TokenKind.ElifKeyword); + break; + } + case 'else': { + this.token = TOKEN(TokenKind.ElseKeyword); + break; + } + case 'return': { + this.token = TOKEN(TokenKind.ReturnKeyword); + break; + } + case 'eval': { + this.token = TOKEN(TokenKind.EvalKeyword); + break; + } + case 'var': { + this.token = TOKEN(TokenKind.VarKeyword); + break; + } + case 'let': { + this.token = TOKEN(TokenKind.LetKeyword); + break; + } + case 'exists': { + this.token = TOKEN(TokenKind.ExistsKeyword); + break; + } + default: { + this.token = TOKEN(TokenKind.Identifier, word); + break; + } + } + return true; + } + + private readDigits(): boolean { + let digits = ''; + while (this.char != null && digit.test(this.char)) { + digits += this.char; + this.nextChar(); + } + if (digits.length === 0) { + return false; + } + this.token = TOKEN(TokenKind.NumberLiteral, digits); + return true; + } + + private skipCommentLine() { + while (true) { + if (this.char == null) { + break; + } + if (this.char == '\n') { + this.nextChar(); + break; + } + this.nextChar(); + } + } + + private skipCommentRange() { + while (true) { + if (this.char == null) { + break; + } + if (this.char == '*') { + this.nextChar(); + if ((this.char as string) == '/') { + this.nextChar(); + break; + } + continue; + } + this.nextChar(); + } + } } From 3996ffcc67c3d2d21187c40344459c5640995d5a Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 15:04:51 +0900 Subject: [PATCH 016/126] parser wip --- src/parser/index.ts | 74 +++++++++++++++++++++++++++++++++++--- src/parser/token-stream.ts | 3 ++ test/parser.ts | 19 ++++++---- 3 files changed, 85 insertions(+), 11 deletions(-) diff --git a/src/parser/index.ts b/src/parser/index.ts index 13f09839..47b9c406 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,5 +1,7 @@ import { AiScriptSyntaxError } from '../error.js'; import { TokenStream } from './token-stream.js'; +import type { Token } from './token.js'; +import { TokenKind } from './token.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; @@ -72,12 +74,76 @@ export class Parser { } } +class ParseContext { + private stream: TokenStream; + + public constructor(stream: TokenStream) { + this.stream = stream; + } + + public init(): void { + this.stream.read(); + } + + public get token(): Token { + return this.stream.current; + } + + public kindOf(kind: TokenKind): boolean { + return (this.token.kind === kind); + } + + public expect(kind: TokenKind): void { + if (!this.kindOf(kind)) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + } + } + + public next(): void { + this.stream.read(); + } + + public consumeAs(kind: TokenKind): void { + this.expect(kind); + this.next(); + } +} + function parse(source: string): Cst.Node[] { const stream = new TokenStream(source); - //stream.read(); - //stream.current; + const ctx = new ParseContext(stream); + stream.init(); + ctx.init(); + + const nodes: Cst.Node[] = []; + while (!ctx.kindOf(TokenKind.EOF)) { + switch (ctx.token.kind) { + case TokenKind.Colon2: { + nodes.push(parseNamespace(ctx)); + break; + } + case TokenKind.Sharp3: { + nodes.push(parseMeta(ctx)); + break; + } + default: { + nodes.push(parseStatement(ctx)); + break; + } + } + } + + return nodes; +} - // TODO +function parseNamespace(ctx: ParseContext): Cst.Node { + throw new Error('todo'); +} + +function parseMeta(ctx: ParseContext): Cst.Node { + throw new Error('todo'); +} - return []; +function parseStatement(ctx: ParseContext): Cst.Node { + throw new Error('todo'); } diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index f898a281..15708f81 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -15,6 +15,9 @@ export class TokenStream { public constructor(source: string) { this.source = source; this.index = 0; + } + + public init() { this.loadChar(); } diff --git a/test/parser.ts b/test/parser.ts index d824998f..9c92b0e4 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -3,6 +3,11 @@ import { TokenStream } from '../src/parser/token-stream'; import { TOKEN, TokenKind } from '../src/parser/token'; describe('TokenStream', () => { + function init(source: string) { + const stream = new TokenStream(source); + stream.init(); + return stream; + } function next(stream: TokenStream, kind: TokenKind, value?: string) { stream.read(); assert.deepStrictEqual(stream.current, TOKEN(kind, value)); @@ -10,7 +15,7 @@ describe('TokenStream', () => { test.concurrent('can get a token after reading', async () => { const source = ''; - const stream = new TokenStream(source); + const stream = init(source); try { stream.current; assert.fail(); @@ -20,25 +25,25 @@ describe('TokenStream', () => { }); test.concurrent('eof', async () => { const source = ''; - const stream = new TokenStream(source); + const stream = init(source); next(stream, TokenKind.EOF); next(stream, TokenKind.EOF); }); test.concurrent('keyword', async () => { const source = 'if'; - const stream = new TokenStream(source); + const stream = init(source); next(stream, TokenKind.IfKeyword); next(stream, TokenKind.EOF); }); test.concurrent('identifier', async () => { const source = 'xyz'; - const stream = new TokenStream(source); + const stream = init(source); next(stream, TokenKind.Identifier, 'xyz'); next(stream, TokenKind.EOF); }); test.concurrent('invalid token', async () => { const source = '$'; - const stream = new TokenStream(source); + const stream = init(source); try { stream.read(); assert.fail(); @@ -46,14 +51,14 @@ describe('TokenStream', () => { }); test.concurrent('words', async () => { const source = 'abc xyz'; - const stream = new TokenStream(source); + const stream = init(source); next(stream, TokenKind.Identifier, 'abc'); next(stream, TokenKind.Identifier, 'xyz'); next(stream, TokenKind.EOF); }); test.concurrent('stream', async () => { const source = '@abc() { }'; - const stream = new TokenStream(source); + const stream = init(source); next(stream, TokenKind.At); next(stream, TokenKind.Identifier, 'abc'); next(stream, TokenKind.OpenParen); From 97bf5cee50aedef2210ba8f4fbea5fd68f143c3f Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 15:17:00 +0900 Subject: [PATCH 017/126] lint --- src/parser/token-stream.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 15708f81..8fa5eecf 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -380,7 +380,7 @@ export class TokenStream { if (this.char == null) { break; } - if (this.char == '\n') { + if (this.char === '\n') { this.nextChar(); break; } @@ -393,9 +393,9 @@ export class TokenStream { if (this.char == null) { break; } - if (this.char == '*') { + if (this.char === '*') { this.nextChar(); - if ((this.char as string) == '/') { + if ((this.char as string) === '/') { this.nextChar(); break; } From 6ef696752874b79bdcdffd986aecadfee5d66a7a Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 15:40:20 +0900 Subject: [PATCH 018/126] enhance token stream, move parser rules --- src/parser/index.ts | 82 ++-------------------- src/parser/syntaxes.ts | 38 +++++++++++ src/parser/token-stream.ts | 136 +++++++++++++++++++++---------------- test/parser.ts | 18 ++--- 4 files changed, 127 insertions(+), 147 deletions(-) create mode 100644 src/parser/syntaxes.ts diff --git a/src/parser/index.ts b/src/parser/index.ts index 47b9c406..2bdb8eea 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,7 +1,5 @@ -import { AiScriptSyntaxError } from '../error.js'; import { TokenStream } from './token-stream.js'; -import type { Token } from './token.js'; -import { TokenKind } from './token.js'; +import { parseTopLevel } from './syntaxes.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; @@ -58,7 +56,9 @@ export class Parser { public parse(input: string): Ast.Node[] { let nodes: Cst.Node[]; - nodes = parse(input); + const stream = new TokenStream(input); + stream.init(); + nodes = parseTopLevel(stream); // validate the node tree for (const plugin of this.plugins.validate) { @@ -73,77 +73,3 @@ export class Parser { return nodes as Ast.Node[]; } } - -class ParseContext { - private stream: TokenStream; - - public constructor(stream: TokenStream) { - this.stream = stream; - } - - public init(): void { - this.stream.read(); - } - - public get token(): Token { - return this.stream.current; - } - - public kindOf(kind: TokenKind): boolean { - return (this.token.kind === kind); - } - - public expect(kind: TokenKind): void { - if (!this.kindOf(kind)) { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); - } - } - - public next(): void { - this.stream.read(); - } - - public consumeAs(kind: TokenKind): void { - this.expect(kind); - this.next(); - } -} - -function parse(source: string): Cst.Node[] { - const stream = new TokenStream(source); - const ctx = new ParseContext(stream); - stream.init(); - ctx.init(); - - const nodes: Cst.Node[] = []; - while (!ctx.kindOf(TokenKind.EOF)) { - switch (ctx.token.kind) { - case TokenKind.Colon2: { - nodes.push(parseNamespace(ctx)); - break; - } - case TokenKind.Sharp3: { - nodes.push(parseMeta(ctx)); - break; - } - default: { - nodes.push(parseStatement(ctx)); - break; - } - } - } - - return nodes; -} - -function parseNamespace(ctx: ParseContext): Cst.Node { - throw new Error('todo'); -} - -function parseMeta(ctx: ParseContext): Cst.Node { - throw new Error('todo'); -} - -function parseStatement(ctx: ParseContext): Cst.Node { - throw new Error('todo'); -} diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts new file mode 100644 index 00000000..164be20e --- /dev/null +++ b/src/parser/syntaxes.ts @@ -0,0 +1,38 @@ +import { AiScriptSyntaxError } from '../error.js'; +import { Cst } from '../index.js'; +import { TokenStream } from './token-stream.js'; +import { TokenKind } from './token.js'; + +export function parseTopLevel(stream: TokenStream): Cst.Node[] { + const nodes: Cst.Node[] = []; + while (!stream.kindOf(TokenKind.EOF)) { + switch (stream.token.kind) { + case TokenKind.Colon2: { + nodes.push(parseNamespace(stream)); + break; + } + case TokenKind.Sharp3: { + nodes.push(parseMeta(stream)); + break; + } + default: { + nodes.push(parseStatement(stream)); + break; + } + } + } + + return nodes; +} + +export function parseNamespace(stream: TokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseMeta(stream: TokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseStatement(stream: TokenStream): Cst.Node { + throw new Error('todo'); +} diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 8fa5eecf..804c35b5 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -8,7 +8,7 @@ const wordChar = /^[A-Za-z0-9_]$/; export class TokenStream { private source: string; - private token?: Token; + private _token?: Token; private index: number; private char?: string; @@ -19,6 +19,22 @@ export class TokenStream { public init() { this.loadChar(); + this.next(); + } + + public kindOf(kind: TokenKind): boolean { + return (this.token.kind === kind); + } + + public expect(kind: TokenKind): void { + if (!this.kindOf(kind)) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + } + } + + public consumeAs(kind: TokenKind): void { + this.expect(kind); + this.next(); } private get isEof(): boolean { @@ -41,19 +57,19 @@ export class TokenStream { } /** readメソッドで読み取ったトークンを取得します。 */ - public get current(): Token { - if (this.token == null) { + public get token(): Token { + if (this._token == null) { throw new Error('invalid operation: token is not read yet'); } - return this.token; + return this._token; } /** トークンを読み取ります。 */ - public read(): void { + public next(): void { while (true) { // EOF terminated if (this.char == null) { - this.token = TOKEN(TokenKind.EOF); + this._token = TOKEN(TokenKind.EOF); break; } // skip spasing @@ -67,9 +83,9 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this.token = TOKEN(TokenKind.NotEq); + this._token = TOKEN(TokenKind.NotEq); } else { - this.token = TOKEN(TokenKind.Not); + this._token = TOKEN(TokenKind.Not); } break; } @@ -79,28 +95,28 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === '#') { this.nextChar(); - this.token = TOKEN(TokenKind.Sharp3); + this._token = TOKEN(TokenKind.Sharp3); } else { match = false; } } else if ((this.char as string) === '[') { this.nextChar(); - this.token = TOKEN(TokenKind.OpenSharpBracket); + this._token = TOKEN(TokenKind.OpenSharpBracket); } else { - this.token = TOKEN(TokenKind.Sharp); + this._token = TOKEN(TokenKind.Sharp); } break; } case '%': { this.nextChar(); - this.token = TOKEN(TokenKind.Percent); + this._token = TOKEN(TokenKind.Percent); break; } case '&': { this.nextChar(); if ((this.char as string) === '&') { this.nextChar(); - this.token = TOKEN(TokenKind.And2); + this._token = TOKEN(TokenKind.And2); } else { match = false; } @@ -108,47 +124,47 @@ export class TokenStream { } case '(': { this.nextChar(); - this.token = TOKEN(TokenKind.OpenParen); + this._token = TOKEN(TokenKind.OpenParen); break; } case ')': { this.nextChar(); - this.token = TOKEN(TokenKind.CloseParen); + this._token = TOKEN(TokenKind.CloseParen); break; } case '*': { this.nextChar(); - this.token = TOKEN(TokenKind.Asterisk); + this._token = TOKEN(TokenKind.Asterisk); break; } case '+': { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this.token = TOKEN(TokenKind.PlusEq); + this._token = TOKEN(TokenKind.PlusEq); } else { - this.token = TOKEN(TokenKind.Plus); + this._token = TOKEN(TokenKind.Plus); } break; } case ',': { this.nextChar(); - this.token = TOKEN(TokenKind.Comma); + this._token = TOKEN(TokenKind.Comma); break; } case '-': { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this.token = TOKEN(TokenKind.MinusEq); + this._token = TOKEN(TokenKind.MinusEq); } else { - this.token = TOKEN(TokenKind.Minus); + this._token = TOKEN(TokenKind.Minus); } break; } case '.': { this.nextChar(); - this.token = TOKEN(TokenKind.Dot); + this._token = TOKEN(TokenKind.Dot); break; } case '/': { @@ -162,7 +178,7 @@ export class TokenStream { this.skipCommentLine(); continue; } else { - this.token = TOKEN(TokenKind.Slash); + this._token = TOKEN(TokenKind.Slash); } break; } @@ -170,27 +186,27 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === ':') { this.nextChar(); - this.token = TOKEN(TokenKind.Colon2); + this._token = TOKEN(TokenKind.Colon2); } else { - this.token = TOKEN(TokenKind.Colon); + this._token = TOKEN(TokenKind.Colon); } break; } case ';': { this.nextChar(); - this.token = TOKEN(TokenKind.SemiColon); + this._token = TOKEN(TokenKind.SemiColon); break; } case '<': { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this.token = TOKEN(TokenKind.LtEq); + this._token = TOKEN(TokenKind.LtEq); } else if ((this.char as string) === ':') { this.nextChar(); - this.token = TOKEN(TokenKind.Out); + this._token = TOKEN(TokenKind.Out); } else { - this.token = TOKEN(TokenKind.Lt); + this._token = TOKEN(TokenKind.Lt); } break; } @@ -198,12 +214,12 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this.token = TOKEN(TokenKind.Eq2); + this._token = TOKEN(TokenKind.Eq2); } else if ((this.char as string) === '>') { this.nextChar(); - this.token = TOKEN(TokenKind.Arrow); + this._token = TOKEN(TokenKind.Arrow); } else { - this.token = TOKEN(TokenKind.Eq); + this._token = TOKEN(TokenKind.Eq); } break; } @@ -211,42 +227,42 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this.token = TOKEN(TokenKind.GtEq); + this._token = TOKEN(TokenKind.GtEq); } else { - this.token = TOKEN(TokenKind.Gt); + this._token = TOKEN(TokenKind.Gt); } break; } case '@': { this.nextChar(); - this.token = TOKEN(TokenKind.At); + this._token = TOKEN(TokenKind.At); break; } case '[': { this.nextChar(); - this.token = TOKEN(TokenKind.OpenBracket); + this._token = TOKEN(TokenKind.OpenBracket); break; } case ']': { this.nextChar(); - this.token = TOKEN(TokenKind.CloseBracket); + this._token = TOKEN(TokenKind.CloseBracket); break; } case '^': { this.nextChar(); - this.token = TOKEN(TokenKind.Hat); + this._token = TOKEN(TokenKind.Hat); break; } case '{': { this.nextChar(); - this.token = TOKEN(TokenKind.OpenBrace); + this._token = TOKEN(TokenKind.OpenBrace); break; } case '|': { this.nextChar(); if ((this.char as string) === '|') { this.nextChar(); - this.token = TOKEN(TokenKind.Or2); + this._token = TOKEN(TokenKind.Or2); } else { match = false; } @@ -254,7 +270,7 @@ export class TokenStream { } case '}': { this.nextChar(); - this.token = TOKEN(TokenKind.CloseBrace); + this._token = TOKEN(TokenKind.CloseBrace); break; } default: { @@ -287,75 +303,75 @@ export class TokenStream { // check word kind switch (word) { case 'null': { - this.token = TOKEN(TokenKind.NullKeyword); + this._token = TOKEN(TokenKind.NullKeyword); break; } case 'true': { - this.token = TOKEN(TokenKind.TrueKeyword); + this._token = TOKEN(TokenKind.TrueKeyword); break; } case 'false': { - this.token = TOKEN(TokenKind.FalseKeyword); + this._token = TOKEN(TokenKind.FalseKeyword); break; } case 'each': { - this.token = TOKEN(TokenKind.EachKeyword); + this._token = TOKEN(TokenKind.EachKeyword); break; } case 'for': { - this.token = TOKEN(TokenKind.ForKeyword); + this._token = TOKEN(TokenKind.ForKeyword); break; } case 'loop': { - this.token = TOKEN(TokenKind.LoopKeyword); + this._token = TOKEN(TokenKind.LoopKeyword); break; } case 'break': { - this.token = TOKEN(TokenKind.BreakKeyword); + this._token = TOKEN(TokenKind.BreakKeyword); break; } case 'continue': { - this.token = TOKEN(TokenKind.ContinueKeyword); + this._token = TOKEN(TokenKind.ContinueKeyword); break; } case 'match': { - this.token = TOKEN(TokenKind.MatchKeyword); + this._token = TOKEN(TokenKind.MatchKeyword); break; } case 'if': { - this.token = TOKEN(TokenKind.IfKeyword); + this._token = TOKEN(TokenKind.IfKeyword); break; } case 'elif': { - this.token = TOKEN(TokenKind.ElifKeyword); + this._token = TOKEN(TokenKind.ElifKeyword); break; } case 'else': { - this.token = TOKEN(TokenKind.ElseKeyword); + this._token = TOKEN(TokenKind.ElseKeyword); break; } case 'return': { - this.token = TOKEN(TokenKind.ReturnKeyword); + this._token = TOKEN(TokenKind.ReturnKeyword); break; } case 'eval': { - this.token = TOKEN(TokenKind.EvalKeyword); + this._token = TOKEN(TokenKind.EvalKeyword); break; } case 'var': { - this.token = TOKEN(TokenKind.VarKeyword); + this._token = TOKEN(TokenKind.VarKeyword); break; } case 'let': { - this.token = TOKEN(TokenKind.LetKeyword); + this._token = TOKEN(TokenKind.LetKeyword); break; } case 'exists': { - this.token = TOKEN(TokenKind.ExistsKeyword); + this._token = TOKEN(TokenKind.ExistsKeyword); break; } default: { - this.token = TOKEN(TokenKind.Identifier, word); + this._token = TOKEN(TokenKind.Identifier, word); break; } } @@ -371,7 +387,7 @@ export class TokenStream { if (digits.length === 0) { return false; } - this.token = TOKEN(TokenKind.NumberLiteral, digits); + this._token = TOKEN(TokenKind.NumberLiteral, digits); return true; } diff --git a/test/parser.ts b/test/parser.ts index 9c92b0e4..c4a866be 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -9,19 +9,19 @@ describe('TokenStream', () => { return stream; } function next(stream: TokenStream, kind: TokenKind, value?: string) { - stream.read(); - assert.deepStrictEqual(stream.current, TOKEN(kind, value)); + assert.deepStrictEqual(stream.token, TOKEN(kind, value)); + stream.next(); } - test.concurrent('can get a token after reading', async () => { + test.concurrent('can get a token after init', async () => { const source = ''; - const stream = init(source); + const stream = new TokenStream(source); try { - stream.current; + stream.token; assert.fail(); } catch (e) { } - stream.read(); - stream.current; + stream.init(); + stream.token; }); test.concurrent('eof', async () => { const source = ''; @@ -43,9 +43,9 @@ describe('TokenStream', () => { }); test.concurrent('invalid token', async () => { const source = '$'; - const stream = init(source); + const stream = new TokenStream(source); try { - stream.read(); + stream.init(); assert.fail(); } catch (e) { } }); From 9a8aee3aa81f6510492343f48f224df51a7b5e06 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 15:44:59 +0900 Subject: [PATCH 019/126] refactor --- src/parser/token-stream.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 804c35b5..dbef509b 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -56,7 +56,6 @@ export class TokenStream { this.loadChar(); } - /** readメソッドで読み取ったトークンを取得します。 */ public get token(): Token { if (this._token == null) { throw new Error('invalid operation: token is not read yet'); @@ -64,7 +63,6 @@ export class TokenStream { return this._token; } - /** トークンを読み取ります。 */ public next(): void { while (true) { // EOF terminated From 31f18096b37f082a7d25c37e2aa9e8a26afd51a0 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 19:28:33 +0900 Subject: [PATCH 020/126] debug --- debug.ais | 1 + parse.js | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 debug.ais diff --git a/debug.ais b/debug.ais new file mode 100644 index 00000000..b506100a --- /dev/null +++ b/debug.ais @@ -0,0 +1 @@ +var x = 1; diff --git a/parse.js b/parse.js index 74a859cd..f8171ab8 100644 --- a/parse.js +++ b/parse.js @@ -1,6 +1,6 @@ import fs from 'fs'; -import { Parser } from '@syuilo/aiscript'; +import { Parser } from './built/esm/parser/index.js'; -const script = fs.readFileSync('./test.is', 'utf8'); +const script = fs.readFileSync('./debug.ais', 'utf8'); const ast = Parser.parse(script); console.log(JSON.stringify(ast, null, 2)); From 3ac6f60f5c0ee3aa7bbb808923e1cc5d02b86c23 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 19:28:56 +0900 Subject: [PATCH 021/126] parser wip --- src/parser/syntaxes.ts | 110 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 98 insertions(+), 12 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 164be20e..bde02418 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -1,38 +1,124 @@ import { AiScriptSyntaxError } from '../error.js'; -import { Cst } from '../index.js'; -import { TokenStream } from './token-stream.js'; +import type { Cst } from '../index.js'; +import type { TokenStream } from './token-stream.js'; import { TokenKind } from './token.js'; -export function parseTopLevel(stream: TokenStream): Cst.Node[] { +function createNode(type: string, params: Record): Cst.Node { + const node: Record = { type }; + //params.children; + for (const key of Object.keys(params)) { + if (params[key] !== undefined) { + node[key] = params[key]; + } + } + //node.loc = { start, end }; + return node as Cst.Node; +} + +/** + * ```text + * = ( | | )* + * ``` +*/ +export function parseTopLevel(s: TokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; - while (!stream.kindOf(TokenKind.EOF)) { - switch (stream.token.kind) { + while (!s.kindOf(TokenKind.EOF)) { + switch (s.token.kind) { case TokenKind.Colon2: { - nodes.push(parseNamespace(stream)); + nodes.push(parseNamespace(s)); break; } case TokenKind.Sharp3: { - nodes.push(parseMeta(stream)); + nodes.push(parseMeta(s)); break; } default: { - nodes.push(parseStatement(stream)); + nodes.push(parseStatement(s)); break; } } } - return nodes; } -export function parseNamespace(stream: TokenStream): Cst.Node { +/** + * ```text + * = "::" "{" ( | | )* "}" + * ``` +*/ +export function parseNamespace(s: TokenStream): Cst.Node { + s.consumeAs(TokenKind.Colon2); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + const members: Cst.Node[] = []; + s.consumeAs(TokenKind.OpenBrace); + while (!s.kindOf(TokenKind.CloseBrace)) { + switch (s.token.kind) { + case TokenKind.VarKeyword: + case TokenKind.LetKeyword: { + members.push(parseVarDef(s)); + break; + } + case TokenKind.At: { + members.push(parseFnDef(s)); + break; + } + case TokenKind.Colon2: { + members.push(parseNamespace(s)); + break; + } + } + } + s.consumeAs(TokenKind.CloseBrace); + + return createNode('ns', { name, members }); +} + +/** + * = "###" ? +*/ +export function parseMeta(s: TokenStream): Cst.Node { throw new Error('todo'); } -export function parseMeta(stream: TokenStream): Cst.Node { +/** + * ```text + * = | | | | | | | + * | | | | + * ``` +*/ +export function parseStatement(s: TokenStream): Cst.Node { + switch (s.token.kind) { + case TokenKind.VarKeyword: + case TokenKind.LetKeyword: { + return parseVarDef(s); + } + case TokenKind.At: { + return parseFnDef(s); + } + default: { + throw new Error('todo'); + } + } +} + +/** + * ```text + * = ("let" | "var") (":" )? "=" + * ``` +*/ +export function parseVarDef(s: TokenStream): Cst.Node { throw new Error('todo'); } -export function parseStatement(stream: TokenStream): Cst.Node { +/** + * ```text + * = "@" "(" ")" (":" )? "{" * "}" + * ``` +*/ +export function parseFnDef(s: TokenStream): Cst.Node { throw new Error('todo'); } From c807ce403f910fb2593282f7127d8ba6375f0717 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 21:15:29 +0900 Subject: [PATCH 022/126] parser wip --- debug.ais | 2 +- src/parser/syntaxes.ts | 92 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 87 insertions(+), 7 deletions(-) diff --git a/debug.ais b/debug.ais index b506100a..492fc438 100644 --- a/debug.ais +++ b/debug.ais @@ -1 +1 @@ -var x = 1; +var x = 1 diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index bde02418..d1d707d0 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -15,6 +15,9 @@ function createNode(type: string, params: Record): Cst.Node { return node as Cst.Node; } + +// Top-level Statement ======================================================== + /** * ```text * = ( | | )* @@ -46,7 +49,7 @@ export function parseTopLevel(s: TokenStream): Cst.Node[] { * = "::" "{" ( | | )* "}" * ``` */ -export function parseNamespace(s: TokenStream): Cst.Node { +function parseNamespace(s: TokenStream): Cst.Node { s.consumeAs(TokenKind.Colon2); s.expect(TokenKind.Identifier); @@ -80,17 +83,20 @@ export function parseNamespace(s: TokenStream): Cst.Node { /** * = "###" ? */ -export function parseMeta(s: TokenStream): Cst.Node { +function parseMeta(s: TokenStream): Cst.Node { throw new Error('todo'); } + +// Statement ================================================================== + /** * ```text * = | | | | | | | * | | | | * ``` */ -export function parseStatement(s: TokenStream): Cst.Node { +function parseStatement(s: TokenStream): Cst.Node { switch (s.token.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { @@ -110,15 +116,89 @@ export function parseStatement(s: TokenStream): Cst.Node { * = ("let" | "var") (":" )? "=" * ``` */ -export function parseVarDef(s: TokenStream): Cst.Node { +function parseVarDef(s: TokenStream): Cst.Node { + throw new Error('todo'); +} + +// Out + +// Attr + +// Each + +// For + +// Return + +// Loop + +// Break + +// Continue + +// Assign + + +// Expression ================================================================= + +// expression + +// If + +// Match + +// Eval + +// Exists + +// Reference + +// template + +// object + +// array + + +// Function =================================================================== + +/** + * ```text + * = "@" "(" ")" (":" )? + * ``` +*/ +function parseFnDef(s: TokenStream): Cst.Node { throw new Error('todo'); } +// fn expression + + +// Static Literal ============================================================= + +// static array + +// static object + + +// Type ======================================================================= + +// fn type + +// named type + + +// Common ===================================================================== + +// namespace path + /** * ```text - * = "@" "(" ")" (":" )? "{" * "}" + * = "{" * "}" * ``` */ -export function parseFnDef(s: TokenStream): Cst.Node { +function parseBlock(s: TokenStream): Cst.Node[] { throw new Error('todo'); } + +// block or statement From 42bca5f98ca403695c98231124339842e72f0c17 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 22:42:32 +0900 Subject: [PATCH 023/126] parser wip --- src/parser/syntaxes.ts | 157 +++++++++++++++++++++++++++++------------ 1 file changed, 112 insertions(+), 45 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index d1d707d0..f45b361e 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -3,20 +3,7 @@ import type { Cst } from '../index.js'; import type { TokenStream } from './token-stream.js'; import { TokenKind } from './token.js'; -function createNode(type: string, params: Record): Cst.Node { - const node: Record = { type }; - //params.children; - for (const key of Object.keys(params)) { - if (params[key] !== undefined) { - node[key] = params[key]; - } - } - //node.loc = { start, end }; - return node as Cst.Node; -} - - -// Top-level Statement ======================================================== +//#region Top-level Statement /** * ```text @@ -25,6 +12,7 @@ function createNode(type: string, params: Record): Cst.Node { */ export function parseTopLevel(s: TokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; + while (!s.kindOf(TokenKind.EOF)) { switch (s.token.kind) { case TokenKind.Colon2: { @@ -41,6 +29,7 @@ export function parseTopLevel(s: TokenStream): Cst.Node[] { } } } + return nodes; } @@ -77,7 +66,7 @@ function parseNamespace(s: TokenStream): Cst.Node { } s.consumeAs(TokenKind.CloseBrace); - return createNode('ns', { name, members }); + return NODE('ns', { name, members }); } /** @@ -87,8 +76,9 @@ function parseMeta(s: TokenStream): Cst.Node { throw new Error('todo'); } +//#endregion Top-level Statement -// Statement ================================================================== +//#region Statement /** * ```text @@ -120,47 +110,83 @@ function parseVarDef(s: TokenStream): Cst.Node { throw new Error('todo'); } -// Out +function parseOut(s: TokenStream): Cst.Node { + throw new Error('todo'); +} // Attr -// Each +function parseEach(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// For +function parseFor(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Return +function parseReturn(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Loop +function parseLoop(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Break +function parseBreak(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Continue +function parseContinue(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Assign +function parseAssign(s: TokenStream): Cst.Node { + throw new Error('todo'); +} +//#endregion Statement -// Expression ================================================================= +//#region Expression -// expression +function parseExpr(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// If +function parseIf(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Match +function parseMatch(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Eval +function parseEval(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Exists +function parseExists(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// Reference +function parseReference(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// template +function parseTemplate(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// object +function parseObject(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// array +function parseArray(s: TokenStream): Cst.Node { + throw new Error('todo'); +} +//#endregion Expression -// Function =================================================================== +//#region Function /** * ```text @@ -171,26 +197,58 @@ function parseFnDef(s: TokenStream): Cst.Node { throw new Error('todo'); } -// fn expression +function parseFnExpr(s: TokenStream): Cst.Node { + throw new Error('todo'); +} +//#endregion Function -// Static Literal ============================================================= +//#region Static Literal -// static array +function parseStaticArray(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// static object +function parseStaticObject(s: TokenStream): Cst.Node { + throw new Error('todo'); +} +//#endregion Static Literal -// Type ======================================================================= +//#region Type -// fn type +function parseFnType(s: TokenStream): Cst.Node { + throw new Error('todo'); +} -// named type +function parseNamedType(s: TokenStream): Cst.Node { + throw new Error('todo'); +} +//#endregion Type -// Common ===================================================================== +//#region Common -// namespace path +function NODE(type: string, params: Record): Cst.Node { + const node: Record = { type }; + //params.children; + for (const key of Object.keys(params)) { + if (params[key] !== undefined) { + node[key] = params[key]; + } + } + //node.loc = { start, end }; + return node as Cst.Node; +} + +/** + * ```text + * = (":" )* + * ``` +*/ +function parseNamePath(s: TokenStream): string { + throw new Error('todo'); +} /** * ```text @@ -201,4 +259,13 @@ function parseBlock(s: TokenStream): Cst.Node[] { throw new Error('todo'); } -// block or statement +/** + * ```text + * = | + * ``` +*/ +function parseBlockOrStatement(s: TokenStream): Cst.Node { + throw new Error('todo'); +} + +//#endregion Common From 3da0adacfa2d1b52709a0fc40cb9dcae25946b8d Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 23:29:51 +0900 Subject: [PATCH 024/126] parser: variable of number type --- src/parser/syntaxes.ts | 47 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index f45b361e..1d0d8087 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -107,7 +107,37 @@ function parseStatement(s: TokenStream): Cst.Node { * ``` */ function parseVarDef(s: TokenStream): Cst.Node { - throw new Error('todo'); + let mut; + switch (s.token.kind) { + case TokenKind.LetKeyword: { + mut = false; + break; + } + case TokenKind.VarKeyword: { + mut = true; + break; + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.token.kind]}`); + } + } + s.next(); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + let ty; + if (s.kindOf(TokenKind.Colon)) { + s.next(); + ty = parseType(s); + } + + s.consumeAs(TokenKind.Eq); + + const expr = parseExpr(s); + + return NODE('def', { name, varType: ty, expr, mut, attr: [] }); } function parseOut(s: TokenStream): Cst.Node { @@ -149,7 +179,16 @@ function parseAssign(s: TokenStream): Cst.Node { //#region Expression function parseExpr(s: TokenStream): Cst.Node { - throw new Error('todo'); + switch (s.token.kind) { + case TokenKind.NumberLiteral: { + const value = Number(s.token.value!); + s.next(); + return NODE('num', { value }); + } + default: { + throw new Error('todo'); + } + } } function parseIf(s: TokenStream): Cst.Node { @@ -217,6 +256,10 @@ function parseStaticObject(s: TokenStream): Cst.Node { //#region Type +function parseType(s: TokenStream): Cst.Node { + throw new Error('todo'); +} + function parseFnType(s: TokenStream): Cst.Node { throw new Error('todo'); } From d9f682ae1584c3c698c368fd86fe5a4889729609 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 23:33:00 +0900 Subject: [PATCH 025/126] refactor --- src/parser/syntaxes.ts | 6 +++--- src/parser/token-stream.ts | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 1d0d8087..05d236fc 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -13,7 +13,7 @@ import { TokenKind } from './token.js'; export function parseTopLevel(s: TokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; - while (!s.kindOf(TokenKind.EOF)) { + while (!s.kindIs(TokenKind.EOF)) { switch (s.token.kind) { case TokenKind.Colon2: { nodes.push(parseNamespace(s)); @@ -47,7 +47,7 @@ function parseNamespace(s: TokenStream): Cst.Node { const members: Cst.Node[] = []; s.consumeAs(TokenKind.OpenBrace); - while (!s.kindOf(TokenKind.CloseBrace)) { + while (!s.kindIs(TokenKind.CloseBrace)) { switch (s.token.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { @@ -128,7 +128,7 @@ function parseVarDef(s: TokenStream): Cst.Node { s.next(); let ty; - if (s.kindOf(TokenKind.Colon)) { + if (s.kindIs(TokenKind.Colon)) { s.next(); ty = parseType(s); } diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index dbef509b..f30fd2f5 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -22,12 +22,12 @@ export class TokenStream { this.next(); } - public kindOf(kind: TokenKind): boolean { + public kindIs(kind: TokenKind): boolean { return (this.token.kind === kind); } public expect(kind: TokenKind): void { - if (!this.kindOf(kind)) { + if (!this.kindIs(kind)) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); } } From 2206dc0aeee99b4668d86706bcf8c44ac8c5b55a Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 18 Sep 2023 23:41:15 +0900 Subject: [PATCH 026/126] wip --- src/parser/syntaxes.ts | 4 ++++ src/parser/token-stream.ts | 1 + 2 files changed, 5 insertions(+) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 05d236fc..04d498f6 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -179,8 +179,12 @@ function parseAssign(s: TokenStream): Cst.Node { //#region Expression function parseExpr(s: TokenStream): Cst.Node { + // TODO: Pratt parsing + switch (s.token.kind) { case TokenKind.NumberLiteral: { + // TODO: sign + // TODO: validate value const value = Number(s.token.value!); s.next(); return NODE('num', { value }); diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index f30fd2f5..ed4485b8 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -377,6 +377,7 @@ export class TokenStream { } private readDigits(): boolean { + // TODO: float number let digits = ''; while (this.char != null && digit.test(this.char)) { digits += this.char; From e641837d68fbf3a4bb2ed5f1850b76e6bb907f94 Mon Sep 17 00:00:00 2001 From: marihachi Date: Tue, 19 Sep 2023 20:20:24 +0900 Subject: [PATCH 027/126] parser wip --- src/parser/syntaxes.ts | 54 +++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 04d498f6..bb01cdb2 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -144,7 +144,9 @@ function parseOut(s: TokenStream): Cst.Node { throw new Error('todo'); } -// Attr +function parseAttr(s: TokenStream): Cst.Node { + throw new Error('todo'); +} function parseEach(s: TokenStream): Cst.Node { throw new Error('todo'); @@ -195,6 +197,11 @@ function parseExpr(s: TokenStream): Cst.Node { } } +/** + * ```text + * = "if" ("elif" )* ("else" )? + * ``` +*/ function parseIf(s: TokenStream): Cst.Node { throw new Error('todo'); } @@ -211,8 +218,26 @@ function parseExists(s: TokenStream): Cst.Node { throw new Error('todo'); } -function parseReference(s: TokenStream): Cst.Node { - throw new Error('todo'); +/** + * ```text + * = (":" )* + * ``` +*/ +function parseReference(s: TokenStream): string { + const segs: string[] = []; + while (true) { + if (segs.length > 0) { + if (s.kindIs(TokenKind.Colon)) { + s.next(); + } else { + break; + } + } + s.expect(TokenKind.Identifier); + segs.push(s.token.value!); + s.next(); + } + return segs.join(':'); } function parseTemplate(s: TokenStream): Cst.Node { @@ -288,22 +313,19 @@ function NODE(type: string, params: Record): Cst.Node { return node as Cst.Node; } -/** - * ```text - * = (":" )* - * ``` -*/ -function parseNamePath(s: TokenStream): string { - throw new Error('todo'); -} - /** * ```text * = "{" * "}" * ``` */ function parseBlock(s: TokenStream): Cst.Node[] { - throw new Error('todo'); + s.consumeAs(TokenKind.OpenBrace); + const steps: Cst.Node[] = []; + while (!s.kindIs(TokenKind.CloseBrace)) { + steps.push(parseStatement(s)); + } + s.consumeAs(TokenKind.CloseBrace); + return steps; } /** @@ -312,7 +334,11 @@ function parseBlock(s: TokenStream): Cst.Node[] { * ``` */ function parseBlockOrStatement(s: TokenStream): Cst.Node { - throw new Error('todo'); + if (s.kindIs(TokenKind.OpenBrace)) { + return NODE('block', { statements: parseBlock(s) }); + } else { + return parseStatement(s); + } } //#endregion Common From 5c2cc252aa5e8e9dfa8487713e22d8773e51843d Mon Sep 17 00:00:00 2001 From: marihachi Date: Tue, 19 Sep 2023 20:43:56 +0900 Subject: [PATCH 028/126] doc --- src/parser/syntaxes.ts | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index bb01cdb2..cae2d056 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -6,8 +6,8 @@ import { TokenKind } from './token.js'; //#region Top-level Statement /** - * ```text - * = ( | | )* + * ```abnf + * TopLevel = *(Namespace / Meta / Statement) * ``` */ export function parseTopLevel(s: TokenStream): Cst.Node[] { @@ -34,8 +34,8 @@ export function parseTopLevel(s: TokenStream): Cst.Node[] { } /** - * ```text - * = "::" "{" ( | | )* "}" + * ```abnf + * Namespace = "::" IDENT "{" *(VarDef / FnDef / Namespace) "}" * ``` */ function parseNamespace(s: TokenStream): Cst.Node { @@ -70,7 +70,9 @@ function parseNamespace(s: TokenStream): Cst.Node { } /** - * = "###" ? + * ```abnf + * Meta = "###" [IDENT] StaticLiteral + * ``` */ function parseMeta(s: TokenStream): Cst.Node { throw new Error('todo'); @@ -81,9 +83,9 @@ function parseMeta(s: TokenStream): Cst.Node { //#region Statement /** - * ```text - * = | | | | | | | - * | | | | + * ```abnf + * Statement = VarDef / FnDef / Out / Return / Attr / Each / For / Loop + * / Break / Continue / Assign / Expr * ``` */ function parseStatement(s: TokenStream): Cst.Node { @@ -102,8 +104,8 @@ function parseStatement(s: TokenStream): Cst.Node { } /** - * ```text - * = ("let" | "var") (":" )? "=" + * ```abnf + * VarDef = ("let" / "var") IDENT [":" Type] "=" Expr * ``` */ function parseVarDef(s: TokenStream): Cst.Node { @@ -198,8 +200,8 @@ function parseExpr(s: TokenStream): Cst.Node { } /** - * ```text - * = "if" ("elif" )* ("else" )? + * ```abnf + * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] * ``` */ function parseIf(s: TokenStream): Cst.Node { @@ -219,8 +221,8 @@ function parseExists(s: TokenStream): Cst.Node { } /** - * ```text - * = (":" )* + * ```abnf + * Reference = IDENT *(":" IDENT) * ``` */ function parseReference(s: TokenStream): string { @@ -257,8 +259,8 @@ function parseArray(s: TokenStream): Cst.Node { //#region Function /** - * ```text - * = "@" "(" ")" (":" )? + * ```abnf + * FnDef = "@" IDENT "(" Args ")" [":" Type] Block * ``` */ function parseFnDef(s: TokenStream): Cst.Node { @@ -314,8 +316,8 @@ function NODE(type: string, params: Record): Cst.Node { } /** - * ```text - * = "{" * "}" + * ```abnf + * Block = "{" *Statement "}" * ``` */ function parseBlock(s: TokenStream): Cst.Node[] { @@ -329,8 +331,8 @@ function parseBlock(s: TokenStream): Cst.Node[] { } /** - * ```text - * = | + * ```abnf + * BlockOrStatement = Block / Statement * ``` */ function parseBlockOrStatement(s: TokenStream): Cst.Node { From b712b800dd30179715a385eba184c58690ae1451 Mon Sep 17 00:00:00 2001 From: marihachi Date: Tue, 19 Sep 2023 22:08:50 +0900 Subject: [PATCH 029/126] parser wip --- src/parser/syntaxes.ts | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index cae2d056..c3314d7e 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -98,7 +98,7 @@ function parseStatement(s: TokenStream): Cst.Node { return parseFnDef(s); } default: { - throw new Error('todo'); + return parseExpr(s); } } } @@ -186,6 +186,9 @@ function parseExpr(s: TokenStream): Cst.Node { // TODO: Pratt parsing switch (s.token.kind) { + case TokenKind.Identifier: { + return parseReference(s); + } case TokenKind.NumberLiteral: { // TODO: sign // TODO: validate value @@ -193,6 +196,9 @@ function parseExpr(s: TokenStream): Cst.Node { s.next(); return NODE('num', { value }); } + case TokenKind.IfKeyword: { + return parseIf(s); + } default: { throw new Error('todo'); } @@ -205,7 +211,25 @@ function parseExpr(s: TokenStream): Cst.Node { * ``` */ function parseIf(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.consumeAs(TokenKind.IfKeyword); + const cond = parseExpr(s); + const then = parseBlockOrStatement(s); + + const elseif: { cond: any, then: any }[] = []; + while (s.kindIs(TokenKind.ElifKeyword)) { + s.next(); + const elifCond = parseExpr(s); + const elifThen = parseBlockOrStatement(s); + elseif.push({ cond: elifCond, then: elifThen }); + } + + let _else = undefined; + if (s.kindIs(TokenKind.ElseKeyword)) { + s.next(); + _else = parseBlockOrStatement(s); + } + + return NODE('if', { cond, then, elseif, else: _else }); } function parseMatch(s: TokenStream): Cst.Node { @@ -225,7 +249,7 @@ function parseExists(s: TokenStream): Cst.Node { * Reference = IDENT *(":" IDENT) * ``` */ -function parseReference(s: TokenStream): string { +function parseReference(s: TokenStream): Cst.Node { const segs: string[] = []; while (true) { if (segs.length > 0) { @@ -239,7 +263,7 @@ function parseReference(s: TokenStream): string { segs.push(s.token.value!); s.next(); } - return segs.join(':'); + return NODE('identifier', { name: segs.join(':') }); } function parseTemplate(s: TokenStream): Cst.Node { From 6d3d0ca3a12827807422305977956a2fc28217bb Mon Sep 17 00:00:00 2001 From: marihachi Date: Wed, 20 Sep 2023 19:09:36 +0900 Subject: [PATCH 030/126] refactor --- src/parser/syntaxes.ts | 30 +++++++++++++++--------------- src/parser/token-stream.ts | 12 ++++++------ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index c3314d7e..7ed8a868 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -13,7 +13,7 @@ import { TokenKind } from './token.js'; export function parseTopLevel(s: TokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; - while (!s.kindIs(TokenKind.EOF)) { + while (s.kind !== TokenKind.EOF) { switch (s.token.kind) { case TokenKind.Colon2: { nodes.push(parseNamespace(s)); @@ -39,15 +39,15 @@ export function parseTopLevel(s: TokenStream): Cst.Node[] { * ``` */ function parseNamespace(s: TokenStream): Cst.Node { - s.consumeAs(TokenKind.Colon2); + s.nextWith(TokenKind.Colon2); s.expect(TokenKind.Identifier); const name = s.token.value!; s.next(); const members: Cst.Node[] = []; - s.consumeAs(TokenKind.OpenBrace); - while (!s.kindIs(TokenKind.CloseBrace)) { + s.nextWith(TokenKind.OpenBrace); + while (s.kind !== TokenKind.CloseBrace) { switch (s.token.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { @@ -64,7 +64,7 @@ function parseNamespace(s: TokenStream): Cst.Node { } } } - s.consumeAs(TokenKind.CloseBrace); + s.nextWith(TokenKind.CloseBrace); return NODE('ns', { name, members }); } @@ -130,12 +130,12 @@ function parseVarDef(s: TokenStream): Cst.Node { s.next(); let ty; - if (s.kindIs(TokenKind.Colon)) { + if (s.kind === TokenKind.Colon) { s.next(); ty = parseType(s); } - s.consumeAs(TokenKind.Eq); + s.nextWith(TokenKind.Eq); const expr = parseExpr(s); @@ -211,12 +211,12 @@ function parseExpr(s: TokenStream): Cst.Node { * ``` */ function parseIf(s: TokenStream): Cst.Node { - s.consumeAs(TokenKind.IfKeyword); + s.nextWith(TokenKind.IfKeyword); const cond = parseExpr(s); const then = parseBlockOrStatement(s); const elseif: { cond: any, then: any }[] = []; - while (s.kindIs(TokenKind.ElifKeyword)) { + while (s.kind === TokenKind.ElifKeyword) { s.next(); const elifCond = parseExpr(s); const elifThen = parseBlockOrStatement(s); @@ -224,7 +224,7 @@ function parseIf(s: TokenStream): Cst.Node { } let _else = undefined; - if (s.kindIs(TokenKind.ElseKeyword)) { + if (s.kind === TokenKind.ElseKeyword) { s.next(); _else = parseBlockOrStatement(s); } @@ -253,7 +253,7 @@ function parseReference(s: TokenStream): Cst.Node { const segs: string[] = []; while (true) { if (segs.length > 0) { - if (s.kindIs(TokenKind.Colon)) { + if (s.kind === TokenKind.Colon) { s.next(); } else { break; @@ -345,12 +345,12 @@ function NODE(type: string, params: Record): Cst.Node { * ``` */ function parseBlock(s: TokenStream): Cst.Node[] { - s.consumeAs(TokenKind.OpenBrace); + s.nextWith(TokenKind.OpenBrace); const steps: Cst.Node[] = []; - while (!s.kindIs(TokenKind.CloseBrace)) { + while (s.kind !== TokenKind.CloseBrace) { steps.push(parseStatement(s)); } - s.consumeAs(TokenKind.CloseBrace); + s.nextWith(TokenKind.CloseBrace); return steps; } @@ -360,7 +360,7 @@ function parseBlock(s: TokenStream): Cst.Node[] { * ``` */ function parseBlockOrStatement(s: TokenStream): Cst.Node { - if (s.kindIs(TokenKind.OpenBrace)) { + if (s.kind === TokenKind.OpenBrace) { return NODE('block', { statements: parseBlock(s) }); } else { return parseStatement(s); diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index ed4485b8..4015cbf8 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -22,17 +22,13 @@ export class TokenStream { this.next(); } - public kindIs(kind: TokenKind): boolean { - return (this.token.kind === kind); - } - public expect(kind: TokenKind): void { - if (!this.kindIs(kind)) { + if (this.kind !== kind) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); } } - public consumeAs(kind: TokenKind): void { + public nextWith(kind: TokenKind): void { this.expect(kind); this.next(); } @@ -63,6 +59,10 @@ export class TokenStream { return this._token; } + public get kind(): TokenKind { + return this.token.kind; + } + public next(): void { while (true) { // EOF terminated From 98aa836609f4c29b5ac3dc6224026ea2b7b3fba0 Mon Sep 17 00:00:00 2001 From: marihachi Date: Wed, 20 Sep 2023 21:25:20 +0900 Subject: [PATCH 031/126] parser wip --- src/parser/syntaxes.ts | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 7ed8a868..5a424e5c 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -236,12 +236,26 @@ function parseMatch(s: TokenStream): Cst.Node { throw new Error('todo'); } +/** + * ```abnf + * Eval = "eval" Block + * ``` +*/ function parseEval(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.EvalKeyword); + const statements = parseBlock(s); + return NODE('block', { statements }); } +/** + * ```abnf + * Exists = "exists" Reference + * ``` +*/ function parseExists(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.ExistsKeyword); + const identifier = parseReference(s); + return NODE('exists', { identifier }); } /** @@ -361,7 +375,8 @@ function parseBlock(s: TokenStream): Cst.Node[] { */ function parseBlockOrStatement(s: TokenStream): Cst.Node { if (s.kind === TokenKind.OpenBrace) { - return NODE('block', { statements: parseBlock(s) }); + const statements = parseBlock(s); + return NODE('block', { statements }); } else { return parseStatement(s); } From 5545b63ea56080b34fdfbb94da9cf7f9bdda7f04 Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 21 Sep 2023 23:47:02 +0900 Subject: [PATCH 032/126] debug --- parse.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.js b/parse.js index f8171ab8..dea19f39 100644 --- a/parse.js +++ b/parse.js @@ -1,5 +1,5 @@ import fs from 'fs'; -import { Parser } from './built/esm/parser/index.js'; +import { Parser } from '@syuilo/aiscript'; const script = fs.readFileSync('./debug.ais', 'utf8'); const ast = Parser.parse(script); From 8eb63f0ee6fb93449b7ddc3de49564ac45adb77b Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 21 Sep 2023 23:47:39 +0900 Subject: [PATCH 033/126] parser wip --- src/parser/syntaxes.ts | 77 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 5a424e5c..bd666d96 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -75,7 +75,17 @@ function parseNamespace(s: TokenStream): Cst.Node { * ``` */ function parseMeta(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.Sharp3); + + let name; + if (s.kind === TokenKind.Identifier) { + name = s.token.value; + s.next(); + } + + const value = parseStaticLiteral(s); + + return NODE('meta', { name, value }); } //#endregion Top-level Statement @@ -97,6 +107,14 @@ function parseStatement(s: TokenStream): Cst.Node { case TokenKind.At: { return parseFnDef(s); } + case TokenKind.BreakKeyword: { + s.next(); + return NODE('break', {}); + } + case TokenKind.ContinueKeyword: { + s.next(); + return NODE('continue', {}); + } default: { return parseExpr(s); } @@ -142,8 +160,18 @@ function parseVarDef(s: TokenStream): Cst.Node { return NODE('def', { name, varType: ty, expr, mut, attr: [] }); } +/** + * ```abnf + * Out = "<:" Expr + * ``` +*/ function parseOut(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.Out); + const expr = parseExpr(s); + return NODE('identifier', { + name: 'print', + chain: [NODE('callChain', { args: [expr] })], + }); } function parseAttr(s: TokenStream): Cst.Node { @@ -158,20 +186,26 @@ function parseFor(s: TokenStream): Cst.Node { throw new Error('todo'); } +/** + * ```abnf + * Return = "return" Expr + * ``` +*/ function parseReturn(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.ReturnKeyword); + const expr = parseExpr(s); + return NODE('return', { expr }); } +/** + * ```abnf + * Loop = "loop" Block + * ``` +*/ function parseLoop(s: TokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseBreak(s: TokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseContinue(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.LoopKeyword); + const statements = parseBlock(s); + return NODE('loop', { statements }); } function parseAssign(s: TokenStream): Cst.Node { @@ -186,9 +220,6 @@ function parseExpr(s: TokenStream): Cst.Node { // TODO: Pratt parsing switch (s.token.kind) { - case TokenKind.Identifier: { - return parseReference(s); - } case TokenKind.NumberLiteral: { // TODO: sign // TODO: validate value @@ -199,6 +230,15 @@ function parseExpr(s: TokenStream): Cst.Node { case TokenKind.IfKeyword: { return parseIf(s); } + case TokenKind.EvalKeyword: { + return parseEval(s); + } + case TokenKind.ExistsKeyword: { + return parseExists(s); + } + case TokenKind.Identifier: { + return parseReference(s); + } default: { throw new Error('todo'); } @@ -313,6 +353,10 @@ function parseFnExpr(s: TokenStream): Cst.Node { //#region Static Literal +function parseStaticLiteral(s: TokenStream): Cst.Node { + throw new Error('todo'); +} + function parseStaticArray(s: TokenStream): Cst.Node { throw new Error('todo'); } @@ -360,11 +404,14 @@ function NODE(type: string, params: Record): Cst.Node { */ function parseBlock(s: TokenStream): Cst.Node[] { s.nextWith(TokenKind.OpenBrace); + const steps: Cst.Node[] = []; while (s.kind !== TokenKind.CloseBrace) { steps.push(parseStatement(s)); } + s.nextWith(TokenKind.CloseBrace); + return steps; } From c2da110aacba9ed49ed791ce34749d55ee2aaa10 Mon Sep 17 00:00:00 2001 From: marihachi Date: Fri, 22 Sep 2023 00:07:14 +0900 Subject: [PATCH 034/126] clean --- package.json | 5 ----- 1 file changed, 5 deletions(-) diff --git a/package.json b/package.json index ea7e7f8d..a14bb46a 100644 --- a/package.json +++ b/package.json @@ -20,9 +20,6 @@ "scripts": { "start": "node ./run", "parse": "node ./parse", - "peg": "peggy --format es --cache -o src/parser/parser.js --allowed-start-rules Preprocess,Main src/parser/parser.peggy && npm run peg-copy", - "peg-debug": "peggy --trace --format es --cache -o src/parser/parser.js --allowed-start-rules Preprocess,Main src/parser/parser.peggy && npm run peg-copy", - "peg-copy": "copyfiles -f src/parser/parser.js built/parser/", "ts": "npm run ts-esm && npm run ts-dts", "ts-esm": "tsc --outDir built/esm", "ts-dts": "tsc --outDir built/dts --declaration true --emitDeclarationOnly true --declarationMap true", @@ -43,11 +40,9 @@ "@typescript-eslint/eslint-plugin": "6.7.2", "@typescript-eslint/parser": "6.7.2", "chalk": "5.3.0", - "copyfiles": "2.4.1", "eslint": "8.49.0", "eslint-plugin-import": "2.28.1", "jest": "29.7.0", - "peggy": "3.0.2", "ts-jest": "29.1.1", "ts-jest-resolver": "2.0.1", "ts-node": "10.9.1", From f222e8eed187de33e29be8a9bc545321b24b7462 Mon Sep 17 00:00:00 2001 From: marihachi Date: Fri, 22 Sep 2023 00:14:30 +0900 Subject: [PATCH 035/126] parser wip: statement --- src/parser/syntaxes.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index bd666d96..87da6fe1 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -107,6 +107,18 @@ function parseStatement(s: TokenStream): Cst.Node { case TokenKind.At: { return parseFnDef(s); } + case TokenKind.Out: { + return parseOut(s); + } + case TokenKind.ReturnKeyword: { + return parseReturn(s); + } + // Attr + // Each + // For + case TokenKind.LoopKeyword: { + return parseLoop(s); + } case TokenKind.BreakKeyword: { s.next(); return NODE('break', {}); @@ -115,6 +127,7 @@ function parseStatement(s: TokenStream): Cst.Node { s.next(); return NODE('continue', {}); } + // Assign default: { return parseExpr(s); } From e7ec8adc42b34ea6dd7c4a17c2b0db087ea13728 Mon Sep 17 00:00:00 2001 From: marihachi Date: Fri, 22 Sep 2023 21:19:49 +0900 Subject: [PATCH 036/126] parser wip: array --- src/parser/syntaxes.ts | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 87da6fe1..4783aacd 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -252,6 +252,9 @@ function parseExpr(s: TokenStream): Cst.Node { case TokenKind.Identifier: { return parseReference(s); } + case TokenKind.OpenBracket: { + return parseArray(s); + } default: { throw new Error('todo'); } @@ -341,8 +344,25 @@ function parseObject(s: TokenStream): Cst.Node { throw new Error('todo'); } +/** + * ```abnf + * Array = "[" *(Expr [","]) "]" + * ``` +*/ function parseArray(s: TokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.OpenBracket); + + const value = []; + while (s.kind !== TokenKind.CloseBracket) { + value.push(parseExpr(s)); + if (s.kind === TokenKind.Comma) { + s.next(); + } + } + + s.nextWith(TokenKind.CloseBracket); + + return NODE('arr', { value }); } //#endregion Expression From 69f17e489474dc2bdda3628a1d19dfc653b54499 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 23 Sep 2023 21:31:41 +0900 Subject: [PATCH 037/126] parser wip: tmpl, string literal --- src/parser/index.ts | 4 +- src/parser/syntaxes.ts | 105 +++++++----- src/parser/token-stream.ts | 332 +++++++++++++++++++++++++------------ src/parser/token.ts | 16 +- 4 files changed, 313 insertions(+), 144 deletions(-) diff --git a/src/parser/index.ts b/src/parser/index.ts index 2bdb8eea..dcc4a57d 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,4 +1,4 @@ -import { TokenStream } from './token-stream.js'; +import { SourceReader } from './token-stream.js'; import { parseTopLevel } from './syntaxes.js'; import { validateKeyword } from './plugins/validate-keyword.js'; @@ -56,7 +56,7 @@ export class Parser { public parse(input: string): Ast.Node[] { let nodes: Cst.Node[]; - const stream = new TokenStream(input); + const stream = new SourceReader(input); stream.init(); nodes = parseTopLevel(stream); diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 4783aacd..80887b9b 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -1,7 +1,9 @@ import { AiScriptSyntaxError } from '../error.js'; -import type { Cst } from '../index.js'; -import type { TokenStream } from './token-stream.js'; import { TokenKind } from './token.js'; +import { TokenSequence } from './token-stream.js'; +import type { ITokenStream } from './token-stream.js'; + +import type * as Cst from './node.js'; //#region Top-level Statement @@ -10,7 +12,7 @@ import { TokenKind } from './token.js'; * TopLevel = *(Namespace / Meta / Statement) * ``` */ -export function parseTopLevel(s: TokenStream): Cst.Node[] { +export function parseTopLevel(s: ITokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; while (s.kind !== TokenKind.EOF) { @@ -38,7 +40,7 @@ export function parseTopLevel(s: TokenStream): Cst.Node[] { * Namespace = "::" IDENT "{" *(VarDef / FnDef / Namespace) "}" * ``` */ -function parseNamespace(s: TokenStream): Cst.Node { +function parseNamespace(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Colon2); s.expect(TokenKind.Identifier); @@ -74,7 +76,7 @@ function parseNamespace(s: TokenStream): Cst.Node { * Meta = "###" [IDENT] StaticLiteral * ``` */ -function parseMeta(s: TokenStream): Cst.Node { +function parseMeta(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Sharp3); let name; @@ -98,7 +100,7 @@ function parseMeta(s: TokenStream): Cst.Node { * / Break / Continue / Assign / Expr * ``` */ -function parseStatement(s: TokenStream): Cst.Node { +function parseStatement(s: ITokenStream): Cst.Node { switch (s.token.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { @@ -139,7 +141,7 @@ function parseStatement(s: TokenStream): Cst.Node { * VarDef = ("let" / "var") IDENT [":" Type] "=" Expr * ``` */ -function parseVarDef(s: TokenStream): Cst.Node { +function parseVarDef(s: ITokenStream): Cst.Node { let mut; switch (s.token.kind) { case TokenKind.LetKeyword: { @@ -178,7 +180,7 @@ function parseVarDef(s: TokenStream): Cst.Node { * Out = "<:" Expr * ``` */ -function parseOut(s: TokenStream): Cst.Node { +function parseOut(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Out); const expr = parseExpr(s); return NODE('identifier', { @@ -187,15 +189,15 @@ function parseOut(s: TokenStream): Cst.Node { }); } -function parseAttr(s: TokenStream): Cst.Node { +function parseAttr(s: ITokenStream): Cst.Node { throw new Error('todo'); } -function parseEach(s: TokenStream): Cst.Node { +function parseEach(s: ITokenStream): Cst.Node { throw new Error('todo'); } -function parseFor(s: TokenStream): Cst.Node { +function parseFor(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -204,7 +206,7 @@ function parseFor(s: TokenStream): Cst.Node { * Return = "return" Expr * ``` */ -function parseReturn(s: TokenStream): Cst.Node { +function parseReturn(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.ReturnKeyword); const expr = parseExpr(s); return NODE('return', { expr }); @@ -215,13 +217,13 @@ function parseReturn(s: TokenStream): Cst.Node { * Loop = "loop" Block * ``` */ -function parseLoop(s: TokenStream): Cst.Node { +function parseLoop(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.LoopKeyword); const statements = parseBlock(s); return NODE('loop', { statements }); } -function parseAssign(s: TokenStream): Cst.Node { +function parseAssign(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -229,10 +231,10 @@ function parseAssign(s: TokenStream): Cst.Node { //#region Expression -function parseExpr(s: TokenStream): Cst.Node { +function parseExpr(s: ITokenStream): Cst.Node { // TODO: Pratt parsing - switch (s.token.kind) { + switch (s.kind) { case TokenKind.NumberLiteral: { // TODO: sign // TODO: validate value @@ -240,6 +242,39 @@ function parseExpr(s: TokenStream): Cst.Node { s.next(); return NODE('num', { value }); } + case TokenKind.StringLiteral: { + const value = s.token.value!; + s.next(); + return NODE('str', { value }); + } + case TokenKind.Template: { + const values: (string | Cst.Node)[] = []; + + for (const element of s.token.children!) { + switch (element.kind) { + case TokenKind.TemplateStringElement: { + values.push(NODE('str', { value: element.value! })); + break; + } + case TokenKind.TemplateExprElement: { + const exprStream = new TokenSequence(element.children!); + exprStream.init(); + const expr = parseExpr(exprStream); + if (exprStream.kind != TokenKind.EOF) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[exprStream.token.kind]}`); + } + values.push(expr); + break; + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[element.kind]}`); + } + } + } + + s.next(); + return NODE('tmpl', { tmpl: values }); + } case TokenKind.IfKeyword: { return parseIf(s); } @@ -266,7 +301,7 @@ function parseExpr(s: TokenStream): Cst.Node { * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] * ``` */ -function parseIf(s: TokenStream): Cst.Node { +function parseIf(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.IfKeyword); const cond = parseExpr(s); const then = parseBlockOrStatement(s); @@ -288,7 +323,7 @@ function parseIf(s: TokenStream): Cst.Node { return NODE('if', { cond, then, elseif, else: _else }); } -function parseMatch(s: TokenStream): Cst.Node { +function parseMatch(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -297,7 +332,7 @@ function parseMatch(s: TokenStream): Cst.Node { * Eval = "eval" Block * ``` */ -function parseEval(s: TokenStream): Cst.Node { +function parseEval(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.EvalKeyword); const statements = parseBlock(s); return NODE('block', { statements }); @@ -308,7 +343,7 @@ function parseEval(s: TokenStream): Cst.Node { * Exists = "exists" Reference * ``` */ -function parseExists(s: TokenStream): Cst.Node { +function parseExists(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.ExistsKeyword); const identifier = parseReference(s); return NODE('exists', { identifier }); @@ -319,7 +354,7 @@ function parseExists(s: TokenStream): Cst.Node { * Reference = IDENT *(":" IDENT) * ``` */ -function parseReference(s: TokenStream): Cst.Node { +function parseReference(s: ITokenStream): Cst.Node { const segs: string[] = []; while (true) { if (segs.length > 0) { @@ -336,11 +371,7 @@ function parseReference(s: TokenStream): Cst.Node { return NODE('identifier', { name: segs.join(':') }); } -function parseTemplate(s: TokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseObject(s: TokenStream): Cst.Node { +function parseObject(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -349,7 +380,7 @@ function parseObject(s: TokenStream): Cst.Node { * Array = "[" *(Expr [","]) "]" * ``` */ -function parseArray(s: TokenStream): Cst.Node { +function parseArray(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.OpenBracket); const value = []; @@ -374,11 +405,11 @@ function parseArray(s: TokenStream): Cst.Node { * FnDef = "@" IDENT "(" Args ")" [":" Type] Block * ``` */ -function parseFnDef(s: TokenStream): Cst.Node { +function parseFnDef(s: ITokenStream): Cst.Node { throw new Error('todo'); } -function parseFnExpr(s: TokenStream): Cst.Node { +function parseFnExpr(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -386,15 +417,15 @@ function parseFnExpr(s: TokenStream): Cst.Node { //#region Static Literal -function parseStaticLiteral(s: TokenStream): Cst.Node { +function parseStaticLiteral(s: ITokenStream): Cst.Node { throw new Error('todo'); } -function parseStaticArray(s: TokenStream): Cst.Node { +function parseStaticArray(s: ITokenStream): Cst.Node { throw new Error('todo'); } -function parseStaticObject(s: TokenStream): Cst.Node { +function parseStaticObject(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -402,15 +433,15 @@ function parseStaticObject(s: TokenStream): Cst.Node { //#region Type -function parseType(s: TokenStream): Cst.Node { +function parseType(s: ITokenStream): Cst.Node { throw new Error('todo'); } -function parseFnType(s: TokenStream): Cst.Node { +function parseFnType(s: ITokenStream): Cst.Node { throw new Error('todo'); } -function parseNamedType(s: TokenStream): Cst.Node { +function parseNamedType(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -435,7 +466,7 @@ function NODE(type: string, params: Record): Cst.Node { * Block = "{" *Statement "}" * ``` */ -function parseBlock(s: TokenStream): Cst.Node[] { +function parseBlock(s: ITokenStream): Cst.Node[] { s.nextWith(TokenKind.OpenBrace); const steps: Cst.Node[] = []; @@ -453,7 +484,7 @@ function parseBlock(s: TokenStream): Cst.Node[] { * BlockOrStatement = Block / Statement * ``` */ -function parseBlockOrStatement(s: TokenStream): Cst.Node { +function parseBlockOrStatement(s: ITokenStream): Cst.Node { if (s.kind === TokenKind.OpenBrace) { const statements = parseBlock(s); return NODE('block', { statements }); diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 4015cbf8..0c47f7f7 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -6,7 +6,15 @@ const spacingChars = [' ', '\t', '\r', '\n']; const digit = /^[0-9]$/; const wordChar = /^[A-Za-z0-9_]$/; -export class TokenStream { +export interface ITokenStream { + expect(kind: TokenKind): void; + nextWith(kind: TokenKind): void; + get token(): Token; + get kind(): TokenKind; + next(): void; +} + +export class SourceReader implements ITokenStream { private source: string; private _token?: Token; private index: number; @@ -17,9 +25,9 @@ export class TokenStream { this.index = 0; } - public init() { + public init(): void { this.loadChar(); - this.next(); + this._token = this.nextToken(); } public expect(kind: TokenKind): void { @@ -54,7 +62,7 @@ export class TokenStream { public get token(): Token { if (this._token == null) { - throw new Error('invalid operation: token is not read yet'); + throw new Error('stream is not initialized yet'); } return this._token; } @@ -64,10 +72,18 @@ export class TokenStream { } public next(): void { + if (this._token == null) { + throw new Error('stream is not initialized yet'); + } + this._token = this.nextToken(); + } + + private nextToken(): Token { + let token; while (true) { // EOF terminated if (this.char == null) { - this._token = TOKEN(TokenKind.EOF); + token = TOKEN(TokenKind.EOF); break; } // skip spasing @@ -75,94 +91,94 @@ export class TokenStream { this.nextChar(); continue; } - let match = true; switch (this.char) { case '!': { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this._token = TOKEN(TokenKind.NotEq); + token = TOKEN(TokenKind.NotEq); } else { - this._token = TOKEN(TokenKind.Not); + token = TOKEN(TokenKind.Not); } break; } + case '"': { + this.nextChar(); + token = this.readStringLiteral(); + break; + } case '#': { this.nextChar(); if ((this.char as string) === '#') { this.nextChar(); if ((this.char as string) === '#') { this.nextChar(); - this._token = TOKEN(TokenKind.Sharp3); - } else { - match = false; + token = TOKEN(TokenKind.Sharp3); } } else if ((this.char as string) === '[') { this.nextChar(); - this._token = TOKEN(TokenKind.OpenSharpBracket); + token = TOKEN(TokenKind.OpenSharpBracket); } else { - this._token = TOKEN(TokenKind.Sharp); + token = TOKEN(TokenKind.Sharp); } break; } case '%': { this.nextChar(); - this._token = TOKEN(TokenKind.Percent); + token = TOKEN(TokenKind.Percent); break; } case '&': { this.nextChar(); if ((this.char as string) === '&') { this.nextChar(); - this._token = TOKEN(TokenKind.And2); - } else { - match = false; + token = TOKEN(TokenKind.And2); } break; } case '(': { this.nextChar(); - this._token = TOKEN(TokenKind.OpenParen); + token = TOKEN(TokenKind.OpenParen); break; } case ')': { this.nextChar(); - this._token = TOKEN(TokenKind.CloseParen); + token = TOKEN(TokenKind.CloseParen); break; } case '*': { this.nextChar(); - this._token = TOKEN(TokenKind.Asterisk); + token = TOKEN(TokenKind.Asterisk); break; } case '+': { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this._token = TOKEN(TokenKind.PlusEq); + token = TOKEN(TokenKind.PlusEq); } else { - this._token = TOKEN(TokenKind.Plus); + token = TOKEN(TokenKind.Plus); } break; } case ',': { this.nextChar(); - this._token = TOKEN(TokenKind.Comma); + token = TOKEN(TokenKind.Comma); break; } case '-': { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this._token = TOKEN(TokenKind.MinusEq); + token = TOKEN(TokenKind.MinusEq); } else { - this._token = TOKEN(TokenKind.Minus); + token = TOKEN(TokenKind.Minus); } break; } case '.': { this.nextChar(); - this._token = TOKEN(TokenKind.Dot); + token = TOKEN(TokenKind.Dot); break; } case '/': { @@ -176,7 +192,7 @@ export class TokenStream { this.skipCommentLine(); continue; } else { - this._token = TOKEN(TokenKind.Slash); + token = TOKEN(TokenKind.Slash); } break; } @@ -184,27 +200,27 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === ':') { this.nextChar(); - this._token = TOKEN(TokenKind.Colon2); + token = TOKEN(TokenKind.Colon2); } else { - this._token = TOKEN(TokenKind.Colon); + token = TOKEN(TokenKind.Colon); } break; } case ';': { this.nextChar(); - this._token = TOKEN(TokenKind.SemiColon); + token = TOKEN(TokenKind.SemiColon); break; } case '<': { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this._token = TOKEN(TokenKind.LtEq); + token = TOKEN(TokenKind.LtEq); } else if ((this.char as string) === ':') { this.nextChar(); - this._token = TOKEN(TokenKind.Out); + token = TOKEN(TokenKind.Out); } else { - this._token = TOKEN(TokenKind.Lt); + token = TOKEN(TokenKind.Lt); } break; } @@ -212,12 +228,12 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this._token = TOKEN(TokenKind.Eq2); + token = TOKEN(TokenKind.Eq2); } else if ((this.char as string) === '>') { this.nextChar(); - this._token = TOKEN(TokenKind.Arrow); + token = TOKEN(TokenKind.Arrow); } else { - this._token = TOKEN(TokenKind.Eq); + token = TOKEN(TokenKind.Eq); } break; } @@ -225,169 +241,234 @@ export class TokenStream { this.nextChar(); if ((this.char as string) === '=') { this.nextChar(); - this._token = TOKEN(TokenKind.GtEq); + token = TOKEN(TokenKind.GtEq); } else { - this._token = TOKEN(TokenKind.Gt); + token = TOKEN(TokenKind.Gt); } break; } case '@': { this.nextChar(); - this._token = TOKEN(TokenKind.At); + token = TOKEN(TokenKind.At); break; } case '[': { this.nextChar(); - this._token = TOKEN(TokenKind.OpenBracket); + token = TOKEN(TokenKind.OpenBracket); break; } case ']': { this.nextChar(); - this._token = TOKEN(TokenKind.CloseBracket); + token = TOKEN(TokenKind.CloseBracket); break; } case '^': { this.nextChar(); - this._token = TOKEN(TokenKind.Hat); + token = TOKEN(TokenKind.Hat); + break; + } + case '`': { + this.nextChar(); + token = this.readTemplate(); break; } case '{': { this.nextChar(); - this._token = TOKEN(TokenKind.OpenBrace); + token = TOKEN(TokenKind.OpenBrace); break; } case '|': { this.nextChar(); if ((this.char as string) === '|') { this.nextChar(); - this._token = TOKEN(TokenKind.Or2); - } else { - match = false; + token = TOKEN(TokenKind.Or2); } break; } case '}': { this.nextChar(); - this._token = TOKEN(TokenKind.CloseBrace); + token = TOKEN(TokenKind.CloseBrace); break; } - default: { - match = false; - } } - if (!match) { - if (this.readDigits()) { + if (token == null) { + const digitToken = this.tryReadDigits(); + if (digitToken) { + token = digitToken; break; } - if (this.readWord()) { + const wordToken = this.tryReadWord(); + if (wordToken) { + token = wordToken; break; } throw new AiScriptSyntaxError(`invalid character: "${this.char}"`); } break; } + return token; } - private readWord(): boolean { + private tryReadWord(): Token | undefined { // read a word - let word = ''; + let value = ''; while (this.char != null && wordChar.test(this.char)) { - word += this.char; + value += this.char; this.nextChar(); } - if (word.length === 0) { - return false; + if (value.length === 0) { + return; } // check word kind - switch (word) { + switch (value) { case 'null': { - this._token = TOKEN(TokenKind.NullKeyword); - break; + return TOKEN(TokenKind.NullKeyword); } case 'true': { - this._token = TOKEN(TokenKind.TrueKeyword); - break; + return TOKEN(TokenKind.TrueKeyword); } case 'false': { - this._token = TOKEN(TokenKind.FalseKeyword); - break; + return TOKEN(TokenKind.FalseKeyword); } case 'each': { - this._token = TOKEN(TokenKind.EachKeyword); - break; + return TOKEN(TokenKind.EachKeyword); } case 'for': { - this._token = TOKEN(TokenKind.ForKeyword); - break; + return TOKEN(TokenKind.ForKeyword); } case 'loop': { - this._token = TOKEN(TokenKind.LoopKeyword); - break; + return TOKEN(TokenKind.LoopKeyword); } case 'break': { - this._token = TOKEN(TokenKind.BreakKeyword); - break; + return TOKEN(TokenKind.BreakKeyword); } case 'continue': { - this._token = TOKEN(TokenKind.ContinueKeyword); - break; + return TOKEN(TokenKind.ContinueKeyword); } case 'match': { - this._token = TOKEN(TokenKind.MatchKeyword); - break; + return TOKEN(TokenKind.MatchKeyword); } case 'if': { - this._token = TOKEN(TokenKind.IfKeyword); - break; + return TOKEN(TokenKind.IfKeyword); } case 'elif': { - this._token = TOKEN(TokenKind.ElifKeyword); - break; + return TOKEN(TokenKind.ElifKeyword); } case 'else': { - this._token = TOKEN(TokenKind.ElseKeyword); - break; + return TOKEN(TokenKind.ElseKeyword); } case 'return': { - this._token = TOKEN(TokenKind.ReturnKeyword); - break; + return TOKEN(TokenKind.ReturnKeyword); } case 'eval': { - this._token = TOKEN(TokenKind.EvalKeyword); - break; + return TOKEN(TokenKind.EvalKeyword); } case 'var': { - this._token = TOKEN(TokenKind.VarKeyword); - break; + return TOKEN(TokenKind.VarKeyword); } case 'let': { - this._token = TOKEN(TokenKind.LetKeyword); - break; + return TOKEN(TokenKind.LetKeyword); } case 'exists': { - this._token = TOKEN(TokenKind.ExistsKeyword); - break; + return TOKEN(TokenKind.ExistsKeyword); } default: { - this._token = TOKEN(TokenKind.Identifier, word); - break; + return TOKEN(TokenKind.Identifier, { value }); } } - return true; } - private readDigits(): boolean { + private tryReadDigits(): Token | undefined { // TODO: float number - let digits = ''; + let value = ''; while (this.char != null && digit.test(this.char)) { - digits += this.char; + value += this.char; + this.nextChar(); + } + if (value.length === 0) { + return; + } + return TOKEN(TokenKind.NumberLiteral, { value }); + } + + private readStringLiteral(): Token { + let value = ''; + while (true) { + if (this.char == null) { + throw new AiScriptSyntaxError(`unexpected EOF`); + } + if (this.char === '"') { + this.nextChar(); + break; + } + value += this.char; this.nextChar(); } - if (digits.length === 0) { - return false; + return TOKEN(TokenKind.StringLiteral, { value }); + } + + private readTemplate(): Token { + const elements: Token[] = []; + let buf = ''; + let tokenBuf: Token[] = []; + let state: 'string' | 'expr' | 'finish' = 'string'; + + while (state != 'finish') { + switch (state) { + case 'string': { + // テンプレートの終了が無いままEOFに達した + if (this.char == null) { + throw new AiScriptSyntaxError(`unexpected EOF`); + } + // テンプレートの終了 + if (this.char == '`') { + this.nextChar(); + if (buf.length > 0) { + elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); + } + state = 'finish'; + break; + } + // 埋め込み式の開始 + if (this.char == '{') { + this.nextChar(); + if (buf.length > 0) { + elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); + buf = ''; + } + state = 'expr'; + break; + } + buf += this.char; + this.nextChar(); + break; + } + case 'expr': { + // 埋め込み式の終端記号が無いままEOFに達した + if (this.char == null) { + throw new AiScriptSyntaxError(`unexpected EOF`); + } + // skip spasing + if (spacingChars.includes(this.char)) { + this.nextChar(); + continue; + } + // 埋め込み式の終了 + if ((this.char as string) === '}') { + this.nextChar(); + elements.push(TOKEN(TokenKind.TemplateExprElement, { children: tokenBuf })); + tokenBuf = []; + state = 'string'; + break; + } + const token = this.nextToken(); + tokenBuf.push(token); + break; + } + } } - this._token = TOKEN(TokenKind.NumberLiteral, digits); - return true; + + return TOKEN(TokenKind.Template, { children: elements }); } private skipCommentLine() { @@ -420,3 +501,48 @@ export class TokenStream { } } } +export class TokenSequence implements ITokenStream { + private seq: Token[]; + private _token?: Token; + private index: number; + + constructor(sequence: TokenSequence['seq']) { + this.seq = sequence; + this.index = 0; + } + + public init() { + this.next(); + } + + public expect(kind: TokenKind): void { + if (this.kind !== kind) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + } + } + + public nextWith(kind: TokenKind): void { + this.expect(kind); + this.next(); + } + + public get token(): Token { + if (this._token == null) { + throw new Error('stream is not initialized yet'); + } + return this._token; + } + + public get kind(): TokenKind { + return this.token.kind; + } + + public next(): void { + if (this.index >= this.seq.length) { + this._token = TOKEN(TokenKind.EOF); + } else { + this._token = this.seq[this.index]; + this.index++; + } + } +} diff --git a/src/parser/token.ts b/src/parser/token.ts index 650082db..a8abbda5 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -6,6 +6,11 @@ export enum TokenKind { NumberLiteral, StringLiteral, + // template string + Template, + TemplateStringElement, + TemplateExprElement, + // keyword NullKeyword, TrueKeyword, @@ -100,10 +105,17 @@ export enum TokenKind { export class Token { constructor( public kind: TokenKind, + /** for number literal, string literal */ public value?: string, + /** for template syntax */ + public children?: Token[], ) { } } -export function TOKEN(kind: TokenKind, value?: Token['value']) { - return new Token(kind, value); +/** + * - opts.value: for number literal, string literal + * - opts.children: for template syntax +*/ +export function TOKEN(kind: TokenKind, opts?: { value?: Token['value'], children?: Token['children'] }) { + return new Token(kind, opts?.value, opts?.children); } From b70c35c228befaa56a1b74d9d250523f480c0aea Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 23 Sep 2023 21:38:05 +0900 Subject: [PATCH 038/126] lint --- src/parser/syntaxes.ts | 2 +- src/parser/token-stream.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 80887b9b..b6af1b05 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -260,7 +260,7 @@ function parseExpr(s: ITokenStream): Cst.Node { const exprStream = new TokenSequence(element.children!); exprStream.init(); const expr = parseExpr(exprStream); - if (exprStream.kind != TokenKind.EOF) { + if (exprStream.kind !== TokenKind.EOF) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[exprStream.token.kind]}`); } values.push(expr); diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index 0c47f7f7..edc6f48d 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -413,7 +413,7 @@ export class SourceReader implements ITokenStream { let tokenBuf: Token[] = []; let state: 'string' | 'expr' | 'finish' = 'string'; - while (state != 'finish') { + while (state !== 'finish') { switch (state) { case 'string': { // テンプレートの終了が無いままEOFに達した From 706c99f23232fc38bd278c1fda1c625d642b2cef Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 09:21:57 +0900 Subject: [PATCH 039/126] refactor --- src/parser/index.ts | 8 +- src/parser/syntaxes.ts | 4 +- src/parser/token-stream.ts | 328 +++++++++++++++++++++---------------- 3 files changed, 194 insertions(+), 146 deletions(-) diff --git a/src/parser/index.ts b/src/parser/index.ts index dcc4a57d..4b9c79e9 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,4 +1,4 @@ -import { SourceReader } from './token-stream.js'; +import { Scanner } from './token-stream.js'; import { parseTopLevel } from './syntaxes.js'; import { validateKeyword } from './plugins/validate-keyword.js'; @@ -56,9 +56,9 @@ export class Parser { public parse(input: string): Ast.Node[] { let nodes: Cst.Node[]; - const stream = new SourceReader(input); - stream.init(); - nodes = parseTopLevel(stream); + const scanner = new Scanner(input); + scanner.init(); + nodes = parseTopLevel(scanner); // validate the node tree for (const plugin of this.plugins.validate) { diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index b6af1b05..1bd2a42d 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -1,6 +1,6 @@ import { AiScriptSyntaxError } from '../error.js'; import { TokenKind } from './token.js'; -import { TokenSequence } from './token-stream.js'; +import { TokenStream } from './token-stream.js'; import type { ITokenStream } from './token-stream.js'; import type * as Cst from './node.js'; @@ -257,7 +257,7 @@ function parseExpr(s: ITokenStream): Cst.Node { break; } case TokenKind.TemplateExprElement: { - const exprStream = new TokenSequence(element.children!); + const exprStream = new TokenStream(element.children!); exprStream.init(); const expr = parseExpr(exprStream); if (exprStream.kind !== TokenKind.EOF) { diff --git a/src/parser/token-stream.ts b/src/parser/token-stream.ts index edc6f48d..f0c4da22 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/token-stream.ts @@ -6,58 +6,88 @@ const spacingChars = [' ', '\t', '\r', '\n']; const digit = /^[0-9]$/; const wordChar = /^[A-Za-z0-9_]$/; -export interface ITokenStream { - expect(kind: TokenKind): void; - nextWith(kind: TokenKind): void; - get token(): Token; - get kind(): TokenKind; - next(): void; -} - -export class SourceReader implements ITokenStream { +/** + * 入力文字列から文字を読み取るクラス + * 通常はScannerクラスの内部で利用される。 +*/ +export class StringReader { private source: string; - private _token?: Token; private index: number; - private char?: string; + private _char?: string; - public constructor(source: string) { + constructor(source: string) { this.source = source; this.index = 0; } public init(): void { - this.loadChar(); - this._token = this.nextToken(); + this.load(); } - public expect(kind: TokenKind): void { - if (this.kind !== kind) { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + public get eof(): boolean { + return (this.index >= this.source.length); + } + + public get char(): string { + if (this.eof) { + throw new Error('End of stream'); + } + if (this._char == null) { + throw new Error('stream is not initialized yet'); } + return this._char; } - public nextWith(kind: TokenKind): void { - this.expect(kind); - this.next(); + public next(): void { + if (!this.eof) { + this.index++; + } + this.load(); } - private get isEof(): boolean { - return (this.index >= this.source.length); + private load(): void { + if (!this.eof) { + this._char = this.source[this.index]; + } } +} - private loadChar(): void { - if (this.isEof) { - this.char = undefined; +/** + * トークンの読み取りに関するインターフェース +*/ +export interface ITokenStream { + get eof(): boolean; + get token(): Token; + get kind(): TokenKind; + next(): void; + expect(kind: TokenKind): void; + nextWith(kind: TokenKind): void; +} + +/** + * 入力文字列からトークンを読み取るクラス +*/ +export class Scanner implements ITokenStream { + private stream: StringReader; + private _token?: Token; + + constructor(source: string) + constructor(stream: StringReader) + constructor(x: string | StringReader) { + if (typeof x === 'string') { + this.stream = new StringReader(x); + this.stream.init(); } else { - this.char = this.source[this.index]; + this.stream = x; } } - private nextChar(): void { - if (!this.isEof) { - this.index++; - } - this.loadChar(); + public init(): void { + this._token = this.nextToken(); + } + + public get eof(): boolean { + return this.stream.eof; } public get token(): Token { @@ -78,24 +108,34 @@ export class SourceReader implements ITokenStream { this._token = this.nextToken(); } + public expect(kind: TokenKind): void { + if (this.kind !== kind) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + } + } + + public nextWith(kind: TokenKind): void { + this.expect(kind); + this.next(); + } + private nextToken(): Token { let token; while (true) { - // EOF terminated - if (this.char == null) { + if (this.stream.eof) { token = TOKEN(TokenKind.EOF); break; } // skip spasing - if (spacingChars.includes(this.char)) { - this.nextChar(); + if (spacingChars.includes(this.stream.char)) { + this.stream.next(); continue; } - switch (this.char) { + switch (this.stream.char) { case '!': { - this.nextChar(); - if ((this.char as string) === '=') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); token = TOKEN(TokenKind.NotEq); } else { token = TOKEN(TokenKind.Not); @@ -103,20 +143,20 @@ export class SourceReader implements ITokenStream { break; } case '"': { - this.nextChar(); + this.stream.next(); token = this.readStringLiteral(); break; } case '#': { - this.nextChar(); - if ((this.char as string) === '#') { - this.nextChar(); - if ((this.char as string) === '#') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '#') { + this.stream.next(); + if ((this.stream.char as string) === '#') { + this.stream.next(); token = TOKEN(TokenKind.Sharp3); } - } else if ((this.char as string) === '[') { - this.nextChar(); + } else if ((this.stream.char as string) === '[') { + this.stream.next(); token = TOKEN(TokenKind.OpenSharpBracket); } else { token = TOKEN(TokenKind.Sharp); @@ -124,37 +164,37 @@ export class SourceReader implements ITokenStream { break; } case '%': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.Percent); break; } case '&': { - this.nextChar(); - if ((this.char as string) === '&') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '&') { + this.stream.next(); token = TOKEN(TokenKind.And2); } break; } case '(': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.OpenParen); break; } case ')': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.CloseParen); break; } case '*': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.Asterisk); break; } case '+': { - this.nextChar(); - if ((this.char as string) === '=') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); token = TOKEN(TokenKind.PlusEq); } else { token = TOKEN(TokenKind.Plus); @@ -162,14 +202,14 @@ export class SourceReader implements ITokenStream { break; } case ',': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.Comma); break; } case '-': { - this.nextChar(); - if ((this.char as string) === '=') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); token = TOKEN(TokenKind.MinusEq); } else { token = TOKEN(TokenKind.Minus); @@ -177,18 +217,18 @@ export class SourceReader implements ITokenStream { break; } case '.': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.Dot); break; } case '/': { - this.nextChar(); - if ((this.char as string) === '*') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '*') { + this.stream.next(); this.skipCommentRange(); continue; - } else if ((this.char as string) === '/') { - this.nextChar(); + } else if ((this.stream.char as string) === '/') { + this.stream.next(); this.skipCommentLine(); continue; } else { @@ -197,9 +237,9 @@ export class SourceReader implements ITokenStream { break; } case ':': { - this.nextChar(); - if ((this.char as string) === ':') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === ':') { + this.stream.next(); token = TOKEN(TokenKind.Colon2); } else { token = TOKEN(TokenKind.Colon); @@ -207,17 +247,17 @@ export class SourceReader implements ITokenStream { break; } case ';': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.SemiColon); break; } case '<': { - this.nextChar(); - if ((this.char as string) === '=') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); token = TOKEN(TokenKind.LtEq); - } else if ((this.char as string) === ':') { - this.nextChar(); + } else if ((this.stream.char as string) === ':') { + this.stream.next(); token = TOKEN(TokenKind.Out); } else { token = TOKEN(TokenKind.Lt); @@ -225,12 +265,12 @@ export class SourceReader implements ITokenStream { break; } case '=': { - this.nextChar(); - if ((this.char as string) === '=') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); token = TOKEN(TokenKind.Eq2); - } else if ((this.char as string) === '>') { - this.nextChar(); + } else if ((this.stream.char as string) === '>') { + this.stream.next(); token = TOKEN(TokenKind.Arrow); } else { token = TOKEN(TokenKind.Eq); @@ -238,9 +278,9 @@ export class SourceReader implements ITokenStream { break; } case '>': { - this.nextChar(); - if ((this.char as string) === '=') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); token = TOKEN(TokenKind.GtEq); } else { token = TOKEN(TokenKind.Gt); @@ -248,45 +288,45 @@ export class SourceReader implements ITokenStream { break; } case '@': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.At); break; } case '[': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.OpenBracket); break; } case ']': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.CloseBracket); break; } case '^': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.Hat); break; } case '`': { - this.nextChar(); + this.stream.next(); token = this.readTemplate(); break; } case '{': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.OpenBrace); break; } case '|': { - this.nextChar(); - if ((this.char as string) === '|') { - this.nextChar(); + this.stream.next(); + if ((this.stream.char as string) === '|') { + this.stream.next(); token = TOKEN(TokenKind.Or2); } break; } case '}': { - this.nextChar(); + this.stream.next(); token = TOKEN(TokenKind.CloseBrace); break; } @@ -302,7 +342,7 @@ export class SourceReader implements ITokenStream { token = wordToken; break; } - throw new AiScriptSyntaxError(`invalid character: "${this.char}"`); + throw new AiScriptSyntaxError(`invalid character: "${this.stream.char}"`); } break; } @@ -312,9 +352,9 @@ export class SourceReader implements ITokenStream { private tryReadWord(): Token | undefined { // read a word let value = ''; - while (this.char != null && wordChar.test(this.char)) { - value += this.char; - this.nextChar(); + while (this.stream.char != null && wordChar.test(this.stream.char)) { + value += this.stream.char; + this.stream.next(); } if (value.length === 0) { return; @@ -381,9 +421,9 @@ export class SourceReader implements ITokenStream { private tryReadDigits(): Token | undefined { // TODO: float number let value = ''; - while (this.char != null && digit.test(this.char)) { - value += this.char; - this.nextChar(); + while (this.stream.char != null && digit.test(this.stream.char)) { + value += this.stream.char; + this.stream.next(); } if (value.length === 0) { return; @@ -394,15 +434,15 @@ export class SourceReader implements ITokenStream { private readStringLiteral(): Token { let value = ''; while (true) { - if (this.char == null) { + if (this.stream.char == null) { throw new AiScriptSyntaxError(`unexpected EOF`); } - if (this.char === '"') { - this.nextChar(); + if (this.stream.char === '"') { + this.stream.next(); break; } - value += this.char; - this.nextChar(); + value += this.stream.char; + this.stream.next(); } return TOKEN(TokenKind.StringLiteral, { value }); } @@ -417,12 +457,12 @@ export class SourceReader implements ITokenStream { switch (state) { case 'string': { // テンプレートの終了が無いままEOFに達した - if (this.char == null) { + if (this.stream.eof) { throw new AiScriptSyntaxError(`unexpected EOF`); } // テンプレートの終了 - if (this.char == '`') { - this.nextChar(); + if (this.stream.char == '`') { + this.stream.next(); if (buf.length > 0) { elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); } @@ -430,8 +470,8 @@ export class SourceReader implements ITokenStream { break; } // 埋め込み式の開始 - if (this.char == '{') { - this.nextChar(); + if (this.stream.char == '{') { + this.stream.next(); if (buf.length > 0) { elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); buf = ''; @@ -439,23 +479,23 @@ export class SourceReader implements ITokenStream { state = 'expr'; break; } - buf += this.char; - this.nextChar(); + buf += this.stream.char; + this.stream.next(); break; } case 'expr': { // 埋め込み式の終端記号が無いままEOFに達した - if (this.char == null) { + if (this.stream.eof) { throw new AiScriptSyntaxError(`unexpected EOF`); } // skip spasing - if (spacingChars.includes(this.char)) { - this.nextChar(); + if (spacingChars.includes(this.stream.char)) { + this.stream.next(); continue; } // 埋め込み式の終了 - if ((this.char as string) === '}') { - this.nextChar(); + if ((this.stream.char as string) === '}') { + this.stream.next(); elements.push(TOKEN(TokenKind.TemplateExprElement, { children: tokenBuf })); tokenBuf = []; state = 'string'; @@ -473,40 +513,44 @@ export class SourceReader implements ITokenStream { private skipCommentLine() { while (true) { - if (this.char == null) { + if (this.stream.eof) { break; } - if (this.char === '\n') { - this.nextChar(); + if (this.stream.char === '\n') { + this.stream.next(); break; } - this.nextChar(); + this.stream.next(); } } private skipCommentRange() { while (true) { - if (this.char == null) { + if (this.stream.eof) { break; } - if (this.char === '*') { - this.nextChar(); - if ((this.char as string) === '/') { - this.nextChar(); + if (this.stream.char === '*') { + this.stream.next(); + if ((this.stream.char as string) === '/') { + this.stream.next(); break; } continue; } - this.nextChar(); + this.stream.next(); } } } -export class TokenSequence implements ITokenStream { + +/** + * 既に生成済みのトークン列からトークンを読み取るクラス +*/ +export class TokenStream implements ITokenStream { private seq: Token[]; private _token?: Token; private index: number; - constructor(sequence: TokenSequence['seq']) { + constructor(sequence: TokenStream['seq']) { this.seq = sequence; this.index = 0; } @@ -515,15 +559,8 @@ export class TokenSequence implements ITokenStream { this.next(); } - public expect(kind: TokenKind): void { - if (this.kind !== kind) { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); - } - } - - public nextWith(kind: TokenKind): void { - this.expect(kind); - this.next(); + public get eof(): boolean { + return (this.index >= this.seq.length); } public get token(): Token { @@ -538,11 +575,22 @@ export class TokenSequence implements ITokenStream { } public next(): void { - if (this.index >= this.seq.length) { + if (this.eof) { this._token = TOKEN(TokenKind.EOF); } else { this._token = this.seq[this.index]; this.index++; } } + + public expect(kind: TokenKind): void { + if (this.kind !== kind) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + } + } + + public nextWith(kind: TokenKind): void { + this.expect(kind); + this.next(); + } } From b5ac28caaed763af7dcbec57e6d7f8b15b6f773e Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 09:40:06 +0900 Subject: [PATCH 040/126] refactor --- src/parser/index.ts | 2 +- src/parser/{token-stream.ts => scanner.ts} | 121 +-------------------- src/parser/streams/char-stream.ts | 44 ++++++++ src/parser/streams/token-stream.ts | 68 ++++++++++++ src/parser/syntaxes.ts | 4 +- 5 files changed, 121 insertions(+), 118 deletions(-) rename src/parser/{token-stream.ts => scanner.ts} (82%) create mode 100644 src/parser/streams/char-stream.ts create mode 100644 src/parser/streams/token-stream.ts diff --git a/src/parser/index.ts b/src/parser/index.ts index 4b9c79e9..c77dc197 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,4 +1,4 @@ -import { Scanner } from './token-stream.js'; +import { Scanner } from './scanner.js'; import { parseTopLevel } from './syntaxes.js'; import { validateKeyword } from './plugins/validate-keyword.js'; diff --git a/src/parser/token-stream.ts b/src/parser/scanner.ts similarity index 82% rename from src/parser/token-stream.ts rename to src/parser/scanner.ts index f0c4da22..4437e391 100644 --- a/src/parser/token-stream.ts +++ b/src/parser/scanner.ts @@ -1,4 +1,6 @@ import { AiScriptSyntaxError } from '../error.js'; +import { CharStream } from './streams/char-stream.js'; +import { ITokenStream } from './streams/token-stream.js'; import { TOKEN, TokenKind } from './token.js'; import type { Token } from './token.js'; @@ -6,76 +8,18 @@ const spacingChars = [' ', '\t', '\r', '\n']; const digit = /^[0-9]$/; const wordChar = /^[A-Za-z0-9_]$/; -/** - * 入力文字列から文字を読み取るクラス - * 通常はScannerクラスの内部で利用される。 -*/ -export class StringReader { - private source: string; - private index: number; - private _char?: string; - - constructor(source: string) { - this.source = source; - this.index = 0; - } - - public init(): void { - this.load(); - } - - public get eof(): boolean { - return (this.index >= this.source.length); - } - - public get char(): string { - if (this.eof) { - throw new Error('End of stream'); - } - if (this._char == null) { - throw new Error('stream is not initialized yet'); - } - return this._char; - } - - public next(): void { - if (!this.eof) { - this.index++; - } - this.load(); - } - - private load(): void { - if (!this.eof) { - this._char = this.source[this.index]; - } - } -} - -/** - * トークンの読み取りに関するインターフェース -*/ -export interface ITokenStream { - get eof(): boolean; - get token(): Token; - get kind(): TokenKind; - next(): void; - expect(kind: TokenKind): void; - nextWith(kind: TokenKind): void; -} - /** * 入力文字列からトークンを読み取るクラス */ export class Scanner implements ITokenStream { - private stream: StringReader; + private stream: CharStream; private _token?: Token; constructor(source: string) - constructor(stream: StringReader) - constructor(x: string | StringReader) { + constructor(stream: CharStream) + constructor(x: string | CharStream) { if (typeof x === 'string') { - this.stream = new StringReader(x); + this.stream = new CharStream(x); this.stream.init(); } else { this.stream = x; @@ -541,56 +485,3 @@ export class Scanner implements ITokenStream { } } } - -/** - * 既に生成済みのトークン列からトークンを読み取るクラス -*/ -export class TokenStream implements ITokenStream { - private seq: Token[]; - private _token?: Token; - private index: number; - - constructor(sequence: TokenStream['seq']) { - this.seq = sequence; - this.index = 0; - } - - public init() { - this.next(); - } - - public get eof(): boolean { - return (this.index >= this.seq.length); - } - - public get token(): Token { - if (this._token == null) { - throw new Error('stream is not initialized yet'); - } - return this._token; - } - - public get kind(): TokenKind { - return this.token.kind; - } - - public next(): void { - if (this.eof) { - this._token = TOKEN(TokenKind.EOF); - } else { - this._token = this.seq[this.index]; - this.index++; - } - } - - public expect(kind: TokenKind): void { - if (this.kind !== kind) { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); - } - } - - public nextWith(kind: TokenKind): void { - this.expect(kind); - this.next(); - } -} diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts new file mode 100644 index 00000000..efb5a919 --- /dev/null +++ b/src/parser/streams/char-stream.ts @@ -0,0 +1,44 @@ +/** + * 入力文字列から文字を読み取るクラス +*/ +export class CharStream { + private source: string; + private index: number; + private _char?: string; + + constructor(source: string) { + this.source = source; + this.index = 0; + } + + public init(): void { + this.load(); + } + + public get eof(): boolean { + return (this.index >= this.source.length); + } + + public get char(): string { + if (this.eof) { + throw new Error('End of stream'); + } + if (this._char == null) { + throw new Error('stream is not initialized yet'); + } + return this._char; + } + + public next(): void { + if (!this.eof) { + this.index++; + } + this.load(); + } + + private load(): void { + if (!this.eof) { + this._char = this.source[this.index]; + } + } +} diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts new file mode 100644 index 00000000..cd33f450 --- /dev/null +++ b/src/parser/streams/token-stream.ts @@ -0,0 +1,68 @@ +import { AiScriptSyntaxError } from '../../error.js'; +import { TOKEN, TokenKind } from '../token.js'; +import type { Token } from '../token.js'; + +/** + * トークンの読み取りに関するインターフェース +*/ +export interface ITokenStream { + get eof(): boolean; + get token(): Token; + get kind(): TokenKind; + next(): void; + expect(kind: TokenKind): void; + nextWith(kind: TokenKind): void; +} + +/** + * トークン列からトークンを読み取るクラス +*/ +export class TokenStream implements ITokenStream { + private seq: Token[]; + private _token?: Token; + private index: number; + + constructor(sequence: TokenStream['seq']) { + this.seq = sequence; + this.index = 0; + } + + public init() { + this.next(); + } + + public get eof(): boolean { + return (this.index >= this.seq.length); + } + + public get token(): Token { + if (this._token == null) { + throw new Error('stream is not initialized yet'); + } + return this._token; + } + + public get kind(): TokenKind { + return this.token.kind; + } + + public next(): void { + if (this.eof) { + this._token = TOKEN(TokenKind.EOF); + } else { + this._token = this.seq[this.index]; + this.index++; + } + } + + public expect(kind: TokenKind): void { + if (this.kind !== kind) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + } + } + + public nextWith(kind: TokenKind): void { + this.expect(kind); + this.next(); + } +} diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 1bd2a42d..c5e6f4a5 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -1,7 +1,7 @@ import { AiScriptSyntaxError } from '../error.js'; import { TokenKind } from './token.js'; -import { TokenStream } from './token-stream.js'; -import type { ITokenStream } from './token-stream.js'; +import { TokenStream } from './streams/token-stream.js'; +import type { ITokenStream } from './streams/token-stream.js'; import type * as Cst from './node.js'; From d13c3da9669969e36ac76ea9a8df651e3ba1473f Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 10:26:09 +0900 Subject: [PATCH 041/126] fix test --- test/parser.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/parser.ts b/test/parser.ts index c4a866be..5a227b5c 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -1,21 +1,21 @@ import * as assert from 'assert'; -import { TokenStream } from '../src/parser/token-stream'; +import { Scanner } from '../src/parser/scanner'; import { TOKEN, TokenKind } from '../src/parser/token'; -describe('TokenStream', () => { +describe('Scanner', () => { function init(source: string) { - const stream = new TokenStream(source); + const stream = new Scanner(source); stream.init(); return stream; } - function next(stream: TokenStream, kind: TokenKind, value?: string) { - assert.deepStrictEqual(stream.token, TOKEN(kind, value)); + function next(stream: Scanner, kind: TokenKind, value?: string) { + assert.deepStrictEqual(stream.token, TOKEN(kind, { value })); stream.next(); } test.concurrent('can get a token after init', async () => { const source = ''; - const stream = new TokenStream(source); + const stream = new Scanner(source); try { stream.token; assert.fail(); @@ -43,7 +43,7 @@ describe('TokenStream', () => { }); test.concurrent('invalid token', async () => { const source = '$'; - const stream = new TokenStream(source); + const stream = new Scanner(source); try { stream.init(); assert.fail(); From 169d951ceb84efeb6a9b60aa32ca803c8a1d390b Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 10:27:01 +0900 Subject: [PATCH 042/126] debug --- src/parser/scanner.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 4437e391..32f9fc4c 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -296,7 +296,7 @@ export class Scanner implements ITokenStream { private tryReadWord(): Token | undefined { // read a word let value = ''; - while (this.stream.char != null && wordChar.test(this.stream.char)) { + while (!this.stream.eof && wordChar.test(this.stream.char)) { value += this.stream.char; this.stream.next(); } @@ -365,7 +365,7 @@ export class Scanner implements ITokenStream { private tryReadDigits(): Token | undefined { // TODO: float number let value = ''; - while (this.stream.char != null && digit.test(this.stream.char)) { + while (!this.stream.eof && digit.test(this.stream.char)) { value += this.stream.char; this.stream.next(); } @@ -378,7 +378,7 @@ export class Scanner implements ITokenStream { private readStringLiteral(): Token { let value = ''; while (true) { - if (this.stream.char == null) { + if (this.stream.eof) { throw new AiScriptSyntaxError(`unexpected EOF`); } if (this.stream.char === '"') { From b4050193fd5a770330ea526d4bda9333020ae338 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 10:31:04 +0900 Subject: [PATCH 043/126] lint --- src/parser/scanner.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 32f9fc4c..62974964 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -1,6 +1,6 @@ import { AiScriptSyntaxError } from '../error.js'; import { CharStream } from './streams/char-stream.js'; -import { ITokenStream } from './streams/token-stream.js'; +import type { ITokenStream } from './streams/token-stream.js'; import { TOKEN, TokenKind } from './token.js'; import type { Token } from './token.js'; @@ -405,7 +405,7 @@ export class Scanner implements ITokenStream { throw new AiScriptSyntaxError(`unexpected EOF`); } // テンプレートの終了 - if (this.stream.char == '`') { + if (this.stream.char === '`') { this.stream.next(); if (buf.length > 0) { elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); @@ -414,7 +414,7 @@ export class Scanner implements ITokenStream { break; } // 埋め込み式の開始 - if (this.stream.char == '{') { + if (this.stream.char === '{') { this.stream.next(); if (buf.length > 0) { elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); From d9b2aeb6ec691926a2ebdbd4db22b5162242cbb2 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 11:12:29 +0900 Subject: [PATCH 044/126] refactor --- src/parser/streams/char-stream.ts | 3 ++- src/parser/streams/token-stream.ts | 32 ++++++++++++++++++++---------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index efb5a919..4f5f2012 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -21,9 +21,10 @@ export class CharStream { public get char(): string { if (this.eof) { - throw new Error('End of stream'); + throw new Error('end of stream'); } if (this._char == null) { + // EOFではない時にnullだったらinitされていない throw new Error('stream is not initialized yet'); } return this._char; diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index cd33f450..de312138 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -18,27 +18,31 @@ export interface ITokenStream { * トークン列からトークンを読み取るクラス */ export class TokenStream implements ITokenStream { - private seq: Token[]; - private _token?: Token; + private source: Token[]; private index: number; + private _token?: Token; - constructor(sequence: TokenStream['seq']) { - this.seq = sequence; + constructor(source: TokenStream['source']) { + this.source = source; this.index = 0; } public init() { - this.next(); + this.load(); } public get eof(): boolean { - return (this.index >= this.seq.length); + return (this.index >= this.source.length); } public get token(): Token { if (this._token == null) { + // EOFトークンさえも入っていなかったらinitされていない throw new Error('stream is not initialized yet'); } + if (this.eof) { + throw new Error('end of stream'); + } return this._token; } @@ -47,17 +51,15 @@ export class TokenStream implements ITokenStream { } public next(): void { - if (this.eof) { - this._token = TOKEN(TokenKind.EOF); - } else { - this._token = this.seq[this.index]; + if (!this.eof) { this.index++; } + this.load(); } public expect(kind: TokenKind): void { if (this.kind !== kind) { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.kind]}`); } } @@ -65,4 +67,12 @@ export class TokenStream implements ITokenStream { this.expect(kind); this.next(); } + + private load(): void { + if (this.eof) { + this._token = TOKEN(TokenKind.EOF); + } else { + this._token = this.source[this.index]; + } + } } From e0f9035b4eb888b5809815d511e32f4f7fc9e98b Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 11:26:57 +0900 Subject: [PATCH 045/126] refactor --- src/parser/scanner.ts | 8 ++++---- src/parser/syntaxes.ts | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 62974964..1aacddf8 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -27,7 +27,7 @@ export class Scanner implements ITokenStream { } public init(): void { - this._token = this.nextToken(); + this._token = this.readToken(); } public get eof(): boolean { @@ -49,7 +49,7 @@ export class Scanner implements ITokenStream { if (this._token == null) { throw new Error('stream is not initialized yet'); } - this._token = this.nextToken(); + this._token = this.readToken(); } public expect(kind: TokenKind): void { @@ -63,7 +63,7 @@ export class Scanner implements ITokenStream { this.next(); } - private nextToken(): Token { + private readToken(): Token { let token; while (true) { if (this.stream.eof) { @@ -445,7 +445,7 @@ export class Scanner implements ITokenStream { state = 'string'; break; } - const token = this.nextToken(); + const token = this.readToken(); tokenBuf.push(token); break; } diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index c5e6f4a5..94a03f68 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -257,6 +257,7 @@ function parseExpr(s: ITokenStream): Cst.Node { break; } case TokenKind.TemplateExprElement: { + // スキャナで埋め込み式として事前に読み取っておいたトークン列をパースする const exprStream = new TokenStream(element.children!); exprStream.init(); const expr = parseExpr(exprStream); From 442ff8df3b752f6a0e548224000aeb6f0f7faca2 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 11:44:07 +0900 Subject: [PATCH 046/126] parser wip: each --- src/parser/syntaxes.ts | 49 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 94a03f68..7d48e419 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -116,7 +116,9 @@ function parseStatement(s: ITokenStream): Cst.Node { return parseReturn(s); } // Attr - // Each + case TokenKind.EachKeyword: { + return parseEach(s); + } // For case TokenKind.LoopKeyword: { return parseLoop(s); @@ -189,12 +191,45 @@ function parseOut(s: ITokenStream): Cst.Node { }); } -function parseAttr(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - +/** + * ```abnf + * Each = "each" "let" IDENT [","] Expr BlockOrStatement + * / "each" "(" "let" IDENT [","] Expr ")" BlockOrStatement + * ``` +*/ function parseEach(s: ITokenStream): Cst.Node { - throw new Error('todo'); + let hasParen = false; + + s.nextWith(TokenKind.EachKeyword); + + if (s.kind === TokenKind.OpenParen) { + hasParen = true; + s.next(); + } + + s.nextWith(TokenKind.LetKeyword); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + if (s.kind == TokenKind.Comma) { + s.next(); + } + + const items = parseExpr(s); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('each', { + var: name, + items: items, + for: body, + }); } function parseFor(s: ITokenStream): Cst.Node { @@ -234,6 +269,8 @@ function parseAssign(s: ITokenStream): Cst.Node { function parseExpr(s: ITokenStream): Cst.Node { // TODO: Pratt parsing + // prefix: attr + switch (s.kind) { case TokenKind.NumberLiteral: { // TODO: sign From 771b54ad96f861422d017529150902e1eb136e69 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 12:07:33 +0900 Subject: [PATCH 047/126] parser wip: for --- src/parser/syntaxes.ts | 62 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 7d48e419..94a1a7b9 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -119,7 +119,9 @@ function parseStatement(s: ITokenStream): Cst.Node { case TokenKind.EachKeyword: { return parseEach(s); } - // For + case TokenKind.ForKeyword: { + return parseFor(s); + } case TokenKind.LoopKeyword: { return parseLoop(s); } @@ -233,7 +235,61 @@ function parseEach(s: ITokenStream): Cst.Node { } function parseFor(s: ITokenStream): Cst.Node { - throw new Error('todo'); + let hasParen = false; + + s.nextWith(TokenKind.ForKeyword); + + if (s.kind === TokenKind.OpenParen) { + hasParen = true; + s.next(); + } + + if (s.kind == TokenKind.LetKeyword) { + // range syntax + s.next(); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + let from; + if ((s.kind as TokenKind) == TokenKind.Eq) { + s.next(); + from = parseExpr(s); + } else { + from = NODE('num', { value: 0 }); + } + + const to = parseExpr(s); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('for', { + var: name, + from, + to, + for: body, + }); + } else { + // times syntax + + const times = parseExpr(s); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('for', { + times, + for: body, + }); + } } /** @@ -269,8 +325,6 @@ function parseAssign(s: ITokenStream): Cst.Node { function parseExpr(s: ITokenStream): Cst.Node { // TODO: Pratt parsing - // prefix: attr - switch (s.kind) { case TokenKind.NumberLiteral: { // TODO: sign From 9f39bc08815c5238f74bbd45945aea19d0f0eb27 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 13:58:03 +0900 Subject: [PATCH 048/126] assign, fn expr, exprs --- src/parser/scanner.ts | 7 ++- src/parser/syntaxes.ts | 98 ++++++++++++++++++++++++++++++++---------- src/parser/token.ts | 2 + 3 files changed, 84 insertions(+), 23 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 1aacddf8..3c67d69c 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -233,7 +233,12 @@ export class Scanner implements ITokenStream { } case '@': { this.stream.next(); - token = TOKEN(TokenKind.At); + if ((this.stream.char as string) == '(') { + this.stream.next(); + token = TOKEN(TokenKind.OpenAtParen); + } else { + token = TOKEN(TokenKind.At); + } break; } case '[': { diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts index 94a1a7b9..642ff6ae 100644 --- a/src/parser/syntaxes.ts +++ b/src/parser/syntaxes.ts @@ -133,9 +133,13 @@ function parseStatement(s: ITokenStream): Cst.Node { s.next(); return NODE('continue', {}); } - // Assign default: { - return parseExpr(s); + const expr = parseExpr(s); + const assign = tryParseAssign(s, expr); + if (assign) { + return assign; + } + return expr; } } } @@ -314,8 +318,33 @@ function parseLoop(s: ITokenStream): Cst.Node { return NODE('loop', { statements }); } -function parseAssign(s: ITokenStream): Cst.Node { - throw new Error('todo'); +/** + * ```abnf + * Assign = Expr ("=" / "+=" / "-=") Expr + * ``` +*/ +function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { + // Assign + switch (s.kind) { + case TokenKind.Eq: { + s.next(); + const expr = parseExpr(s); + return NODE('assign', { dest, expr }); + } + case TokenKind.PlusEq: { + s.next(); + const expr = parseExpr(s); + return NODE('addAssign', { dest, expr }); + } + case TokenKind.MinusEq: { + s.next(); + const expr = parseExpr(s); + return NODE('subAssign', { dest, expr }); + } + default: { + return; + } + } } //#endregion Statement @@ -325,18 +354,28 @@ function parseAssign(s: ITokenStream): Cst.Node { function parseExpr(s: ITokenStream): Cst.Node { // TODO: Pratt parsing + // prefix: not + // prefix: sign + // infix + // call chain + // prop chain + // index chain + switch (s.kind) { - case TokenKind.NumberLiteral: { - // TODO: sign - // TODO: validate value - const value = Number(s.token.value!); - s.next(); - return NODE('num', { value }); + case TokenKind.IfKeyword: { + return parseIf(s); } - case TokenKind.StringLiteral: { - const value = s.token.value!; - s.next(); - return NODE('str', { value }); + case TokenKind.OpenAtParen: { + return parseFnExpr(s); + } + case TokenKind.MatchKeyword: { + return parseMatch(s); + } + case TokenKind.EvalKeyword: { + return parseEval(s); + } + case TokenKind.ExistsKeyword: { + return parseExists(s); } case TokenKind.Template: { const values: (string | Cst.Node)[] = []; @@ -367,21 +406,36 @@ function parseExpr(s: ITokenStream): Cst.Node { s.next(); return NODE('tmpl', { tmpl: values }); } - case TokenKind.IfKeyword: { - return parseIf(s); + case TokenKind.StringLiteral: { + const value = s.token.value!; + s.next(); + return NODE('str', { value }); } - case TokenKind.EvalKeyword: { - return parseEval(s); + case TokenKind.NumberLiteral: { + // TODO: validate number value + const value = Number(s.token.value!); + s.next(); + return NODE('num', { value }); } - case TokenKind.ExistsKeyword: { - return parseExists(s); + case TokenKind.TrueKeyword: + case TokenKind.FalseKeyword: { + const value = (s.kind === TokenKind.TrueKeyword); + s.next(); + return NODE('bool', { value }); } - case TokenKind.Identifier: { - return parseReference(s); + case TokenKind.NullKeyword: { + s.next(); + return NODE('null', { }); + } + case TokenKind.OpenBrace: { + return parseObject(s); } case TokenKind.OpenBracket: { return parseArray(s); } + case TokenKind.Identifier: { + return parseReference(s); + } default: { throw new Error('todo'); } diff --git a/src/parser/token.ts b/src/parser/token.ts index a8abbda5..34231ee3 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -88,6 +88,8 @@ export enum TokenKind { GtEq, /** "@" */ At, + /** "@(" */ + OpenAtParen, /** "[" */ OpenBracket, /** "]" */ From 2a3be46a333425638c3351bd87e0b14f0120ba85 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 14:37:32 +0900 Subject: [PATCH 049/126] split source files --- src/parser/index.ts | 2 +- src/parser/node.ts | 12 + src/parser/syntaxes.ts | 642 ----------------------------- src/parser/syntaxes/common.ts | 39 ++ src/parser/syntaxes/expressions.ts | 201 +++++++++ src/parser/syntaxes/function.ts | 19 + src/parser/syntaxes/statements.ts | 277 +++++++++++++ src/parser/syntaxes/toplevel.ts | 108 +++++ 8 files changed, 657 insertions(+), 643 deletions(-) delete mode 100644 src/parser/syntaxes.ts create mode 100644 src/parser/syntaxes/common.ts create mode 100644 src/parser/syntaxes/expressions.ts create mode 100644 src/parser/syntaxes/function.ts create mode 100644 src/parser/syntaxes/statements.ts create mode 100644 src/parser/syntaxes/toplevel.ts diff --git a/src/parser/index.ts b/src/parser/index.ts index c77dc197..d48c2b46 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,5 +1,5 @@ import { Scanner } from './scanner.js'; -import { parseTopLevel } from './syntaxes.js'; +import { parseTopLevel } from './syntaxes/toplevel.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; diff --git a/src/parser/node.ts b/src/parser/node.ts index b9a2dd77..544028ce 100644 --- a/src/parser/node.ts +++ b/src/parser/node.ts @@ -8,6 +8,18 @@ export type Node = Namespace | Meta | Statement | Expression | ChainMember | TypeSource; +export function NODE(type: string, params: Record): Node { + const node: Record = { type }; + //params.children; + for (const key of Object.keys(params)) { + if (params[key] !== undefined) { + node[key] = params[key]; + } + } + //node.loc = { start, end }; + return node as Node; +} + export type Statement = Definition | Return | diff --git a/src/parser/syntaxes.ts b/src/parser/syntaxes.ts deleted file mode 100644 index 642ff6ae..00000000 --- a/src/parser/syntaxes.ts +++ /dev/null @@ -1,642 +0,0 @@ -import { AiScriptSyntaxError } from '../error.js'; -import { TokenKind } from './token.js'; -import { TokenStream } from './streams/token-stream.js'; -import type { ITokenStream } from './streams/token-stream.js'; - -import type * as Cst from './node.js'; - -//#region Top-level Statement - -/** - * ```abnf - * TopLevel = *(Namespace / Meta / Statement) - * ``` -*/ -export function parseTopLevel(s: ITokenStream): Cst.Node[] { - const nodes: Cst.Node[] = []; - - while (s.kind !== TokenKind.EOF) { - switch (s.token.kind) { - case TokenKind.Colon2: { - nodes.push(parseNamespace(s)); - break; - } - case TokenKind.Sharp3: { - nodes.push(parseMeta(s)); - break; - } - default: { - nodes.push(parseStatement(s)); - break; - } - } - } - - return nodes; -} - -/** - * ```abnf - * Namespace = "::" IDENT "{" *(VarDef / FnDef / Namespace) "}" - * ``` -*/ -function parseNamespace(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.Colon2); - - s.expect(TokenKind.Identifier); - const name = s.token.value!; - s.next(); - - const members: Cst.Node[] = []; - s.nextWith(TokenKind.OpenBrace); - while (s.kind !== TokenKind.CloseBrace) { - switch (s.token.kind) { - case TokenKind.VarKeyword: - case TokenKind.LetKeyword: { - members.push(parseVarDef(s)); - break; - } - case TokenKind.At: { - members.push(parseFnDef(s)); - break; - } - case TokenKind.Colon2: { - members.push(parseNamespace(s)); - break; - } - } - } - s.nextWith(TokenKind.CloseBrace); - - return NODE('ns', { name, members }); -} - -/** - * ```abnf - * Meta = "###" [IDENT] StaticLiteral - * ``` -*/ -function parseMeta(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.Sharp3); - - let name; - if (s.kind === TokenKind.Identifier) { - name = s.token.value; - s.next(); - } - - const value = parseStaticLiteral(s); - - return NODE('meta', { name, value }); -} - -//#endregion Top-level Statement - -//#region Statement - -/** - * ```abnf - * Statement = VarDef / FnDef / Out / Return / Attr / Each / For / Loop - * / Break / Continue / Assign / Expr - * ``` -*/ -function parseStatement(s: ITokenStream): Cst.Node { - switch (s.token.kind) { - case TokenKind.VarKeyword: - case TokenKind.LetKeyword: { - return parseVarDef(s); - } - case TokenKind.At: { - return parseFnDef(s); - } - case TokenKind.Out: { - return parseOut(s); - } - case TokenKind.ReturnKeyword: { - return parseReturn(s); - } - // Attr - case TokenKind.EachKeyword: { - return parseEach(s); - } - case TokenKind.ForKeyword: { - return parseFor(s); - } - case TokenKind.LoopKeyword: { - return parseLoop(s); - } - case TokenKind.BreakKeyword: { - s.next(); - return NODE('break', {}); - } - case TokenKind.ContinueKeyword: { - s.next(); - return NODE('continue', {}); - } - default: { - const expr = parseExpr(s); - const assign = tryParseAssign(s, expr); - if (assign) { - return assign; - } - return expr; - } - } -} - -/** - * ```abnf - * VarDef = ("let" / "var") IDENT [":" Type] "=" Expr - * ``` -*/ -function parseVarDef(s: ITokenStream): Cst.Node { - let mut; - switch (s.token.kind) { - case TokenKind.LetKeyword: { - mut = false; - break; - } - case TokenKind.VarKeyword: { - mut = true; - break; - } - default: { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.token.kind]}`); - } - } - s.next(); - - s.expect(TokenKind.Identifier); - const name = s.token.value!; - s.next(); - - let ty; - if (s.kind === TokenKind.Colon) { - s.next(); - ty = parseType(s); - } - - s.nextWith(TokenKind.Eq); - - const expr = parseExpr(s); - - return NODE('def', { name, varType: ty, expr, mut, attr: [] }); -} - -/** - * ```abnf - * Out = "<:" Expr - * ``` -*/ -function parseOut(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.Out); - const expr = parseExpr(s); - return NODE('identifier', { - name: 'print', - chain: [NODE('callChain', { args: [expr] })], - }); -} - -/** - * ```abnf - * Each = "each" "let" IDENT [","] Expr BlockOrStatement - * / "each" "(" "let" IDENT [","] Expr ")" BlockOrStatement - * ``` -*/ -function parseEach(s: ITokenStream): Cst.Node { - let hasParen = false; - - s.nextWith(TokenKind.EachKeyword); - - if (s.kind === TokenKind.OpenParen) { - hasParen = true; - s.next(); - } - - s.nextWith(TokenKind.LetKeyword); - - s.expect(TokenKind.Identifier); - const name = s.token.value!; - s.next(); - - if (s.kind == TokenKind.Comma) { - s.next(); - } - - const items = parseExpr(s); - - if (hasParen) { - s.nextWith(TokenKind.CloseParen); - } - - const body = parseBlockOrStatement(s); - - return NODE('each', { - var: name, - items: items, - for: body, - }); -} - -function parseFor(s: ITokenStream): Cst.Node { - let hasParen = false; - - s.nextWith(TokenKind.ForKeyword); - - if (s.kind === TokenKind.OpenParen) { - hasParen = true; - s.next(); - } - - if (s.kind == TokenKind.LetKeyword) { - // range syntax - s.next(); - - s.expect(TokenKind.Identifier); - const name = s.token.value!; - s.next(); - - let from; - if ((s.kind as TokenKind) == TokenKind.Eq) { - s.next(); - from = parseExpr(s); - } else { - from = NODE('num', { value: 0 }); - } - - const to = parseExpr(s); - - if (hasParen) { - s.nextWith(TokenKind.CloseParen); - } - - const body = parseBlockOrStatement(s); - - return NODE('for', { - var: name, - from, - to, - for: body, - }); - } else { - // times syntax - - const times = parseExpr(s); - - if (hasParen) { - s.nextWith(TokenKind.CloseParen); - } - - const body = parseBlockOrStatement(s); - - return NODE('for', { - times, - for: body, - }); - } -} - -/** - * ```abnf - * Return = "return" Expr - * ``` -*/ -function parseReturn(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.ReturnKeyword); - const expr = parseExpr(s); - return NODE('return', { expr }); -} - -/** - * ```abnf - * Loop = "loop" Block - * ``` -*/ -function parseLoop(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.LoopKeyword); - const statements = parseBlock(s); - return NODE('loop', { statements }); -} - -/** - * ```abnf - * Assign = Expr ("=" / "+=" / "-=") Expr - * ``` -*/ -function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { - // Assign - switch (s.kind) { - case TokenKind.Eq: { - s.next(); - const expr = parseExpr(s); - return NODE('assign', { dest, expr }); - } - case TokenKind.PlusEq: { - s.next(); - const expr = parseExpr(s); - return NODE('addAssign', { dest, expr }); - } - case TokenKind.MinusEq: { - s.next(); - const expr = parseExpr(s); - return NODE('subAssign', { dest, expr }); - } - default: { - return; - } - } -} - -//#endregion Statement - -//#region Expression - -function parseExpr(s: ITokenStream): Cst.Node { - // TODO: Pratt parsing - - // prefix: not - // prefix: sign - // infix - // call chain - // prop chain - // index chain - - switch (s.kind) { - case TokenKind.IfKeyword: { - return parseIf(s); - } - case TokenKind.OpenAtParen: { - return parseFnExpr(s); - } - case TokenKind.MatchKeyword: { - return parseMatch(s); - } - case TokenKind.EvalKeyword: { - return parseEval(s); - } - case TokenKind.ExistsKeyword: { - return parseExists(s); - } - case TokenKind.Template: { - const values: (string | Cst.Node)[] = []; - - for (const element of s.token.children!) { - switch (element.kind) { - case TokenKind.TemplateStringElement: { - values.push(NODE('str', { value: element.value! })); - break; - } - case TokenKind.TemplateExprElement: { - // スキャナで埋め込み式として事前に読み取っておいたトークン列をパースする - const exprStream = new TokenStream(element.children!); - exprStream.init(); - const expr = parseExpr(exprStream); - if (exprStream.kind !== TokenKind.EOF) { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[exprStream.token.kind]}`); - } - values.push(expr); - break; - } - default: { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[element.kind]}`); - } - } - } - - s.next(); - return NODE('tmpl', { tmpl: values }); - } - case TokenKind.StringLiteral: { - const value = s.token.value!; - s.next(); - return NODE('str', { value }); - } - case TokenKind.NumberLiteral: { - // TODO: validate number value - const value = Number(s.token.value!); - s.next(); - return NODE('num', { value }); - } - case TokenKind.TrueKeyword: - case TokenKind.FalseKeyword: { - const value = (s.kind === TokenKind.TrueKeyword); - s.next(); - return NODE('bool', { value }); - } - case TokenKind.NullKeyword: { - s.next(); - return NODE('null', { }); - } - case TokenKind.OpenBrace: { - return parseObject(s); - } - case TokenKind.OpenBracket: { - return parseArray(s); - } - case TokenKind.Identifier: { - return parseReference(s); - } - default: { - throw new Error('todo'); - } - } -} - -/** - * ```abnf - * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] - * ``` -*/ -function parseIf(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.IfKeyword); - const cond = parseExpr(s); - const then = parseBlockOrStatement(s); - - const elseif: { cond: any, then: any }[] = []; - while (s.kind === TokenKind.ElifKeyword) { - s.next(); - const elifCond = parseExpr(s); - const elifThen = parseBlockOrStatement(s); - elseif.push({ cond: elifCond, then: elifThen }); - } - - let _else = undefined; - if (s.kind === TokenKind.ElseKeyword) { - s.next(); - _else = parseBlockOrStatement(s); - } - - return NODE('if', { cond, then, elseif, else: _else }); -} - -function parseMatch(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -/** - * ```abnf - * Eval = "eval" Block - * ``` -*/ -function parseEval(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.EvalKeyword); - const statements = parseBlock(s); - return NODE('block', { statements }); -} - -/** - * ```abnf - * Exists = "exists" Reference - * ``` -*/ -function parseExists(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.ExistsKeyword); - const identifier = parseReference(s); - return NODE('exists', { identifier }); -} - -/** - * ```abnf - * Reference = IDENT *(":" IDENT) - * ``` -*/ -function parseReference(s: ITokenStream): Cst.Node { - const segs: string[] = []; - while (true) { - if (segs.length > 0) { - if (s.kind === TokenKind.Colon) { - s.next(); - } else { - break; - } - } - s.expect(TokenKind.Identifier); - segs.push(s.token.value!); - s.next(); - } - return NODE('identifier', { name: segs.join(':') }); -} - -function parseObject(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -/** - * ```abnf - * Array = "[" *(Expr [","]) "]" - * ``` -*/ -function parseArray(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.OpenBracket); - - const value = []; - while (s.kind !== TokenKind.CloseBracket) { - value.push(parseExpr(s)); - if (s.kind === TokenKind.Comma) { - s.next(); - } - } - - s.nextWith(TokenKind.CloseBracket); - - return NODE('arr', { value }); -} - -//#endregion Expression - -//#region Function - -/** - * ```abnf - * FnDef = "@" IDENT "(" Args ")" [":" Type] Block - * ``` -*/ -function parseFnDef(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseFnExpr(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -//#endregion Function - -//#region Static Literal - -function parseStaticLiteral(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseStaticArray(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseStaticObject(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -//#endregion Static Literal - -//#region Type - -function parseType(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseFnType(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -function parseNamedType(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -//#endregion Type - -//#region Common - -function NODE(type: string, params: Record): Cst.Node { - const node: Record = { type }; - //params.children; - for (const key of Object.keys(params)) { - if (params[key] !== undefined) { - node[key] = params[key]; - } - } - //node.loc = { start, end }; - return node as Cst.Node; -} - -/** - * ```abnf - * Block = "{" *Statement "}" - * ``` -*/ -function parseBlock(s: ITokenStream): Cst.Node[] { - s.nextWith(TokenKind.OpenBrace); - - const steps: Cst.Node[] = []; - while (s.kind !== TokenKind.CloseBrace) { - steps.push(parseStatement(s)); - } - - s.nextWith(TokenKind.CloseBrace); - - return steps; -} - -/** - * ```abnf - * BlockOrStatement = Block / Statement - * ``` -*/ -function parseBlockOrStatement(s: ITokenStream): Cst.Node { - if (s.kind === TokenKind.OpenBrace) { - const statements = parseBlock(s); - return NODE('block', { statements }); - } else { - return parseStatement(s); - } -} - -//#endregion Common diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts new file mode 100644 index 00000000..cd1d7023 --- /dev/null +++ b/src/parser/syntaxes/common.ts @@ -0,0 +1,39 @@ +import { TokenKind } from '../token.js'; +import type { ITokenStream } from '../streams/token-stream.js'; +import { parseStatement } from './statements.js'; + +import type * as Cst from '../node.js'; + +/** + * ```abnf + * Block = "{" *Statement "}" + * ``` +*/ +export function parseBlock(s: ITokenStream): Cst.Node[] { + s.nextWith(TokenKind.OpenBrace); + + const steps: Cst.Node[] = []; + while (s.kind !== TokenKind.CloseBrace) { + steps.push(parseStatement(s)); + } + + s.nextWith(TokenKind.CloseBrace); + + return steps; +} + +//#region Type + +export function parseType(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseFnType(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseNamedType(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +//#endregion Type diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts new file mode 100644 index 00000000..df9b444e --- /dev/null +++ b/src/parser/syntaxes/expressions.ts @@ -0,0 +1,201 @@ +import { AiScriptSyntaxError } from '../../error.js'; +import { TokenKind } from '../token.js'; +import { TokenStream } from '../streams/token-stream.js'; +import type { ITokenStream } from '../streams/token-stream.js'; +import { NODE } from '../node.js'; +import type * as Cst from '../node.js'; + +import { parseBlockOrStatement } from './statements.js'; +import { parseBlock } from './common.js'; +import { parseFnExpr } from './function.js'; + +export function parseExpr(s: ITokenStream): Cst.Node { + // TODO: Pratt parsing + + // prefix: not + // prefix: sign + // infix + // call chain + // prop chain + // index chain + + switch (s.kind) { + case TokenKind.IfKeyword: { + return parseIf(s); + } + case TokenKind.OpenAtParen: { + return parseFnExpr(s); + } + case TokenKind.MatchKeyword: { + return parseMatch(s); + } + case TokenKind.EvalKeyword: { + return parseEval(s); + } + case TokenKind.ExistsKeyword: { + return parseExists(s); + } + case TokenKind.Template: { + const values: (string | Cst.Node)[] = []; + + for (const element of s.token.children!) { + switch (element.kind) { + case TokenKind.TemplateStringElement: { + values.push(NODE('str', { value: element.value! })); + break; + } + case TokenKind.TemplateExprElement: { + // スキャナで埋め込み式として事前に読み取っておいたトークン列をパースする + const exprStream = new TokenStream(element.children!); + exprStream.init(); + const expr = parseExpr(exprStream); + if (exprStream.kind !== TokenKind.EOF) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[exprStream.token.kind]}`); + } + values.push(expr); + break; + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[element.kind]}`); + } + } + } + + s.next(); + return NODE('tmpl', { tmpl: values }); + } + case TokenKind.StringLiteral: { + const value = s.token.value!; + s.next(); + return NODE('str', { value }); + } + case TokenKind.NumberLiteral: { + // TODO: validate number value + const value = Number(s.token.value!); + s.next(); + return NODE('num', { value }); + } + case TokenKind.TrueKeyword: + case TokenKind.FalseKeyword: { + const value = (s.kind === TokenKind.TrueKeyword); + s.next(); + return NODE('bool', { value }); + } + case TokenKind.NullKeyword: { + s.next(); + return NODE('null', { }); + } + case TokenKind.OpenBrace: { + return parseObject(s); + } + case TokenKind.OpenBracket: { + return parseArray(s); + } + case TokenKind.Identifier: { + return parseReference(s); + } + default: { + throw new Error('todo'); + } + } +} + +/** + * ```abnf + * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] + * ``` +*/ +export function parseIf(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.IfKeyword); + const cond = parseExpr(s); + const then = parseBlockOrStatement(s); + + const elseif: { cond: any, then: any }[] = []; + while (s.kind === TokenKind.ElifKeyword) { + s.next(); + const elifCond = parseExpr(s); + const elifThen = parseBlockOrStatement(s); + elseif.push({ cond: elifCond, then: elifThen }); + } + + let _else = undefined; + if (s.kind === TokenKind.ElseKeyword) { + s.next(); + _else = parseBlockOrStatement(s); + } + + return NODE('if', { cond, then, elseif, else: _else }); +} + +export function parseMatch(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +/** + * ```abnf + * Eval = "eval" Block + * ``` +*/ +export function parseEval(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.EvalKeyword); + const statements = parseBlock(s); + return NODE('block', { statements }); +} + +/** + * ```abnf + * Exists = "exists" Reference + * ``` +*/ +export function parseExists(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.ExistsKeyword); + const identifier = parseReference(s); + return NODE('exists', { identifier }); +} + +/** + * ```abnf + * Reference = IDENT *(":" IDENT) + * ``` +*/ +export function parseReference(s: ITokenStream): Cst.Node { + const segs: string[] = []; + while (true) { + if (segs.length > 0) { + if (s.kind === TokenKind.Colon) { + s.next(); + } else { + break; + } + } + s.expect(TokenKind.Identifier); + segs.push(s.token.value!); + s.next(); + } + return NODE('identifier', { name: segs.join(':') }); +} + +export function parseObject(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +/** + * ```abnf + * Array = "[" *(Expr [","]) "]" + * ``` +*/ +export function parseArray(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.OpenBracket); + + const value = []; + while (s.kind !== TokenKind.CloseBracket) { + value.push(parseExpr(s)); + if (s.kind === TokenKind.Comma) { + s.next(); + } + } + + s.nextWith(TokenKind.CloseBracket); + + return NODE('arr', { value }); +} diff --git a/src/parser/syntaxes/function.ts b/src/parser/syntaxes/function.ts new file mode 100644 index 00000000..b2a83aad --- /dev/null +++ b/src/parser/syntaxes/function.ts @@ -0,0 +1,19 @@ +import { AiScriptSyntaxError } from '../../error.js'; +import { TokenKind } from '../token.js'; +import { TokenStream } from '../streams/token-stream.js'; +import type { ITokenStream } from '../streams/token-stream.js'; +import { NODE } from '../node.js'; +import type * as Cst from '../node.js'; + +/** + * ```abnf + * FnDef = "@" IDENT "(" Args ")" [":" Type] Block + * ``` +*/ +export function parseFnDef(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseFnExpr(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts new file mode 100644 index 00000000..7cebdf7c --- /dev/null +++ b/src/parser/syntaxes/statements.ts @@ -0,0 +1,277 @@ +import { AiScriptSyntaxError } from '../../error.js'; +import { TokenKind } from '../token.js'; +import { TokenStream } from '../streams/token-stream.js'; +import type { ITokenStream } from '../streams/token-stream.js'; +import { NODE } from '../node.js'; +import type * as Cst from '../node.js'; + +import { parseBlock, parseType } from './common.js'; +import { parseExpr } from './expressions.js'; +import { parseFnDef } from './function.js'; + +/** + * ```abnf + * Statement = VarDef / FnDef / Out / Return / Attr / Each / For / Loop + * / Break / Continue / Assign / Expr + * ``` +*/ +export function parseStatement(s: ITokenStream): Cst.Node { + switch (s.token.kind) { + case TokenKind.VarKeyword: + case TokenKind.LetKeyword: { + return parseVarDef(s); + } + case TokenKind.At: { + return parseFnDef(s); + } + case TokenKind.Out: { + return parseOut(s); + } + case TokenKind.ReturnKeyword: { + return parseReturn(s); + } + // Attr + case TokenKind.EachKeyword: { + return parseEach(s); + } + case TokenKind.ForKeyword: { + return parseFor(s); + } + case TokenKind.LoopKeyword: { + return parseLoop(s); + } + case TokenKind.BreakKeyword: { + s.next(); + return NODE('break', {}); + } + case TokenKind.ContinueKeyword: { + s.next(); + return NODE('continue', {}); + } + default: { + const expr = parseExpr(s); + const assign = tryParseAssign(s, expr); + if (assign) { + return assign; + } + return expr; + } + } +} + +/** + * ```abnf + * VarDef = ("let" / "var") IDENT [":" Type] "=" Expr + * ``` +*/ +export function parseVarDef(s: ITokenStream): Cst.Node { + let mut; + switch (s.token.kind) { + case TokenKind.LetKeyword: { + mut = false; + break; + } + case TokenKind.VarKeyword: { + mut = true; + break; + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.token.kind]}`); + } + } + s.next(); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + let ty; + if (s.kind === TokenKind.Colon) { + s.next(); + ty = parseType(s); + } + + s.nextWith(TokenKind.Eq); + + const expr = parseExpr(s); + + return NODE('def', { name, varType: ty, expr, mut, attr: [] }); +} + +/** + * ```abnf + * Out = "<:" Expr + * ``` +*/ +export function parseOut(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.Out); + const expr = parseExpr(s); + return NODE('identifier', { + name: 'print', + chain: [NODE('callChain', { args: [expr] })], + }); +} + +/** + * ```abnf + * Each = "each" "let" IDENT [","] Expr BlockOrStatement + * / "each" "(" "let" IDENT [","] Expr ")" BlockOrStatement + * ``` +*/ +export function parseEach(s: ITokenStream): Cst.Node { + let hasParen = false; + + s.nextWith(TokenKind.EachKeyword); + + if (s.kind === TokenKind.OpenParen) { + hasParen = true; + s.next(); + } + + s.nextWith(TokenKind.LetKeyword); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + if (s.kind == TokenKind.Comma) { + s.next(); + } + + const items = parseExpr(s); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('each', { + var: name, + items: items, + for: body, + }); +} + +export function parseFor(s: ITokenStream): Cst.Node { + let hasParen = false; + + s.nextWith(TokenKind.ForKeyword); + + if (s.kind === TokenKind.OpenParen) { + hasParen = true; + s.next(); + } + + if (s.kind == TokenKind.LetKeyword) { + // range syntax + s.next(); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + let from; + if ((s.kind as TokenKind) == TokenKind.Eq) { + s.next(); + from = parseExpr(s); + } else { + from = NODE('num', { value: 0 }); + } + + const to = parseExpr(s); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('for', { + var: name, + from, + to, + for: body, + }); + } else { + // times syntax + + const times = parseExpr(s); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('for', { + times, + for: body, + }); + } +} + +/** + * ```abnf + * Return = "return" Expr + * ``` +*/ +export function parseReturn(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.ReturnKeyword); + const expr = parseExpr(s); + return NODE('return', { expr }); +} + +/** + * ```abnf + * Loop = "loop" Block + * ``` +*/ +export function parseLoop(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.LoopKeyword); + const statements = parseBlock(s); + return NODE('loop', { statements }); +} + +/** + * ```abnf + * Assign = Expr ("=" / "+=" / "-=") Expr + * ``` +*/ +export function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { + // Assign + switch (s.kind) { + case TokenKind.Eq: { + s.next(); + const expr = parseExpr(s); + return NODE('assign', { dest, expr }); + } + case TokenKind.PlusEq: { + s.next(); + const expr = parseExpr(s); + return NODE('addAssign', { dest, expr }); + } + case TokenKind.MinusEq: { + s.next(); + const expr = parseExpr(s); + return NODE('subAssign', { dest, expr }); + } + default: { + return; + } + } +} + +/** + * ```abnf + * BlockOrStatement = Block / Statement + * ``` +*/ +export function parseBlockOrStatement(s: ITokenStream): Cst.Node { + if (s.kind === TokenKind.OpenBrace) { + const statements = parseBlock(s); + return NODE('block', { statements }); + } else { + return parseStatement(s); + } +} diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts new file mode 100644 index 00000000..0826d180 --- /dev/null +++ b/src/parser/syntaxes/toplevel.ts @@ -0,0 +1,108 @@ +import { AiScriptSyntaxError } from '../../error.js'; +import { TokenKind } from '../token.js'; +import { TokenStream } from '../streams/token-stream.js'; +import type { ITokenStream } from '../streams/token-stream.js'; +import { NODE } from '../node.js'; +import type * as Cst from '../node.js'; + +import { parseStatement, parseVarDef } from './statements.js'; +import { parseFnDef } from './function.js'; + +/** + * ```abnf + * TopLevel = *(Namespace / Meta / Statement) + * ``` +*/ +export function parseTopLevel(s: ITokenStream): Cst.Node[] { + const nodes: Cst.Node[] = []; + + while (s.kind !== TokenKind.EOF) { + switch (s.token.kind) { + case TokenKind.Colon2: { + nodes.push(parseNamespace(s)); + break; + } + case TokenKind.Sharp3: { + nodes.push(parseMeta(s)); + break; + } + default: { + nodes.push(parseStatement(s)); + break; + } + } + } + + return nodes; +} + +/** + * ```abnf + * Namespace = "::" IDENT "{" *(VarDef / FnDef / Namespace) "}" + * ``` +*/ +export function parseNamespace(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.Colon2); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + const members: Cst.Node[] = []; + s.nextWith(TokenKind.OpenBrace); + while (s.kind !== TokenKind.CloseBrace) { + switch (s.token.kind) { + case TokenKind.VarKeyword: + case TokenKind.LetKeyword: { + members.push(parseVarDef(s)); + break; + } + case TokenKind.At: { + members.push(parseFnDef(s)); + break; + } + case TokenKind.Colon2: { + members.push(parseNamespace(s)); + break; + } + } + } + s.nextWith(TokenKind.CloseBrace); + + return NODE('ns', { name, members }); +} + +/** + * ```abnf + * Meta = "###" [IDENT] StaticLiteral + * ``` +*/ +export function parseMeta(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.Sharp3); + + let name; + if (s.kind === TokenKind.Identifier) { + name = s.token.value; + s.next(); + } + + const value = parseStaticLiteral(s); + + return NODE('meta', { name, value }); +} + +//#region Static Literal + +export function parseStaticLiteral(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseStaticArray(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseStaticObject(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +//#endregion Static Literal From 350f6b12715435b3791fc0a148dcbae04db860e3 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 14:42:03 +0900 Subject: [PATCH 050/126] lint --- src/parser/scanner.ts | 2 +- src/parser/syntaxes/statements.ts | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 3c67d69c..8013c490 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -233,7 +233,7 @@ export class Scanner implements ITokenStream { } case '@': { this.stream.next(); - if ((this.stream.char as string) == '(') { + if ((this.stream.char as string) === '(') { this.stream.next(); token = TOKEN(TokenKind.OpenAtParen); } else { diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 7cebdf7c..4ef7f481 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -134,7 +134,7 @@ export function parseEach(s: ITokenStream): Cst.Node { const name = s.token.value!; s.next(); - if (s.kind == TokenKind.Comma) { + if (s.kind === TokenKind.Comma) { s.next(); } @@ -163,7 +163,7 @@ export function parseFor(s: ITokenStream): Cst.Node { s.next(); } - if (s.kind == TokenKind.LetKeyword) { + if (s.kind === TokenKind.LetKeyword) { // range syntax s.next(); @@ -172,7 +172,7 @@ export function parseFor(s: ITokenStream): Cst.Node { s.next(); let from; - if ((s.kind as TokenKind) == TokenKind.Eq) { + if ((s.kind as TokenKind) === TokenKind.Eq) { s.next(); from = parseExpr(s); } else { From 3179ff251b5c308b99a16c0ed4e3a06a0f9846b1 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 15:39:47 +0900 Subject: [PATCH 051/126] pratt parsing --- src/parser/syntaxes/expressions.ts | 84 +++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 6 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index df9b444e..f476d34b 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -9,16 +9,36 @@ import { parseBlockOrStatement } from './statements.js'; import { parseBlock } from './common.js'; import { parseFnExpr } from './function.js'; -export function parseExpr(s: ITokenStream): Cst.Node { - // TODO: Pratt parsing +export function parseExpr(s: ITokenStream) { + return parsePratt(s, 0); +} - // prefix: not - // prefix: sign - // infix - // call chain +const operators: OpInfo[] = [ + +]; + +function parsePrefix(s: ITokenStream, info: PrefixInfo): Cst.Node { + // not + // sign + + throw new Error('todo'); +} + +function parseInfix(s: ITokenStream, left: Cst.Node, info: InfixInfo): Cst.Node { + // arithmetic ops // prop chain + + throw new Error('todo'); +} + +function parsePostfix(s: ITokenStream, left: Cst.Node, info: PostfixInfo): Cst.Node { + // call chain // index chain + throw new Error('todo'); +} + +function parseAtom(s: ITokenStream): Cst.Node { switch (s.kind) { case TokenKind.IfKeyword: { return parseIf(s); @@ -199,3 +219,55 @@ export function parseArray(s: ITokenStream): Cst.Node { return NODE('arr', { value }); } + +//#region Pratt parsing + +type PrefixInfo = { opKind: 'prefix', kind: TokenKind, bp: number }; +type InfixInfo = { opKind: 'infix', kind: TokenKind, lbp: number, rbp: number }; +type PostfixInfo = { opKind: 'postfix', kind: TokenKind, bp: number }; +type OpInfo = PrefixInfo | InfixInfo | PostfixInfo; + +function parsePratt(s: ITokenStream, minBp: number) { + // pratt parsing + // https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html + + let left: Cst.Node; + + const tokenKind = s.kind; + const prefix = operators.find((x): x is PrefixInfo => x.opKind == 'prefix' && x.kind == tokenKind); + if (prefix != null) { + left = parsePrefix(s, prefix); + } else { + left = parseAtom(s); + } + + while (true) { + const tokenKind = s.kind; + + const postfix = operators.find((x): x is PostfixInfo => x.opKind == 'postfix' && x.kind == tokenKind); + if (postfix != null) { + if (postfix.bp < minBp) { + break; + } + + left = parsePostfix(s, left, postfix); + continue; + } + + const infix = operators.find((x): x is InfixInfo => x.opKind == 'infix' && x.kind == tokenKind); + if (infix != null) { + if (infix.lbp < minBp) { + break; + } + + left = parseInfix(s, left, infix); + continue; + } + + break; + } + + return left; +} + +//#endregion Pratt parsing From 9af45e12a7f7238b699f3d644cb0d77c637c21c3 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 15:42:55 +0900 Subject: [PATCH 052/126] lint --- src/parser/syntaxes/expressions.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index f476d34b..a5b359f9 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -234,7 +234,7 @@ function parsePratt(s: ITokenStream, minBp: number) { let left: Cst.Node; const tokenKind = s.kind; - const prefix = operators.find((x): x is PrefixInfo => x.opKind == 'prefix' && x.kind == tokenKind); + const prefix = operators.find((x): x is PrefixInfo => x.opKind === 'prefix' && x.kind === tokenKind); if (prefix != null) { left = parsePrefix(s, prefix); } else { @@ -244,7 +244,7 @@ function parsePratt(s: ITokenStream, minBp: number) { while (true) { const tokenKind = s.kind; - const postfix = operators.find((x): x is PostfixInfo => x.opKind == 'postfix' && x.kind == tokenKind); + const postfix = operators.find((x): x is PostfixInfo => x.opKind === 'postfix' && x.kind === tokenKind); if (postfix != null) { if (postfix.bp < minBp) { break; @@ -254,7 +254,7 @@ function parsePratt(s: ITokenStream, minBp: number) { continue; } - const infix = operators.find((x): x is InfixInfo => x.opKind == 'infix' && x.kind == tokenKind); + const infix = operators.find((x): x is InfixInfo => x.opKind === 'infix' && x.kind === tokenKind); if (infix != null) { if (infix.lbp < minBp) { break; From 5b51934ab747e7aa19d7ad51b2ca2781d8526a20 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 16:33:04 +0900 Subject: [PATCH 053/126] ops --- src/parser/syntaxes/expressions.ts | 34 ++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index a5b359f9..6d902768 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -14,27 +14,43 @@ export function parseExpr(s: ITokenStream) { } const operators: OpInfo[] = [ + { opKind: 'postfix', kind: TokenKind.OpenParen, bp: 90 }, + { opKind: 'postfix', kind: TokenKind.OpenBracket, bp: 90 }, + { opKind: 'infix', kind: TokenKind.Dot, lbp: 80, rbp: 81 }, + + { opKind: 'prefix', kind: TokenKind.Plus, bp: 70 }, + { opKind: 'prefix', kind: TokenKind.Minus, bp: 70 }, + { opKind: 'prefix', kind: TokenKind.Not, bp: 70 }, + + { opKind: 'infix', kind: TokenKind.Asterisk, lbp: 60, rbp: 61 }, + { opKind: 'infix', kind: TokenKind.Slash, lbp: 60, rbp: 61 }, + + { opKind: 'infix', kind: TokenKind.Plus, lbp: 50, rbp: 51 }, + { opKind: 'infix', kind: TokenKind.Minus, lbp: 50, rbp: 51 }, + + { opKind: 'infix', kind: TokenKind.Lt, lbp: 40, rbp: 41 }, + { opKind: 'infix', kind: TokenKind.LtEq, lbp: 40, rbp: 41 }, + { opKind: 'infix', kind: TokenKind.Gt, lbp: 40, rbp: 41 }, + { opKind: 'infix', kind: TokenKind.GtEq, lbp: 40, rbp: 41 }, + + { opKind: 'infix', kind: TokenKind.Eq2, lbp: 30, rbp: 31 }, + { opKind: 'infix', kind: TokenKind.NotEq, lbp: 30, rbp: 31 }, + + { opKind: 'infix', kind: TokenKind.And2, lbp: 20, rbp: 21 }, + + { opKind: 'infix', kind: TokenKind.Or2, lbp: 10, rbp: 11 }, ]; function parsePrefix(s: ITokenStream, info: PrefixInfo): Cst.Node { - // not - // sign - throw new Error('todo'); } function parseInfix(s: ITokenStream, left: Cst.Node, info: InfixInfo): Cst.Node { - // arithmetic ops - // prop chain - throw new Error('todo'); } function parsePostfix(s: ITokenStream, left: Cst.Node, info: PostfixInfo): Cst.Node { - // call chain - // index chain - throw new Error('todo'); } From 72ce1398c122e66295bbe95461f57e2f447ebb57 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 18:47:22 +0900 Subject: [PATCH 054/126] ops, spacing info for tokens --- src/parser/scanner.ts | 140 +++++++++++++++-------------- src/parser/streams/token-stream.ts | 2 +- src/parser/syntaxes/expressions.ts | 121 +++++++++++++++++++++++-- src/parser/token.ts | 5 +- test/parser.ts | 36 ++++---- 5 files changed, 205 insertions(+), 99 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 8013c490..9579a0db 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -65,14 +65,16 @@ export class Scanner implements ITokenStream { private readToken(): Token { let token; + let spaceSkipped = false; while (true) { if (this.stream.eof) { - token = TOKEN(TokenKind.EOF); + token = TOKEN(TokenKind.EOF, spaceSkipped); break; } // skip spasing if (spacingChars.includes(this.stream.char)) { this.stream.next(); + spaceSkipped = true; continue; } switch (this.stream.char) { @@ -80,15 +82,15 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.NotEq); + token = TOKEN(TokenKind.NotEq, spaceSkipped); } else { - token = TOKEN(TokenKind.Not); + token = TOKEN(TokenKind.Not, spaceSkipped); } break; } case '"': { this.stream.next(); - token = this.readStringLiteral(); + token = this.readStringLiteral(spaceSkipped); break; } case '#': { @@ -97,72 +99,72 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '#') { this.stream.next(); - token = TOKEN(TokenKind.Sharp3); + token = TOKEN(TokenKind.Sharp3, spaceSkipped); } } else if ((this.stream.char as string) === '[') { this.stream.next(); - token = TOKEN(TokenKind.OpenSharpBracket); + token = TOKEN(TokenKind.OpenSharpBracket, spaceSkipped); } else { - token = TOKEN(TokenKind.Sharp); + token = TOKEN(TokenKind.Sharp, spaceSkipped); } break; } case '%': { this.stream.next(); - token = TOKEN(TokenKind.Percent); + token = TOKEN(TokenKind.Percent, spaceSkipped); break; } case '&': { this.stream.next(); if ((this.stream.char as string) === '&') { this.stream.next(); - token = TOKEN(TokenKind.And2); + token = TOKEN(TokenKind.And2, spaceSkipped); } break; } case '(': { this.stream.next(); - token = TOKEN(TokenKind.OpenParen); + token = TOKEN(TokenKind.OpenParen, spaceSkipped); break; } case ')': { this.stream.next(); - token = TOKEN(TokenKind.CloseParen); + token = TOKEN(TokenKind.CloseParen, spaceSkipped); break; } case '*': { this.stream.next(); - token = TOKEN(TokenKind.Asterisk); + token = TOKEN(TokenKind.Asterisk, spaceSkipped); break; } case '+': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.PlusEq); + token = TOKEN(TokenKind.PlusEq, spaceSkipped); } else { - token = TOKEN(TokenKind.Plus); + token = TOKEN(TokenKind.Plus, spaceSkipped); } break; } case ',': { this.stream.next(); - token = TOKEN(TokenKind.Comma); + token = TOKEN(TokenKind.Comma, spaceSkipped); break; } case '-': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.MinusEq); + token = TOKEN(TokenKind.MinusEq, spaceSkipped); } else { - token = TOKEN(TokenKind.Minus); + token = TOKEN(TokenKind.Minus, spaceSkipped); } break; } case '.': { this.stream.next(); - token = TOKEN(TokenKind.Dot); + token = TOKEN(TokenKind.Dot, spaceSkipped); break; } case '/': { @@ -176,7 +178,7 @@ export class Scanner implements ITokenStream { this.skipCommentLine(); continue; } else { - token = TOKEN(TokenKind.Slash); + token = TOKEN(TokenKind.Slash, spaceSkipped); } break; } @@ -184,27 +186,27 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Colon2); + token = TOKEN(TokenKind.Colon2, spaceSkipped); } else { - token = TOKEN(TokenKind.Colon); + token = TOKEN(TokenKind.Colon, spaceSkipped); } break; } case ';': { this.stream.next(); - token = TOKEN(TokenKind.SemiColon); + token = TOKEN(TokenKind.SemiColon, spaceSkipped); break; } case '<': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.LtEq); + token = TOKEN(TokenKind.LtEq, spaceSkipped); } else if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Out); + token = TOKEN(TokenKind.Out, spaceSkipped); } else { - token = TOKEN(TokenKind.Lt); + token = TOKEN(TokenKind.Lt, spaceSkipped); } break; } @@ -212,12 +214,12 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.Eq2); + token = TOKEN(TokenKind.Eq2, spaceSkipped); } else if ((this.stream.char as string) === '>') { this.stream.next(); - token = TOKEN(TokenKind.Arrow); + token = TOKEN(TokenKind.Arrow, spaceSkipped); } else { - token = TOKEN(TokenKind.Eq); + token = TOKEN(TokenKind.Eq, spaceSkipped); } break; } @@ -225,9 +227,9 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.GtEq); + token = TOKEN(TokenKind.GtEq, spaceSkipped); } else { - token = TOKEN(TokenKind.Gt); + token = TOKEN(TokenKind.Gt, spaceSkipped); } break; } @@ -235,58 +237,58 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '(') { this.stream.next(); - token = TOKEN(TokenKind.OpenAtParen); + token = TOKEN(TokenKind.OpenAtParen, spaceSkipped); } else { - token = TOKEN(TokenKind.At); + token = TOKEN(TokenKind.At, spaceSkipped); } break; } case '[': { this.stream.next(); - token = TOKEN(TokenKind.OpenBracket); + token = TOKEN(TokenKind.OpenBracket, spaceSkipped); break; } case ']': { this.stream.next(); - token = TOKEN(TokenKind.CloseBracket); + token = TOKEN(TokenKind.CloseBracket, spaceSkipped); break; } case '^': { this.stream.next(); - token = TOKEN(TokenKind.Hat); + token = TOKEN(TokenKind.Hat, spaceSkipped); break; } case '`': { this.stream.next(); - token = this.readTemplate(); + token = this.readTemplate(spaceSkipped); break; } case '{': { this.stream.next(); - token = TOKEN(TokenKind.OpenBrace); + token = TOKEN(TokenKind.OpenBrace, spaceSkipped); break; } case '|': { this.stream.next(); if ((this.stream.char as string) === '|') { this.stream.next(); - token = TOKEN(TokenKind.Or2); + token = TOKEN(TokenKind.Or2, spaceSkipped); } break; } case '}': { this.stream.next(); - token = TOKEN(TokenKind.CloseBrace); + token = TOKEN(TokenKind.CloseBrace, spaceSkipped); break; } } if (token == null) { - const digitToken = this.tryReadDigits(); + const digitToken = this.tryReadDigits(spaceSkipped); if (digitToken) { token = digitToken; break; } - const wordToken = this.tryReadWord(); + const wordToken = this.tryReadWord(spaceSkipped); if (wordToken) { token = wordToken; break; @@ -298,7 +300,7 @@ export class Scanner implements ITokenStream { return token; } - private tryReadWord(): Token | undefined { + private tryReadWord(spaceSkipped: boolean): Token | undefined { // read a word let value = ''; while (!this.stream.eof && wordChar.test(this.stream.char)) { @@ -311,63 +313,63 @@ export class Scanner implements ITokenStream { // check word kind switch (value) { case 'null': { - return TOKEN(TokenKind.NullKeyword); + return TOKEN(TokenKind.NullKeyword, spaceSkipped); } case 'true': { - return TOKEN(TokenKind.TrueKeyword); + return TOKEN(TokenKind.TrueKeyword, spaceSkipped); } case 'false': { - return TOKEN(TokenKind.FalseKeyword); + return TOKEN(TokenKind.FalseKeyword, spaceSkipped); } case 'each': { - return TOKEN(TokenKind.EachKeyword); + return TOKEN(TokenKind.EachKeyword, spaceSkipped); } case 'for': { - return TOKEN(TokenKind.ForKeyword); + return TOKEN(TokenKind.ForKeyword, spaceSkipped); } case 'loop': { - return TOKEN(TokenKind.LoopKeyword); + return TOKEN(TokenKind.LoopKeyword, spaceSkipped); } case 'break': { - return TOKEN(TokenKind.BreakKeyword); + return TOKEN(TokenKind.BreakKeyword, spaceSkipped); } case 'continue': { - return TOKEN(TokenKind.ContinueKeyword); + return TOKEN(TokenKind.ContinueKeyword, spaceSkipped); } case 'match': { - return TOKEN(TokenKind.MatchKeyword); + return TOKEN(TokenKind.MatchKeyword, spaceSkipped); } case 'if': { - return TOKEN(TokenKind.IfKeyword); + return TOKEN(TokenKind.IfKeyword, spaceSkipped); } case 'elif': { - return TOKEN(TokenKind.ElifKeyword); + return TOKEN(TokenKind.ElifKeyword, spaceSkipped); } case 'else': { - return TOKEN(TokenKind.ElseKeyword); + return TOKEN(TokenKind.ElseKeyword, spaceSkipped); } case 'return': { - return TOKEN(TokenKind.ReturnKeyword); + return TOKEN(TokenKind.ReturnKeyword, spaceSkipped); } case 'eval': { - return TOKEN(TokenKind.EvalKeyword); + return TOKEN(TokenKind.EvalKeyword, spaceSkipped); } case 'var': { - return TOKEN(TokenKind.VarKeyword); + return TOKEN(TokenKind.VarKeyword, spaceSkipped); } case 'let': { - return TOKEN(TokenKind.LetKeyword); + return TOKEN(TokenKind.LetKeyword, spaceSkipped); } case 'exists': { - return TOKEN(TokenKind.ExistsKeyword); + return TOKEN(TokenKind.ExistsKeyword, spaceSkipped); } default: { - return TOKEN(TokenKind.Identifier, { value }); + return TOKEN(TokenKind.Identifier, spaceSkipped, { value }); } } } - private tryReadDigits(): Token | undefined { + private tryReadDigits(spaceSkipped: boolean): Token | undefined { // TODO: float number let value = ''; while (!this.stream.eof && digit.test(this.stream.char)) { @@ -377,10 +379,10 @@ export class Scanner implements ITokenStream { if (value.length === 0) { return; } - return TOKEN(TokenKind.NumberLiteral, { value }); + return TOKEN(TokenKind.NumberLiteral, spaceSkipped, { value }); } - private readStringLiteral(): Token { + private readStringLiteral(spaceSkipped: boolean): Token { let value = ''; while (true) { if (this.stream.eof) { @@ -393,10 +395,10 @@ export class Scanner implements ITokenStream { value += this.stream.char; this.stream.next(); } - return TOKEN(TokenKind.StringLiteral, { value }); + return TOKEN(TokenKind.StringLiteral, spaceSkipped, { value }); } - private readTemplate(): Token { + private readTemplate(spaceSkipped: boolean): Token { const elements: Token[] = []; let buf = ''; let tokenBuf: Token[] = []; @@ -413,7 +415,7 @@ export class Scanner implements ITokenStream { if (this.stream.char === '`') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, spaceSkipped, { value: buf })); } state = 'finish'; break; @@ -422,7 +424,7 @@ export class Scanner implements ITokenStream { if (this.stream.char === '{') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, { value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, spaceSkipped, { value: buf })); buf = ''; } state = 'expr'; @@ -445,7 +447,7 @@ export class Scanner implements ITokenStream { // 埋め込み式の終了 if ((this.stream.char as string) === '}') { this.stream.next(); - elements.push(TOKEN(TokenKind.TemplateExprElement, { children: tokenBuf })); + elements.push(TOKEN(TokenKind.TemplateExprElement, spaceSkipped, { children: tokenBuf })); tokenBuf = []; state = 'string'; break; @@ -457,7 +459,7 @@ export class Scanner implements ITokenStream { } } - return TOKEN(TokenKind.Template, { children: elements }); + return TOKEN(TokenKind.Template, spaceSkipped, { children: elements }); } private skipCommentLine() { diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index de312138..9e280ab5 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -70,7 +70,7 @@ export class TokenStream implements ITokenStream { private load(): void { if (this.eof) { - this._token = TOKEN(TokenKind.EOF); + this._token = TOKEN(TokenKind.EOF, false); } else { this._token = this.source[this.index]; } diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 6d902768..47fefa46 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -42,16 +42,112 @@ const operators: OpInfo[] = [ { opKind: 'infix', kind: TokenKind.Or2, lbp: 10, rbp: 11 }, ]; -function parsePrefix(s: ITokenStream, info: PrefixInfo): Cst.Node { - throw new Error('todo'); +function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { + const op = s.kind; + s.next(); + + const expr = parsePratt(s, minBp); + + switch (op) { + case TokenKind.Plus: { + return NODE('plus', { expr }); + } + case TokenKind.Minus: { + return NODE('minus', { expr }); + } + case TokenKind.Not: { + return NODE('not', { expr }); + } + default: { + throw new Error('unexpected token'); + } + } } -function parseInfix(s: ITokenStream, left: Cst.Node, info: InfixInfo): Cst.Node { - throw new Error('todo'); +function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { + const op = s.kind; + s.next(); + + if (op === TokenKind.Dot) { + s.expect(TokenKind.Identifier); + const name = s.token.value!; + + return NODE('prop', { + target: left, + name, + }); + } else { + const right = parsePratt(s, minBp); + + switch (op) { + case TokenKind.Asterisk: { + return NODE('mul', { left, right }); + } + case TokenKind.Slash: { + return NODE('div', { left, right }); + } + case TokenKind.Plus: { + return NODE('add', { left, right }); + } + case TokenKind.Minus: { + return NODE('sub', { left, right }); + } + case TokenKind.Lt: { + return NODE('lt', { left, right }); + } + case TokenKind.LtEq: { + return NODE('lteq', { left, right }); + } + case TokenKind.Gt: { + return NODE('gt', { left, right }); + } + case TokenKind.GtEq: { + return NODE('gteq', { left, right }); + } + case TokenKind.Eq2: { + return NODE('eq', { left, right }); + } + case TokenKind.NotEq: { + return NODE('neq', { left, right }); + } + case TokenKind.And2: { + return NODE('and', { left, right }); + } + case TokenKind.Or2: { + return NODE('or', { left, right }); + } + default: { + throw new Error('unexpected token'); + } + } + } } -function parsePostfix(s: ITokenStream, left: Cst.Node, info: PostfixInfo): Cst.Node { - throw new Error('todo'); +function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { + const op = s.kind; + s.next(); + + switch (op) { + case TokenKind.OpenParen: { + const args = parseCallArgs(s); + + return NODE('call', { + target: expr, + args, + }); + } + case TokenKind.OpenBracket: { + const index = parseExpr(s); + + return NODE('index', { + target: expr, + index, + }); + } + default: { + throw new Error('unexpected token'); + } + } } function parseAtom(s: ITokenStream): Cst.Node { @@ -136,6 +232,13 @@ function parseAtom(s: ITokenStream): Cst.Node { } } +/** + * CallArgs = [Expr *(SEP Expr)] +*/ +function parseCallArgs(s: ITokenStream): Cst.Node[] { + throw new Error('todo'); +} + /** * ```abnf * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] @@ -252,7 +355,7 @@ function parsePratt(s: ITokenStream, minBp: number) { const tokenKind = s.kind; const prefix = operators.find((x): x is PrefixInfo => x.opKind === 'prefix' && x.kind === tokenKind); if (prefix != null) { - left = parsePrefix(s, prefix); + left = parsePrefix(s, prefix.bp); } else { left = parseAtom(s); } @@ -266,7 +369,7 @@ function parsePratt(s: ITokenStream, minBp: number) { break; } - left = parsePostfix(s, left, postfix); + left = parsePostfix(s, left); continue; } @@ -276,7 +379,7 @@ function parsePratt(s: ITokenStream, minBp: number) { break; } - left = parseInfix(s, left, infix); + left = parseInfix(s, left, infix.rbp); continue; } diff --git a/src/parser/token.ts b/src/parser/token.ts index 34231ee3..9c50b75e 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -107,6 +107,7 @@ export enum TokenKind { export class Token { constructor( public kind: TokenKind, + public spaceSkipped: boolean, /** for number literal, string literal */ public value?: string, /** for template syntax */ @@ -118,6 +119,6 @@ export class Token { * - opts.value: for number literal, string literal * - opts.children: for template syntax */ -export function TOKEN(kind: TokenKind, opts?: { value?: Token['value'], children?: Token['children'] }) { - return new Token(kind, opts?.value, opts?.children); +export function TOKEN(kind: TokenKind, spaceSkipped: boolean, opts?: { value?: Token['value'], children?: Token['children'] }) { + return new Token(kind, spaceSkipped, opts?.value, opts?.children); } diff --git a/test/parser.ts b/test/parser.ts index 5a227b5c..ef37dce0 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -8,8 +8,8 @@ describe('Scanner', () => { stream.init(); return stream; } - function next(stream: Scanner, kind: TokenKind, value?: string) { - assert.deepStrictEqual(stream.token, TOKEN(kind, { value })); + function next(stream: Scanner, kind: TokenKind, spaceSkipped: boolean, value?: string) { + assert.deepStrictEqual(stream.token, TOKEN(kind, spaceSkipped, { value })); stream.next(); } @@ -26,20 +26,20 @@ describe('Scanner', () => { test.concurrent('eof', async () => { const source = ''; const stream = init(source); - next(stream, TokenKind.EOF); - next(stream, TokenKind.EOF); + next(stream, TokenKind.EOF, false); + next(stream, TokenKind.EOF, false); }); test.concurrent('keyword', async () => { const source = 'if'; const stream = init(source); - next(stream, TokenKind.IfKeyword); - next(stream, TokenKind.EOF); + next(stream, TokenKind.IfKeyword, false); + next(stream, TokenKind.EOF, false); }); test.concurrent('identifier', async () => { const source = 'xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, 'xyz'); - next(stream, TokenKind.EOF); + next(stream, TokenKind.Identifier, false, 'xyz'); + next(stream, TokenKind.EOF, false); }); test.concurrent('invalid token', async () => { const source = '$'; @@ -52,19 +52,19 @@ describe('Scanner', () => { test.concurrent('words', async () => { const source = 'abc xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, 'abc'); - next(stream, TokenKind.Identifier, 'xyz'); - next(stream, TokenKind.EOF); + next(stream, TokenKind.Identifier, false, 'abc'); + next(stream, TokenKind.Identifier, true, 'xyz'); + next(stream, TokenKind.EOF, false); }); test.concurrent('stream', async () => { const source = '@abc() { }'; const stream = init(source); - next(stream, TokenKind.At); - next(stream, TokenKind.Identifier, 'abc'); - next(stream, TokenKind.OpenParen); - next(stream, TokenKind.CloseParen); - next(stream, TokenKind.OpenBrace); - next(stream, TokenKind.CloseBrace); - next(stream, TokenKind.EOF); + next(stream, TokenKind.At, false); + next(stream, TokenKind.Identifier, false, 'abc'); + next(stream, TokenKind.OpenParen, false); + next(stream, TokenKind.CloseParen, false); + next(stream, TokenKind.OpenBrace, true); + next(stream, TokenKind.CloseBrace, true); + next(stream, TokenKind.EOF, false); }); }); From 69cf976db306f56f381a667b9cd84092b5a1f63e Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 19:04:25 +0900 Subject: [PATCH 055/126] suger of op nodes --- src/parser/node.ts | 7 +++++++ src/parser/syntaxes/expressions.ts | 22 +++++++++++----------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/parser/node.ts b/src/parser/node.ts index 544028ce..48485762 100644 --- a/src/parser/node.ts +++ b/src/parser/node.ts @@ -20,6 +20,13 @@ export function NODE(type: string, params: Record): Node { return node as Node; } +export function CALL_NODE(name: string, args: Node[]) { + return NODE('call', { + target: NODE('identifier', { name }), + args, + }); +} + export type Statement = Definition | Return | diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 47fefa46..77beebae 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -2,7 +2,7 @@ import { AiScriptSyntaxError } from '../../error.js'; import { TokenKind } from '../token.js'; import { TokenStream } from '../streams/token-stream.js'; import type { ITokenStream } from '../streams/token-stream.js'; -import { NODE } from '../node.js'; +import { CALL_NODE, NODE } from '../node.js'; import type * as Cst from '../node.js'; import { parseBlockOrStatement } from './statements.js'; @@ -81,34 +81,34 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { switch (op) { case TokenKind.Asterisk: { - return NODE('mul', { left, right }); + return CALL_NODE('Core:mul', [left, right]); } case TokenKind.Slash: { - return NODE('div', { left, right }); + return CALL_NODE('Core:div', [left, right]); } case TokenKind.Plus: { - return NODE('add', { left, right }); + return CALL_NODE('Core:add', [left, right]); } case TokenKind.Minus: { - return NODE('sub', { left, right }); + return CALL_NODE('Core:sub', [left, right]); } case TokenKind.Lt: { - return NODE('lt', { left, right }); + return CALL_NODE('Core:lt', [left, right]); } case TokenKind.LtEq: { - return NODE('lteq', { left, right }); + return CALL_NODE('Core:lteq', [left, right]); } case TokenKind.Gt: { - return NODE('gt', { left, right }); + return CALL_NODE('Core:gt', [left, right]); } case TokenKind.GtEq: { - return NODE('gteq', { left, right }); + return CALL_NODE('Core:gteq', [left, right]); } case TokenKind.Eq2: { - return NODE('eq', { left, right }); + return CALL_NODE('Core:eq', [left, right]); } case TokenKind.NotEq: { - return NODE('neq', { left, right }); + return CALL_NODE('Core:neq', [left, right]); } case TokenKind.And2: { return NODE('and', { left, right }); From f4ba0c232e674b0e052a7ad25c988e96618a833d Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 20:19:35 +0900 Subject: [PATCH 056/126] fix variable name --- src/parser/syntaxes/statements.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 4ef7f481..e4aef711 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -171,12 +171,12 @@ export function parseFor(s: ITokenStream): Cst.Node { const name = s.token.value!; s.next(); - let from; + let _from; if ((s.kind as TokenKind) === TokenKind.Eq) { s.next(); - from = parseExpr(s); + _from = parseExpr(s); } else { - from = NODE('num', { value: 0 }); + _from = NODE('num', { value: 0 }); } const to = parseExpr(s); @@ -189,7 +189,7 @@ export function parseFor(s: ITokenStream): Cst.Node { return NODE('for', { var: name, - from, + from: _from, to, for: body, }); From f9a11fb7d4232b9350388b94c1c29fb8497ccd15 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 20:21:33 +0900 Subject: [PATCH 057/126] single quoted string literal --- src/parser/scanner.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 9579a0db..4c9a852d 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -89,7 +89,6 @@ export class Scanner implements ITokenStream { break; } case '"': { - this.stream.next(); token = this.readStringLiteral(spaceSkipped); break; } @@ -384,11 +383,15 @@ export class Scanner implements ITokenStream { private readStringLiteral(spaceSkipped: boolean): Token { let value = ''; + + const literalMark = this.stream.char; + this.stream.next(); + while (true) { if (this.stream.eof) { throw new AiScriptSyntaxError(`unexpected EOF`); } - if (this.stream.char === '"') { + if (this.stream.char === literalMark) { this.stream.next(); break; } From 9043a9414878fe1933e01556ad1a0b944bdee13b Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 21:39:49 +0900 Subject: [PATCH 058/126] ops --- src/parser/syntaxes/expressions.ts | 44 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 77beebae..99d31964 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -13,33 +13,38 @@ export function parseExpr(s: ITokenStream) { return parsePratt(s, 0); } +// NOTE: infix(中置演算子)ではlbpを大きくすると右結合、rbpを大きくすると左結合の演算子になります。 +// この値は演算子が左と右に対してどのくらい結合力があるかを表わしています。詳細はpratt parsingの説明ページを参照してください。 + const operators: OpInfo[] = [ - { opKind: 'postfix', kind: TokenKind.OpenParen, bp: 90 }, - { opKind: 'postfix', kind: TokenKind.OpenBracket, bp: 90 }, + { opKind: 'postfix', kind: TokenKind.OpenParen, bp: 20 }, + { opKind: 'postfix', kind: TokenKind.OpenBracket, bp: 20 }, + + { opKind: 'infix', kind: TokenKind.Dot, lbp: 18, rbp: 19 }, - { opKind: 'infix', kind: TokenKind.Dot, lbp: 80, rbp: 81 }, + { opKind: 'infix', kind: TokenKind.Hat, lbp: 17, rbp: 16 }, - { opKind: 'prefix', kind: TokenKind.Plus, bp: 70 }, - { opKind: 'prefix', kind: TokenKind.Minus, bp: 70 }, - { opKind: 'prefix', kind: TokenKind.Not, bp: 70 }, + { opKind: 'prefix', kind: TokenKind.Plus, bp: 14 }, + { opKind: 'prefix', kind: TokenKind.Minus, bp: 14 }, + { opKind: 'prefix', kind: TokenKind.Not, bp: 14 }, - { opKind: 'infix', kind: TokenKind.Asterisk, lbp: 60, rbp: 61 }, - { opKind: 'infix', kind: TokenKind.Slash, lbp: 60, rbp: 61 }, + { opKind: 'infix', kind: TokenKind.Asterisk, lbp: 12, rbp: 13 }, + { opKind: 'infix', kind: TokenKind.Slash, lbp: 12, rbp: 13 }, - { opKind: 'infix', kind: TokenKind.Plus, lbp: 50, rbp: 51 }, - { opKind: 'infix', kind: TokenKind.Minus, lbp: 50, rbp: 51 }, + { opKind: 'infix', kind: TokenKind.Plus, lbp: 10, rbp: 11 }, + { opKind: 'infix', kind: TokenKind.Minus, lbp: 10, rbp: 11 }, - { opKind: 'infix', kind: TokenKind.Lt, lbp: 40, rbp: 41 }, - { opKind: 'infix', kind: TokenKind.LtEq, lbp: 40, rbp: 41 }, - { opKind: 'infix', kind: TokenKind.Gt, lbp: 40, rbp: 41 }, - { opKind: 'infix', kind: TokenKind.GtEq, lbp: 40, rbp: 41 }, + { opKind: 'infix', kind: TokenKind.Lt, lbp: 8, rbp: 9 }, + { opKind: 'infix', kind: TokenKind.LtEq, lbp: 8, rbp: 9 }, + { opKind: 'infix', kind: TokenKind.Gt, lbp: 8, rbp: 9 }, + { opKind: 'infix', kind: TokenKind.GtEq, lbp: 8, rbp: 9 }, - { opKind: 'infix', kind: TokenKind.Eq2, lbp: 30, rbp: 31 }, - { opKind: 'infix', kind: TokenKind.NotEq, lbp: 30, rbp: 31 }, + { opKind: 'infix', kind: TokenKind.Eq2, lbp: 6, rbp: 7 }, + { opKind: 'infix', kind: TokenKind.NotEq, lbp: 6, rbp: 7 }, - { opKind: 'infix', kind: TokenKind.And2, lbp: 20, rbp: 21 }, + { opKind: 'infix', kind: TokenKind.And2, lbp: 4, rbp: 5 }, - { opKind: 'infix', kind: TokenKind.Or2, lbp: 10, rbp: 11 }, + { opKind: 'infix', kind: TokenKind.Or2, lbp: 2, rbp: 3 }, ]; function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { @@ -80,6 +85,9 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { const right = parsePratt(s, minBp); switch (op) { + case TokenKind.Hat: { + return CALL_NODE('Core:pow', [left, right]); + } case TokenKind.Asterisk: { return CALL_NODE('Core:mul', [left, right]); } From 326e12fcd436638023efd4656fd5e1a0844fb0a8 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 24 Sep 2023 22:09:34 +0900 Subject: [PATCH 059/126] function wip --- src/parser/syntaxes/function.ts | 45 +++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/parser/syntaxes/function.ts b/src/parser/syntaxes/function.ts index b2a83aad..a7df8255 100644 --- a/src/parser/syntaxes/function.ts +++ b/src/parser/syntaxes/function.ts @@ -4,16 +4,57 @@ import { TokenStream } from '../streams/token-stream.js'; import type { ITokenStream } from '../streams/token-stream.js'; import { NODE } from '../node.js'; import type * as Cst from '../node.js'; +import { parseBlock } from './common.js'; /** * ```abnf - * FnDef = "@" IDENT "(" Args ")" [":" Type] Block + * FnDef = "@" IDENT "(" Params ")" [":" Type] Block * ``` */ export function parseFnDef(s: ITokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.At); + + s.expect(TokenKind.Identifier); + const name = s.token.value; + s.next(); + + s.nextWith(TokenKind.OpenParen); + + const params = parseParams(s); + + s.nextWith(TokenKind.CloseParen); + + // type + + const body = parseBlock(s); + + return NODE('def', { + name, + expr: NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }), + mut: false, + attr: [] + }); } +/** + * ```abnf + * FnExpr = "@(" Params ")" [":" Type] Block + * ``` +*/ export function parseFnExpr(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.OpenAtParen); + + const params = parseParams(s); + + s.nextWith(TokenKind.CloseParen); + + // type + + const body = parseBlock(s); + + return NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }); +} + +function parseParams(s: ITokenStream): Cst.Node[] { throw new Error('todo'); } From 0cf0bec73f6337705081bfca1c1b3cc197b42ecc Mon Sep 17 00:00:00 2001 From: marihachi Date: Tue, 26 Sep 2023 21:36:00 +0900 Subject: [PATCH 060/126] enhance CharStream --- src/parser/streams/char-stream.ts | 55 ++++++++++++++++++++++++------- test/parser.ts | 51 ++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 11 deletions(-) diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index 4f5f2012..4deecb7d 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -2,21 +2,28 @@ * 入力文字列から文字を読み取るクラス */ export class CharStream { - private source: string; - private index: number; + private pages: Map; + private firstPageIndex: number; + private lastPageIndex: number; + private pageIndex: number; + private address: number; private _char?: string; constructor(source: string) { - this.source = source; - this.index = 0; + this.pages = new Map(); + this.pages.set(0, source); + this.firstPageIndex = 0; + this.lastPageIndex = 0; + this.pageIndex = 0; + this.address = 0; } public init(): void { - this.load(); + this.loadChar(); } public get eof(): boolean { - return (this.index >= this.source.length); + return this.endOfPage && this.isLastPage; } public get char(): string { @@ -31,15 +38,41 @@ export class CharStream { } public next(): void { - if (!this.eof) { - this.index++; + if (!this.endOfPage) { + this.address++; + } else if (!this.isLastPage) { + this.pageIndex++; + this.address = 0; } - this.load(); + this.loadChar(); + } + + public prev(): void { + if (this.address > 0) { + this.address--; + } else if (!this.isFirstPage) { + this.pageIndex--; + this.address = this.pages.get(this.pageIndex)!.length - 1; + } + this.loadChar(); + } + + private get isFirstPage() { + return (this.pageIndex <= this.firstPageIndex); + } + + private get isLastPage() { + return (this.pageIndex >= this.lastPageIndex); + } + + private get endOfPage(): boolean { + const page = this.pages.get(this.pageIndex)!; + return (this.address >= page.length); } - private load(): void { + private loadChar(): void { if (!this.eof) { - this._char = this.source[this.index]; + this._char = this.pages.get(this.pageIndex)![this.address]; } } } diff --git a/test/parser.ts b/test/parser.ts index ef37dce0..e41f97a1 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -1,6 +1,57 @@ import * as assert from 'assert'; import { Scanner } from '../src/parser/scanner'; import { TOKEN, TokenKind } from '../src/parser/token'; +import { CharStream } from '../src/parser/streams/char-stream'; + +describe('CharStream', () => { + test.concurrent('char', async () => { + const source = 'abc'; + const stream = new CharStream(source); + stream.init(); + assert.strictEqual('a', stream.char); + }); + + test.concurrent('next', async () => { + const source = 'abc'; + const stream = new CharStream(source); + stream.init(); + stream.next(); + assert.strictEqual('b', stream.char); + }); + + describe('prev', () => { + test.concurrent('move', async () => { + const source = 'abc'; + const stream = new CharStream(source); + stream.init(); + stream.next(); + assert.strictEqual('b', stream.char); + stream.prev(); + assert.strictEqual('a', stream.char); + }); + + test.concurrent('境界外には移動しない', async () => { + const source = 'abc'; + const stream = new CharStream(source); + stream.init(); + stream.prev(); + assert.strictEqual('a', stream.char); + }); + }); + + test.concurrent('eof', async () => { + const source = 'abc'; + const stream = new CharStream(source); + stream.init(); + assert.strictEqual(false, stream.eof); + stream.next(); + assert.strictEqual(false, stream.eof); + stream.next(); + assert.strictEqual(false, stream.eof); + stream.next(); + assert.strictEqual(true, stream.eof); + }); +}); describe('Scanner', () => { function init(source: string) { From b1de0e88a5eba511ac249e58d0dffa84156fe657 Mon Sep 17 00:00:00 2001 From: marihachi Date: Tue, 26 Sep 2023 21:43:38 +0900 Subject: [PATCH 061/126] wip --- src/parser/scanner.ts | 7 +------ src/parser/syntaxes/function.ts | 3 ++- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 4c9a852d..c325c419 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -234,12 +234,7 @@ export class Scanner implements ITokenStream { } case '@': { this.stream.next(); - if ((this.stream.char as string) === '(') { - this.stream.next(); - token = TOKEN(TokenKind.OpenAtParen, spaceSkipped); - } else { - token = TOKEN(TokenKind.At, spaceSkipped); - } + token = TOKEN(TokenKind.At, spaceSkipped); break; } case '[': { diff --git a/src/parser/syntaxes/function.ts b/src/parser/syntaxes/function.ts index a7df8255..045fe8b2 100644 --- a/src/parser/syntaxes/function.ts +++ b/src/parser/syntaxes/function.ts @@ -42,7 +42,8 @@ export function parseFnDef(s: ITokenStream): Cst.Node { * ``` */ export function parseFnExpr(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.OpenAtParen); + s.nextWith(TokenKind.At); + s.nextWith(TokenKind.OpenParen); const params = parseParams(s); From a07c3008b475d4af360febd9e52f109e78e0aac5 Mon Sep 17 00:00:00 2001 From: marihachi Date: Tue, 26 Sep 2023 22:33:37 +0900 Subject: [PATCH 062/126] doc --- docs/parser/scanner.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 docs/parser/scanner.md diff --git a/docs/parser/scanner.md b/docs/parser/scanner.md new file mode 100644 index 00000000..6e78da1b --- /dev/null +++ b/docs/parser/scanner.md @@ -0,0 +1,9 @@ +# Scanner 設計資料 +作成者: marihachi + +## 現在のトークンと先読みされたトークン +_tokensの0番には現在のトークンが保持される。また、トークンが先読みされた場合は1番以降にそれらのトークンが保持されていくことになる。 +例えば、次のトークンを1つ先読みした場合は0番に現在のトークンが入り1番に先読みされたトークンが入る。 + +nextメソッドで現在位置が移動すると、それまで0番にあったトークン(現在のトークン)は配列から削除され、1番にあった要素は現在のトークンとなる。 +配列から全てのトークンが無くなった場合はトークンの読み取りが実行される。 From a06a532b3d67fc7abfdfe357288701a1c505bea7 Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 28 Sep 2023 20:41:14 +0900 Subject: [PATCH 063/126] lookahead scan --- src/parser/scanner.ts | 29 +++++++++++++++++++++++------ test/parser.ts | 8 ++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index c325c419..1105235a 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -13,7 +13,7 @@ const wordChar = /^[A-Za-z0-9_]$/; */ export class Scanner implements ITokenStream { private stream: CharStream; - private _token?: Token; + private _tokens: Token[] = []; constructor(source: string) constructor(stream: CharStream) @@ -27,7 +27,7 @@ export class Scanner implements ITokenStream { } public init(): void { - this._token = this.readToken(); + this._tokens.push(this.readToken()); } public get eof(): boolean { @@ -35,10 +35,10 @@ export class Scanner implements ITokenStream { } public get token(): Token { - if (this._token == null) { + if (this._tokens.length == 0) { throw new Error('stream is not initialized yet'); } - return this._token; + return this._tokens[0]!; } public get kind(): TokenKind { @@ -46,10 +46,27 @@ export class Scanner implements ITokenStream { } public next(): void { - if (this._token == null) { + if (this._tokens.length == 0) { throw new Error('stream is not initialized yet'); } - this._token = this.readToken(); + + this._tokens.shift(); + + if (this._tokens.length == 0) { + this._tokens.push(this.readToken()); + } + } + + public lookahead(offset: number): Token { + if (this._tokens.length == 0) { + throw new Error('stream is not initialized yet'); + } + + while (this._tokens.length <= offset) { + this._tokens.push(this.readToken()); + } + + return this._tokens[offset]!; } public expect(kind: TokenKind): void { diff --git a/test/parser.ts b/test/parser.ts index e41f97a1..d13abbe2 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -118,4 +118,12 @@ describe('Scanner', () => { next(stream, TokenKind.CloseBrace, true); next(stream, TokenKind.EOF, false); }); + test.concurrent('lookahead', async () => { + const source = '@abc() { }'; + const stream = init(source); + assert.deepStrictEqual(stream.lookahead(1), TOKEN(TokenKind.Identifier, false, { value: 'abc' })); + next(stream, TokenKind.At, false); + next(stream, TokenKind.Identifier, false, 'abc'); + next(stream, TokenKind.OpenParen, false); + }); }); From 7a51bb2ba1944fc7ce55925b35f463060dcc9817 Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 28 Sep 2023 20:42:59 +0900 Subject: [PATCH 064/126] lint --- src/parser/scanner.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 1105235a..14ca7c2e 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -35,7 +35,7 @@ export class Scanner implements ITokenStream { } public get token(): Token { - if (this._tokens.length == 0) { + if (this._tokens.length === 0) { throw new Error('stream is not initialized yet'); } return this._tokens[0]!; @@ -46,19 +46,19 @@ export class Scanner implements ITokenStream { } public next(): void { - if (this._tokens.length == 0) { + if (this._tokens.length === 0) { throw new Error('stream is not initialized yet'); } this._tokens.shift(); - if (this._tokens.length == 0) { + if (this._tokens.length === 0) { this._tokens.push(this.readToken()); } } public lookahead(offset: number): Token { - if (this._tokens.length == 0) { + if (this._tokens.length === 0) { throw new Error('stream is not initialized yet'); } From 07570aa329a3c3198a2b9829d3bef5abafe3460b Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 28 Sep 2023 21:08:09 +0900 Subject: [PATCH 065/126] lookahead --- src/parser/streams/token-stream.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index 9e280ab5..20ba7d54 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -10,6 +10,7 @@ export interface ITokenStream { get token(): Token; get kind(): TokenKind; next(): void; + lookahead(offset: number): Token; expect(kind: TokenKind): void; nextWith(kind: TokenKind): void; } @@ -57,6 +58,14 @@ export class TokenStream implements ITokenStream { this.load(); } + public lookahead(offset: number): Token { + if (this.index + offset < this.source.length) { + return this.source[this.index + offset]!; + } else { + return TOKEN(TokenKind.EOF, false); + } + } + public expect(kind: TokenKind): void { if (this.kind !== kind) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.kind]}`); From 1ec51bdfef1062da0878c1d1f6598734417705d1 Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 28 Sep 2023 21:10:32 +0900 Subject: [PATCH 066/126] parser wip: fn --- src/parser/syntaxes/common.ts | 4 ++ src/parser/syntaxes/expressions.ts | 23 ++++++++++- src/parser/syntaxes/function.ts | 61 ------------------------------ src/parser/syntaxes/statements.ts | 52 +++++++++++++++++++------ src/parser/syntaxes/toplevel.ts | 4 +- 5 files changed, 67 insertions(+), 77 deletions(-) delete mode 100644 src/parser/syntaxes/function.ts diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index cd1d7023..1c75b214 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -4,6 +4,10 @@ import { parseStatement } from './statements.js'; import type * as Cst from '../node.js'; +export function parseParams(s: ITokenStream): Cst.Node[] { + throw new Error('todo'); +} + /** * ```abnf * Block = "{" *Statement "}" diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 99d31964..dbe6e3de 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -6,8 +6,7 @@ import { CALL_NODE, NODE } from '../node.js'; import type * as Cst from '../node.js'; import { parseBlockOrStatement } from './statements.js'; -import { parseBlock } from './common.js'; -import { parseFnExpr } from './function.js'; +import { parseBlock, parseParams } from './common.js'; export function parseExpr(s: ITokenStream) { return parsePratt(s, 0); @@ -274,6 +273,26 @@ export function parseIf(s: ITokenStream): Cst.Node { return NODE('if', { cond, then, elseif, else: _else }); } +/** + * ```abnf + * FnExpr = "@(" Params ")" [":" Type] Block + * ``` +*/ +export function parseFnExpr(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.At); + s.nextWith(TokenKind.OpenParen); + + const params = parseParams(s); + + s.nextWith(TokenKind.CloseParen); + + // type + + const body = parseBlock(s); + + return NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }); +} + export function parseMatch(s: ITokenStream): Cst.Node { throw new Error('todo'); } diff --git a/src/parser/syntaxes/function.ts b/src/parser/syntaxes/function.ts deleted file mode 100644 index 045fe8b2..00000000 --- a/src/parser/syntaxes/function.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { AiScriptSyntaxError } from '../../error.js'; -import { TokenKind } from '../token.js'; -import { TokenStream } from '../streams/token-stream.js'; -import type { ITokenStream } from '../streams/token-stream.js'; -import { NODE } from '../node.js'; -import type * as Cst from '../node.js'; -import { parseBlock } from './common.js'; - -/** - * ```abnf - * FnDef = "@" IDENT "(" Params ")" [":" Type] Block - * ``` -*/ -export function parseFnDef(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.At); - - s.expect(TokenKind.Identifier); - const name = s.token.value; - s.next(); - - s.nextWith(TokenKind.OpenParen); - - const params = parseParams(s); - - s.nextWith(TokenKind.CloseParen); - - // type - - const body = parseBlock(s); - - return NODE('def', { - name, - expr: NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }), - mut: false, - attr: [] - }); -} - -/** - * ```abnf - * FnExpr = "@(" Params ")" [":" Type] Block - * ``` -*/ -export function parseFnExpr(s: ITokenStream): Cst.Node { - s.nextWith(TokenKind.At); - s.nextWith(TokenKind.OpenParen); - - const params = parseParams(s); - - s.nextWith(TokenKind.CloseParen); - - // type - - const body = parseBlock(s); - - return NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }); -} - -function parseParams(s: ITokenStream): Cst.Node[] { - throw new Error('todo'); -} diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index e4aef711..b111e1ab 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -5,9 +5,8 @@ import type { ITokenStream } from '../streams/token-stream.js'; import { NODE } from '../node.js'; import type * as Cst from '../node.js'; -import { parseBlock, parseType } from './common.js'; +import { parseBlock, parseParams, parseType } from './common.js'; import { parseExpr } from './expressions.js'; -import { parseFnDef } from './function.js'; /** * ```abnf @@ -22,7 +21,10 @@ export function parseStatement(s: ITokenStream): Cst.Node { return parseVarDef(s); } case TokenKind.At: { - return parseFnDef(s); + if (s.lookahead(1).kind === TokenKind.Identifier) { + return parseFnDef(s); + } + break; } case TokenKind.Out: { return parseOut(s); @@ -48,15 +50,13 @@ export function parseStatement(s: ITokenStream): Cst.Node { s.next(); return NODE('continue', {}); } - default: { - const expr = parseExpr(s); - const assign = tryParseAssign(s, expr); - if (assign) { - return assign; - } - return expr; - } } + const expr = parseExpr(s); + const assign = tryParseAssign(s, expr); + if (assign) { + return assign; + } + return expr; } /** @@ -98,6 +98,36 @@ export function parseVarDef(s: ITokenStream): Cst.Node { return NODE('def', { name, varType: ty, expr, mut, attr: [] }); } +/** + * ```abnf + * FnDef = "@" IDENT "(" Params ")" [":" Type] Block + * ``` +*/ +export function parseFnDef(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.At); + + s.expect(TokenKind.Identifier); + const name = s.token.value; + s.next(); + + s.nextWith(TokenKind.OpenParen); + + const params = parseParams(s); + + s.nextWith(TokenKind.CloseParen); + + // type + + const body = parseBlock(s); + + return NODE('def', { + name, + expr: NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }), + mut: false, + attr: [] + }); +} + /** * ```abnf * Out = "<:" Expr diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index 0826d180..84fec41f 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -4,9 +4,7 @@ import { TokenStream } from '../streams/token-stream.js'; import type { ITokenStream } from '../streams/token-stream.js'; import { NODE } from '../node.js'; import type * as Cst from '../node.js'; - -import { parseStatement, parseVarDef } from './statements.js'; -import { parseFnDef } from './function.js'; +import { parseFnDef, parseStatement, parseVarDef } from './statements.js'; /** * ```abnf From 43dd075e193e0b31f45b15b2b72c2cc4a61c10a4 Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 28 Sep 2023 21:38:11 +0900 Subject: [PATCH 067/126] refactor --- src/parser/node.ts | 2 +- src/parser/scanner.ts | 19 ++++++++++--------- src/parser/streams/char-stream.ts | 4 ++-- src/parser/streams/token-stream.ts | 2 +- src/parser/syntaxes/common.ts | 2 +- src/parser/syntaxes/expressions.ts | 18 +++++++++--------- src/parser/syntaxes/statements.ts | 11 +++++------ src/parser/syntaxes/toplevel.ts | 9 ++++----- src/parser/token.ts | 2 +- 9 files changed, 34 insertions(+), 35 deletions(-) diff --git a/src/parser/node.ts b/src/parser/node.ts index 48485762..161abd06 100644 --- a/src/parser/node.ts +++ b/src/parser/node.ts @@ -20,7 +20,7 @@ export function NODE(type: string, params: Record): Node { return node as Node; } -export function CALL_NODE(name: string, args: Node[]) { +export function CALL_NODE(name: string, args: Node[]): Node { return NODE('call', { target: NODE('identifier', { name }), args, diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 14ca7c2e..9d207d8c 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -1,7 +1,8 @@ import { AiScriptSyntaxError } from '../error.js'; import { CharStream } from './streams/char-stream.js'; -import type { ITokenStream } from './streams/token-stream.js'; import { TOKEN, TokenKind } from './token.js'; + +import type { ITokenStream } from './streams/token-stream.js'; import type { Token } from './token.js'; const spacingChars = [' ', '\t', '\r', '\n']; @@ -36,7 +37,7 @@ export class Scanner implements ITokenStream { public get token(): Token { if (this._tokens.length === 0) { - throw new Error('stream is not initialized yet'); + throw new Error('scanner is not initialized yet'); } return this._tokens[0]!; } @@ -47,7 +48,7 @@ export class Scanner implements ITokenStream { public next(): void { if (this._tokens.length === 0) { - throw new Error('stream is not initialized yet'); + throw new Error('scanner is not initialized yet'); } this._tokens.shift(); @@ -59,7 +60,7 @@ export class Scanner implements ITokenStream { public lookahead(offset: number): Token { if (this._tokens.length === 0) { - throw new Error('stream is not initialized yet'); + throw new Error('scanner is not initialized yet'); } while (this._tokens.length <= offset) { @@ -401,7 +402,7 @@ export class Scanner implements ITokenStream { while (true) { if (this.stream.eof) { - throw new AiScriptSyntaxError(`unexpected EOF`); + throw new AiScriptSyntaxError('unexpected EOF'); } if (this.stream.char === literalMark) { this.stream.next(); @@ -424,7 +425,7 @@ export class Scanner implements ITokenStream { case 'string': { // テンプレートの終了が無いままEOFに達した if (this.stream.eof) { - throw new AiScriptSyntaxError(`unexpected EOF`); + throw new AiScriptSyntaxError('unexpected EOF'); } // テンプレートの終了 if (this.stream.char === '`') { @@ -452,7 +453,7 @@ export class Scanner implements ITokenStream { case 'expr': { // 埋め込み式の終端記号が無いままEOFに達した if (this.stream.eof) { - throw new AiScriptSyntaxError(`unexpected EOF`); + throw new AiScriptSyntaxError('unexpected EOF'); } // skip spasing if (spacingChars.includes(this.stream.char)) { @@ -477,7 +478,7 @@ export class Scanner implements ITokenStream { return TOKEN(TokenKind.Template, spaceSkipped, { children: elements }); } - private skipCommentLine() { + private skipCommentLine(): void { while (true) { if (this.stream.eof) { break; @@ -490,7 +491,7 @@ export class Scanner implements ITokenStream { } } - private skipCommentRange() { + private skipCommentRange(): void { while (true) { if (this.stream.eof) { break; diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index 4deecb7d..d0b30849 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -57,11 +57,11 @@ export class CharStream { this.loadChar(); } - private get isFirstPage() { + private get isFirstPage(): boolean { return (this.pageIndex <= this.firstPageIndex); } - private get isLastPage() { + private get isLastPage(): boolean { return (this.pageIndex >= this.lastPageIndex); } diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index 20ba7d54..c4ca6093 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -28,7 +28,7 @@ export class TokenStream implements ITokenStream { this.index = 0; } - public init() { + public init(): void { this.load(); } diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 1c75b214..d5d6bd59 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -1,7 +1,7 @@ import { TokenKind } from '../token.js'; -import type { ITokenStream } from '../streams/token-stream.js'; import { parseStatement } from './statements.js'; +import type { ITokenStream } from '../streams/token-stream.js'; import type * as Cst from '../node.js'; export function parseParams(s: ITokenStream): Cst.Node[] { diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index dbe6e3de..3494c2c3 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -1,14 +1,14 @@ import { AiScriptSyntaxError } from '../../error.js'; -import { TokenKind } from '../token.js'; -import { TokenStream } from '../streams/token-stream.js'; -import type { ITokenStream } from '../streams/token-stream.js'; import { CALL_NODE, NODE } from '../node.js'; -import type * as Cst from '../node.js'; - -import { parseBlockOrStatement } from './statements.js'; +import { TokenStream } from '../streams/token-stream.js'; +import { TokenKind } from '../token.js'; import { parseBlock, parseParams } from './common.js'; +import { parseBlockOrStatement } from './statements.js'; + +import type * as Cst from '../node.js'; +import type { ITokenStream } from '../streams/token-stream.js'; -export function parseExpr(s: ITokenStream) { +export function parseExpr(s: ITokenStream): Cst.Node { return parsePratt(s, 0); } @@ -256,7 +256,7 @@ export function parseIf(s: ITokenStream): Cst.Node { const cond = parseExpr(s); const then = parseBlockOrStatement(s); - const elseif: { cond: any, then: any }[] = []; + const elseif: { cond: Cst.Node, then: Cst.Node }[] = []; while (s.kind === TokenKind.ElifKeyword) { s.next(); const elifCond = parseExpr(s); @@ -373,7 +373,7 @@ type InfixInfo = { opKind: 'infix', kind: TokenKind, lbp: number, rbp: number }; type PostfixInfo = { opKind: 'postfix', kind: TokenKind, bp: number }; type OpInfo = PrefixInfo | InfixInfo | PostfixInfo; -function parsePratt(s: ITokenStream, minBp: number) { +function parsePratt(s: ITokenStream, minBp: number): Cst.Node { // pratt parsing // https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index b111e1ab..4b7c9975 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -1,13 +1,12 @@ import { AiScriptSyntaxError } from '../../error.js'; -import { TokenKind } from '../token.js'; -import { TokenStream } from '../streams/token-stream.js'; -import type { ITokenStream } from '../streams/token-stream.js'; import { NODE } from '../node.js'; -import type * as Cst from '../node.js'; - +import { TokenKind } from '../token.js'; import { parseBlock, parseParams, parseType } from './common.js'; import { parseExpr } from './expressions.js'; +import type * as Cst from '../node.js'; +import type { ITokenStream } from '../streams/token-stream.js'; + /** * ```abnf * Statement = VarDef / FnDef / Out / Return / Attr / Each / For / Loop @@ -124,7 +123,7 @@ export function parseFnDef(s: ITokenStream): Cst.Node { name, expr: NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }), mut: false, - attr: [] + attr: [], }); } diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index 84fec41f..d22ecda2 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -1,11 +1,10 @@ -import { AiScriptSyntaxError } from '../../error.js'; -import { TokenKind } from '../token.js'; -import { TokenStream } from '../streams/token-stream.js'; -import type { ITokenStream } from '../streams/token-stream.js'; import { NODE } from '../node.js'; -import type * as Cst from '../node.js'; +import { TokenKind } from '../token.js'; import { parseFnDef, parseStatement, parseVarDef } from './statements.js'; +import type * as Cst from '../node.js'; +import type { ITokenStream } from '../streams/token-stream.js'; + /** * ```abnf * TopLevel = *(Namespace / Meta / Statement) diff --git a/src/parser/token.ts b/src/parser/token.ts index 9c50b75e..e03a01d2 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -119,6 +119,6 @@ export class Token { * - opts.value: for number literal, string literal * - opts.children: for template syntax */ -export function TOKEN(kind: TokenKind, spaceSkipped: boolean, opts?: { value?: Token['value'], children?: Token['children'] }) { +export function TOKEN(kind: TokenKind, spaceSkipped: boolean, opts?: { value?: Token['value'], children?: Token['children'] }): Token { return new Token(kind, spaceSkipped, opts?.value, opts?.children); } From a6e4cdebd1f0fe5ee048a75d0a4c1d1abdfcc7e2 Mon Sep 17 00:00:00 2001 From: marihachi Date: Fri, 29 Sep 2023 20:46:18 +0900 Subject: [PATCH 068/126] fix bug --- src/parser/syntaxes/expressions.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 3494c2c3..635e7b61 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -75,6 +75,7 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { if (op === TokenKind.Dot) { s.expect(TokenKind.Identifier); const name = s.token.value!; + s.next(); return NODE('prop', { target: left, From 6fdac7de424ca845dfb501b2d83ef3728afc227e Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 00:20:25 +0900 Subject: [PATCH 069/126] parser: function call --- src/parser/syntaxes/expressions.ts | 51 +++++++++++++++++++----------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 635e7b61..5053f739 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -49,7 +49,6 @@ const operators: OpInfo[] = [ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { const op = s.kind; s.next(); - const expr = parsePratt(s, minBp); switch (op) { @@ -63,16 +62,15 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { return NODE('not', { expr }); } default: { - throw new Error('unexpected token'); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); } } } function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { const op = s.kind; - s.next(); - if (op === TokenKind.Dot) { + s.next(); s.expect(TokenKind.Identifier); const name = s.token.value!; s.next(); @@ -82,6 +80,7 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { name, }); } else { + s.next(); const right = parsePratt(s, minBp); switch (op) { @@ -125,7 +124,7 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { return NODE('or', { left, right }); } default: { - throw new Error('unexpected token'); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); } } } @@ -133,19 +132,14 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { const op = s.kind; - s.next(); - switch (op) { case TokenKind.OpenParen: { - const args = parseCallArgs(s); - - return NODE('call', { - target: expr, - args, - }); + return parseCall(s, expr); } case TokenKind.OpenBracket: { + s.next(); const index = parseExpr(s); + s.nextWith(TokenKind.CloseBracket); return NODE('index', { target: expr, @@ -153,7 +147,7 @@ function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { }); } default: { - throw new Error('unexpected token'); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); } } } @@ -235,16 +229,37 @@ function parseAtom(s: ITokenStream): Cst.Node { return parseReference(s); } default: { - throw new Error('todo'); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.token.kind]}`); } } } /** - * CallArgs = [Expr *(SEP Expr)] + * Call = "(" [Expr *(("," / +(" " / "\t")) Expr)] ")" */ -function parseCallArgs(s: ITokenStream): Cst.Node[] { - throw new Error('todo'); +function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { + const args: Cst.Node[] = []; + s.nextWith(TokenKind.OpenParen); + while (true) { + if (s.kind == TokenKind.CloseParen) { + break; + } + // separator + if (args.length > 0) { + if (s.kind === TokenKind.Comma) { + s.next(); + } else if (!s.token.spaceSkipped) { + throw new AiScriptSyntaxError('separator required'); + } + } + args.push(parseExpr(s)); + } + s.nextWith(TokenKind.CloseParen); + + return NODE('call', { + target, + args, + }); } /** From 67b7cc2fb83261ef8bf5dc0b2684c97be34f3193 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 01:19:06 +0900 Subject: [PATCH 070/126] parser: function params --- src/parser/syntaxes/common.ts | 31 ++++++++++++++++++++++++++++-- src/parser/syntaxes/expressions.ts | 26 ++++++++++++------------- src/parser/syntaxes/statements.ts | 6 +----- 3 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index d5d6bd59..b4cef32b 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -1,11 +1,38 @@ import { TokenKind } from '../token.js'; import { parseStatement } from './statements.js'; +import { AiScriptSyntaxError } from '../../error.js'; import type { ITokenStream } from '../streams/token-stream.js'; import type * as Cst from '../node.js'; -export function parseParams(s: ITokenStream): Cst.Node[] { - throw new Error('todo'); +/** + * ```abnf + * Params = "(" [IDENT *(SEP IDENT)] ")" + * ``` +*/ +export function parseParams(s: ITokenStream): { name: string }[] { + const items: { name: string }[] = []; + + s.nextWith(TokenKind.OpenParen); + + while (s.kind !== TokenKind.CloseParen) { + // separator + if (items.length > 0) { + if (s.kind === TokenKind.Comma) { + s.next(); + } else if (!s.token.spaceSkipped) { + throw new AiScriptSyntaxError('separator token expected'); + } + } + + s.expect(TokenKind.Identifier); + items.push({ name: s.token.value! }); + s.next(); + } + + s.nextWith(TokenKind.CloseParen); + + return items; } /** diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 5053f739..cb872080 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -235,30 +235,31 @@ function parseAtom(s: ITokenStream): Cst.Node { } /** - * Call = "(" [Expr *(("," / +(" " / "\t")) Expr)] ")" + * Call = "(" [Expr *(SEP Expr)] ")" */ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { - const args: Cst.Node[] = []; + const items: Cst.Node[] = []; + s.nextWith(TokenKind.OpenParen); - while (true) { - if (s.kind == TokenKind.CloseParen) { - break; - } + + while (s.kind !== TokenKind.CloseParen) { // separator - if (args.length > 0) { + if (items.length > 0) { if (s.kind === TokenKind.Comma) { s.next(); } else if (!s.token.spaceSkipped) { - throw new AiScriptSyntaxError('separator required'); + throw new AiScriptSyntaxError('separator token expected'); } } - args.push(parseExpr(s)); + + items.push(parseExpr(s)); } + s.nextWith(TokenKind.CloseParen); return NODE('call', { target, - args, + args: items, }); } @@ -291,17 +292,14 @@ export function parseIf(s: ITokenStream): Cst.Node { /** * ```abnf - * FnExpr = "@(" Params ")" [":" Type] Block + * FnExpr = "@" Params [":" Type] Block * ``` */ export function parseFnExpr(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.At); - s.nextWith(TokenKind.OpenParen); const params = parseParams(s); - s.nextWith(TokenKind.CloseParen); - // type const body = parseBlock(s); diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 4b7c9975..82edf2be 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -99,7 +99,7 @@ export function parseVarDef(s: ITokenStream): Cst.Node { /** * ```abnf - * FnDef = "@" IDENT "(" Params ")" [":" Type] Block + * FnDef = "@" IDENT Params [":" Type] Block * ``` */ export function parseFnDef(s: ITokenStream): Cst.Node { @@ -109,12 +109,8 @@ export function parseFnDef(s: ITokenStream): Cst.Node { const name = s.token.value; s.next(); - s.nextWith(TokenKind.OpenParen); - const params = parseParams(s); - s.nextWith(TokenKind.CloseParen); - // type const body = parseBlock(s); From 622a647fb0b52bc4abed28477e22fdf05e458d87 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 01:36:13 +0900 Subject: [PATCH 071/126] fix bug --- src/parser/syntaxes/expressions.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index cb872080..ad261ad5 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -157,7 +157,7 @@ function parseAtom(s: ITokenStream): Cst.Node { case TokenKind.IfKeyword: { return parseIf(s); } - case TokenKind.OpenAtParen: { + case TokenKind.At: { return parseFnExpr(s); } case TokenKind.MatchKeyword: { From 5ad28432d6e9f3330938a721645623ce42e78fd8 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 01:36:28 +0900 Subject: [PATCH 072/126] refactor --- src/parser/syntaxes/statements.ts | 2 +- src/parser/token.ts | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 82edf2be..0ecacb93 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -14,7 +14,7 @@ import type { ITokenStream } from '../streams/token-stream.js'; * ``` */ export function parseStatement(s: ITokenStream): Cst.Node { - switch (s.token.kind) { + switch (s.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { return parseVarDef(s); diff --git a/src/parser/token.ts b/src/parser/token.ts index e03a01d2..787270ab 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -88,8 +88,6 @@ export enum TokenKind { GtEq, /** "@" */ At, - /** "@(" */ - OpenAtParen, /** "[" */ OpenBracket, /** "]" */ From d2210429a54c1eddaddab14b596785e5e63b0ada Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 01:44:49 +0900 Subject: [PATCH 073/126] refactor --- src/parser/syntaxes/common.ts | 2 +- src/parser/syntaxes/statements.ts | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index b4cef32b..785938f3 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -1,6 +1,6 @@ import { TokenKind } from '../token.js'; -import { parseStatement } from './statements.js'; import { AiScriptSyntaxError } from '../../error.js'; +import { parseStatement } from './statements.js'; import type { ITokenStream } from '../streams/token-stream.js'; import type * as Cst from '../node.js'; diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 0ecacb93..3e2ad93e 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -117,7 +117,11 @@ export function parseFnDef(s: ITokenStream): Cst.Node { return NODE('def', { name, - expr: NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }), + expr: NODE('fn', { + args: params, + retType: undefined, // TODO: type + children: body, + }), mut: false, attr: [], }); From dd52a046c2b4effe98a481756a56d3c11a28ff50 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 02:02:24 +0900 Subject: [PATCH 074/126] parser: fix sign --- src/parser/syntaxes/expressions.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index ad261ad5..be5e025b 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -53,10 +53,24 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { switch (op) { case TokenKind.Plus: { - return NODE('plus', { expr }); + // 数値リテラル以外は非サポート + if (expr.type === 'num') { + return expr; + } else { + throw new AiScriptSyntaxError('currently, sign is only supported for number literal.'); + } + // TODO: 将来的にサポートされる式を拡張 + // return NODE('plus', { expr }); } case TokenKind.Minus: { - return NODE('minus', { expr }); + // 数値リテラル以外は非サポート + if (expr.type == 'num') { + return NODE('num', { value: -1 * expr.value }); + } else { + throw new AiScriptSyntaxError('currently, sign is only supported for number literal.'); + } + // TODO: 将来的にサポートされる式を拡張 + // return NODE('minus', { expr }); } case TokenKind.Not: { return NODE('not', { expr }); From ab3739646ac83cb099d6c5e15487f1b7ab58e634 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 02:03:47 +0900 Subject: [PATCH 075/126] lint --- src/parser/syntaxes/expressions.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index be5e025b..48b24788 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -64,7 +64,7 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { } case TokenKind.Minus: { // 数値リテラル以外は非サポート - if (expr.type == 'num') { + if (expr.type === 'num') { return NODE('num', { value: -1 * expr.value }); } else { throw new AiScriptSyntaxError('currently, sign is only supported for number literal.'); From 211d74d52404bdbbb9ddb9592c6728c222ebb2be Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 02:14:50 +0900 Subject: [PATCH 076/126] parser: mod op --- src/parser/syntaxes/expressions.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 48b24788..bbddc91a 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -29,6 +29,7 @@ const operators: OpInfo[] = [ { opKind: 'infix', kind: TokenKind.Asterisk, lbp: 12, rbp: 13 }, { opKind: 'infix', kind: TokenKind.Slash, lbp: 12, rbp: 13 }, + { opKind: 'infix', kind: TokenKind.Percent, lbp: 12, rbp: 13 }, { opKind: 'infix', kind: TokenKind.Plus, lbp: 10, rbp: 11 }, { opKind: 'infix', kind: TokenKind.Minus, lbp: 10, rbp: 11 }, @@ -107,6 +108,9 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { case TokenKind.Slash: { return CALL_NODE('Core:div', [left, right]); } + case TokenKind.Percent: { + return CALL_NODE('Core:mod', [left, right]); + } case TokenKind.Plus: { return CALL_NODE('Core:add', [left, right]); } From f085de19168c1f4f60771c52c4dae381ffbbe251 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 02:16:34 +0900 Subject: [PATCH 077/126] remove infix-to-fncall plugin --- src/parser/index.ts | 2 - src/parser/plugins/infix-to-fncall.ts | 146 -------------------------- 2 files changed, 148 deletions(-) delete mode 100644 src/parser/plugins/infix-to-fncall.ts diff --git a/src/parser/index.ts b/src/parser/index.ts index d48c2b46..f0fe9393 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -5,7 +5,6 @@ import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; import { setAttribute } from './plugins/set-attribute.js'; import { transformChain } from './plugins/transform-chain.js'; -import { infixToFnCall } from './plugins/infix-to-fncall.js'; import type * as Cst from './node.js'; import type * as Ast from '../node.js'; @@ -28,7 +27,6 @@ export class Parser { transform: [ setAttribute, transformChain, - infixToFnCall, ], }; } diff --git a/src/parser/plugins/infix-to-fncall.ts b/src/parser/plugins/infix-to-fncall.ts deleted file mode 100644 index 7d551320..00000000 --- a/src/parser/plugins/infix-to-fncall.ts +++ /dev/null @@ -1,146 +0,0 @@ -import { visitNode } from '../visit.js'; -import { AiScriptSyntaxError } from '../../error.js'; -import type * as Cst from '../node.js'; - -/** - * 中置演算子式を表す木 - * 1 + 3 ならば次のようなイメージ - * ``` - * (+) - * (1) (3) - * ``` - */ -type InfixTree = { - type: 'infixTree'; - left: InfixTree | Cst.Node; - right: InfixTree | Cst.Node; - info: { - priority: number; // 優先度(高いほど優先して計算される値) - } & ({ - func: string; // 対応する関数名 - mapFn?: undefined; - } | { - func?: undefined; - mapFn: ((infix: InfixTree) => Cst.Node); //Nodeへ変換する関数 - }) -}; - -function INFIX_TREE(left: InfixTree | Cst.Node, right: InfixTree | Cst.Node, info: InfixTree['info']): InfixTree { - return { type: 'infixTree', left, right, info }; -} - -/** - * 現在の中置演算子式を表す木に新たな演算子と項を追加した木を構築する - * - * - 新しい演算子の優先度が現在見ている木の演算子の優先度 **以下** である場合は、現在見ている木は新しい演算子の左側の子になる。 - * 1 + 3 - 4 = (1 + 3) - 4 ならば - * ``` - * (-) - * (+) (4) - * (1) (3) - * ``` - * - * - 新しい演算子の優先度が現在見ている木の演算子の優先度 **より大きい** 場合は、右側の子と結合する。 - * 1 + 3 * 4 = 1 + (3 * 4) ならば - * ``` - * (+) - * (1) (*) - * (3) (4) - * ``` - * - * - TODO: 左結合性の場合しか考えていない(結合性によって優先度が同じ場合の振る舞いが変わりそう) - * - NOTE: 右結合性の演算子としては代入演算子などが挙げられる - * - NOTE: 比較の演算子などは非結合性とされる - */ -function insertTree(currTree: InfixTree | Cst.Node, nextTree: InfixTree | Cst.Node, nextOpInfo: InfixTree['info']): InfixTree { - if (currTree.type !== 'infixTree') { - return INFIX_TREE(currTree, nextTree, nextOpInfo); - } - - if (nextOpInfo.priority <= currTree.info.priority) { - return INFIX_TREE(currTree, nextTree, nextOpInfo); - } else { - const { left, right, info: currInfo } = currTree; - return INFIX_TREE(left, insertTree(right, nextTree, nextOpInfo), currInfo); - } -} - -/** - * 中置演算子式を表す木を対応する関数呼び出しの構造体に変換する - */ -function treeToNode(tree: InfixTree | Cst.Node): Cst.Node { - if (tree.type !== 'infixTree') { - return tree; - } - - if (tree.info.mapFn) { - return tree.info.mapFn(tree); - } else { - return { - type: 'call', - target: { type: 'identifier', name: tree.info.func }, - args: [treeToNode(tree.left), treeToNode(tree.right)], - } as Cst.Call; - } -} - -const infoTable: Record = { - '*': { func: 'Core:mul', priority: 7 }, - '^': { func: 'Core:pow', priority: 7 }, - '/': { func: 'Core:div', priority: 7 }, - '%': { func: 'Core:mod', priority: 7 }, - '+': { func: 'Core:add', priority: 6 }, - '-': { func: 'Core:sub', priority: 6 }, - '==': { func: 'Core:eq', priority: 4 }, - '!=': { func: 'Core:neq', priority: 4 }, - '<': { func: 'Core:lt', priority: 4 }, - '>': { func: 'Core:gt', priority: 4 }, - '<=': { func: 'Core:lteq', priority: 4 }, - '>=': { func: 'Core:gteq', priority: 4 }, - '&&': { - mapFn: infix => ({ - type: 'and', - left: treeToNode(infix.left), - right: treeToNode(infix.right), - }) as Cst.And, - priority: 3, - }, - '||': { - mapFn: infix => ({ - type: 'or', - left: treeToNode(infix.left), - right: treeToNode(infix.right), - }) as Cst.Or, - priority: 3, - }, -}; - -/** - * NInfix を関数呼び出し形式に変換する - */ -function transform(node: Cst.Infix): Cst.Node { - const infos = node.operators.map(op => { - const info = infoTable[op]; - if (info == null) { - throw new AiScriptSyntaxError(`No such operator: ${op}.`); - } - return info; - }); - let currTree = INFIX_TREE(node.operands[0]!, node.operands[1]!, infos[0]!); - for (let i = 0; i < infos.length - 1; i++) { - currTree = insertTree(currTree, node.operands[2 + i]!, infos[1 + i]!); - } - return treeToNode(currTree); -} - -export function infixToFnCall(nodes: Cst.Node[]): Cst.Node[] { - for (let i = 0; i < nodes.length; i++) { - nodes[i] = visitNode(nodes[i]!, (node) => { - if (node.type === 'infix') { - return transform(node); - } - return node; - }); - } - return nodes; -} From b209cbba7599537e2badeb06590bf356a02d804d Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 02:53:31 +0900 Subject: [PATCH 078/126] parser: attr wip --- src/parser/syntaxes/statements.ts | 48 ++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 3e2ad93e..4466cf84 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -31,7 +31,9 @@ export function parseStatement(s: ITokenStream): Cst.Node { case TokenKind.ReturnKeyword: { return parseReturn(s); } - // Attr + case TokenKind.OpenSharpBracket: { + return parseStatementWithAttr(s); + } case TokenKind.EachKeyword: { return parseEach(s); } @@ -251,6 +253,50 @@ export function parseReturn(s: ITokenStream): Cst.Node { return NODE('return', { expr }); } +/** + * ```abnf + * StatementWithAttr = *Attr Statement + * ``` +*/ +export function parseStatementWithAttr(s: ITokenStream): Cst.Node { + const attrs: Cst.Attribute[] = []; + while (s.kind === TokenKind.OpenSharpBracket) { + attrs.push(parseAttr(s) as Cst.Attribute); + } + + const statement = parseStatement(s); + + if (statement.type !== 'def') { + throw new AiScriptSyntaxError('invalid attribute.'); + } + if (statement.attr != null) { + statement.attr.push(...attrs); + } else { + statement.attr = attrs; + } + + return statement; +} + +/** + * ```abnf + * Attr = "#[" IDENT [StaticLiteral] "]" + * ``` +*/ +function parseAttr(s: ITokenStream): Cst.Node { + s.nextWith(TokenKind.OpenSharpBracket); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + // TODO: value + + s.nextWith(TokenKind.CloseBracket); + + return NODE('attr', { name, value: undefined }); +} + /** * ```abnf * Loop = "loop" Block From a49e098700c9aba623b1555b8bed03ec731924de Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 02:56:03 +0900 Subject: [PATCH 079/126] remove set-attribute plugin --- src/parser/index.ts | 2 -- src/parser/plugins/set-attribute.ts | 48 ----------------------------- 2 files changed, 50 deletions(-) delete mode 100644 src/parser/plugins/set-attribute.ts diff --git a/src/parser/index.ts b/src/parser/index.ts index f0fe9393..6af19b73 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -3,7 +3,6 @@ import { parseTopLevel } from './syntaxes/toplevel.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; -import { setAttribute } from './plugins/set-attribute.js'; import { transformChain } from './plugins/transform-chain.js'; import type * as Cst from './node.js'; import type * as Ast from '../node.js'; @@ -25,7 +24,6 @@ export class Parser { validateType, ], transform: [ - setAttribute, transformChain, ], }; diff --git a/src/parser/plugins/set-attribute.ts b/src/parser/plugins/set-attribute.ts deleted file mode 100644 index a19e754e..00000000 --- a/src/parser/plugins/set-attribute.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { AiScriptSyntaxError } from '../../error.js'; -import type * as Cst from '../node.js'; - -export function setAttribute(node: Cst.Expression[]): Cst.Expression[] -export function setAttribute(node: Cst.Statement[]): Cst.Statement[] -export function setAttribute(node: (Cst.Statement | Cst.Expression)[]): (Cst.Statement | Cst.Expression)[] -export function setAttribute(node: Cst.Node[]): Cst.Node[] -export function setAttribute(nodes: Cst.Node[]): Cst.Node[] { - const result: Cst.Node[] = []; - const stockedAttrs: Cst.Attribute[] = []; - - for (const node of nodes) { - if (node.type === 'attr') { - stockedAttrs.push(node); - } else if (node.type === 'def') { - if (node.attr == null) { - node.attr = []; - } - node.attr.push(...stockedAttrs); - // clear all - stockedAttrs.splice(0, stockedAttrs.length); - if (node.expr.type === 'fn') { - node.expr.children = setAttribute(node.expr.children); - } - result.push(node); - } else { - if (stockedAttrs.length > 0) { - throw new AiScriptSyntaxError('invalid attribute.'); - } - switch (node.type) { - case 'fn': { - node.children = setAttribute(node.children); - break; - } - case 'block': { - node.statements = setAttribute(node.statements); - break; - } - } - result.push(node); - } - } - if (stockedAttrs.length > 0) { - throw new AiScriptSyntaxError('invalid attribute.'); - } - - return result; -} From 66acc41116624d008ab1339bd405d746c6fbc085 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 02:59:57 +0900 Subject: [PATCH 080/126] remove transform-chain plugin --- src/parser/index.ts | 2 -- src/parser/plugins/transform-chain.ts | 39 --------------------------- 2 files changed, 41 deletions(-) delete mode 100644 src/parser/plugins/transform-chain.ts diff --git a/src/parser/index.ts b/src/parser/index.ts index 6af19b73..ff44b753 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -3,7 +3,6 @@ import { parseTopLevel } from './syntaxes/toplevel.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; -import { transformChain } from './plugins/transform-chain.js'; import type * as Cst from './node.js'; import type * as Ast from '../node.js'; @@ -24,7 +23,6 @@ export class Parser { validateType, ], transform: [ - transformChain, ], }; } diff --git a/src/parser/plugins/transform-chain.ts b/src/parser/plugins/transform-chain.ts deleted file mode 100644 index 528b9258..00000000 --- a/src/parser/plugins/transform-chain.ts +++ /dev/null @@ -1,39 +0,0 @@ -import * as Cst from '../node.js'; -import { visitNode } from '../visit.js'; - -function transformNode(node: Cst.Node): Cst.Node { - // chain - if (Cst.isExpression(node) && Cst.hasChainProp(node) && node.chain != null) { - const { chain, ...hostNode } = node; - let parent: Cst.Expression = hostNode; - for (const item of chain) { - switch (item.type) { - case 'callChain': { - parent = Cst.CALL(parent, item.args, item.loc); - break; - } - case 'indexChain': { - parent = Cst.INDEX(parent, item.index, item.loc); - break; - } - case 'propChain': { - parent = Cst.PROP(parent, item.name, item.loc); - break; - } - default: { - break; - } - } - } - return parent; - } - - return node; -} - -export function transformChain(nodes: Cst.Node[]): Cst.Node[] { - for (let i = 0; i < nodes.length; i++) { - nodes[i] = visitNode(nodes[i]!, transformNode); - } - return nodes; -} From 9d4d1932d9625911124ba023952625b093c7deef Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 09:13:17 +0900 Subject: [PATCH 081/126] refactor --- src/parser/scanner.ts | 2 +- src/parser/syntaxes/common.ts | 16 +++++++ src/parser/syntaxes/expressions.ts | 18 ++++---- src/parser/syntaxes/statements.ts | 67 ++++++++++++++++++------------ src/parser/syntaxes/toplevel.ts | 30 +++---------- 5 files changed, 73 insertions(+), 60 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 9d207d8c..01f40eaa 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -72,7 +72,7 @@ export class Scanner implements ITokenStream { public expect(kind: TokenKind): void { if (this.kind !== kind) { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.token.kind]}`); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.kind]}`); } } diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 785938f3..3b56c2f3 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -53,6 +53,22 @@ export function parseBlock(s: ITokenStream): Cst.Node[] { return steps; } +//#region Static Literal + +export function parseStaticLiteral(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseStaticArray(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +export function parseStaticObject(s: ITokenStream): Cst.Node { + throw new Error('todo'); +} + +//#endregion Static Literal + //#region Type export function parseType(s: ITokenStream): Cst.Node { diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index bbddc91a..9b65aad7 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -247,7 +247,7 @@ function parseAtom(s: ITokenStream): Cst.Node { return parseReference(s); } default: { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.token.kind]}`); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); } } } @@ -286,7 +286,7 @@ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] * ``` */ -export function parseIf(s: ITokenStream): Cst.Node { +function parseIf(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.IfKeyword); const cond = parseExpr(s); const then = parseBlockOrStatement(s); @@ -313,7 +313,7 @@ export function parseIf(s: ITokenStream): Cst.Node { * FnExpr = "@" Params [":" Type] Block * ``` */ -export function parseFnExpr(s: ITokenStream): Cst.Node { +function parseFnExpr(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.At); const params = parseParams(s); @@ -325,7 +325,7 @@ export function parseFnExpr(s: ITokenStream): Cst.Node { return NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }); } -export function parseMatch(s: ITokenStream): Cst.Node { +function parseMatch(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -334,7 +334,7 @@ export function parseMatch(s: ITokenStream): Cst.Node { * Eval = "eval" Block * ``` */ -export function parseEval(s: ITokenStream): Cst.Node { +function parseEval(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.EvalKeyword); const statements = parseBlock(s); return NODE('block', { statements }); @@ -345,7 +345,7 @@ export function parseEval(s: ITokenStream): Cst.Node { * Exists = "exists" Reference * ``` */ -export function parseExists(s: ITokenStream): Cst.Node { +function parseExists(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.ExistsKeyword); const identifier = parseReference(s); return NODE('exists', { identifier }); @@ -356,7 +356,7 @@ export function parseExists(s: ITokenStream): Cst.Node { * Reference = IDENT *(":" IDENT) * ``` */ -export function parseReference(s: ITokenStream): Cst.Node { +function parseReference(s: ITokenStream): Cst.Node { const segs: string[] = []; while (true) { if (segs.length > 0) { @@ -373,7 +373,7 @@ export function parseReference(s: ITokenStream): Cst.Node { return NODE('identifier', { name: segs.join(':') }); } -export function parseObject(s: ITokenStream): Cst.Node { +function parseObject(s: ITokenStream): Cst.Node { throw new Error('todo'); } @@ -382,7 +382,7 @@ export function parseObject(s: ITokenStream): Cst.Node { * Array = "[" *(Expr [","]) "]" * ``` */ -export function parseArray(s: ITokenStream): Cst.Node { +function parseArray(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.OpenBracket); const value = []; diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 4466cf84..bcd187db 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -60,14 +60,43 @@ export function parseStatement(s: ITokenStream): Cst.Node { return expr; } +export function parseDefStatement(s: ITokenStream) { + switch (s.kind) { + case TokenKind.VarKeyword: + case TokenKind.LetKeyword: { + return parseVarDef(s); + } + case TokenKind.At: { + return parseFnDef(s); + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); + } + } +} + +/** + * ```abnf + * BlockOrStatement = Block / Statement + * ``` +*/ +export function parseBlockOrStatement(s: ITokenStream): Cst.Node { + if (s.kind === TokenKind.OpenBrace) { + const statements = parseBlock(s); + return NODE('block', { statements }); + } else { + return parseStatement(s); + } +} + /** * ```abnf * VarDef = ("let" / "var") IDENT [":" Type] "=" Expr * ``` */ -export function parseVarDef(s: ITokenStream): Cst.Node { +function parseVarDef(s: ITokenStream): Cst.Node { let mut; - switch (s.token.kind) { + switch (s.kind) { case TokenKind.LetKeyword: { mut = false; break; @@ -77,7 +106,7 @@ export function parseVarDef(s: ITokenStream): Cst.Node { break; } default: { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.token.kind]}`); + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); } } s.next(); @@ -87,7 +116,7 @@ export function parseVarDef(s: ITokenStream): Cst.Node { s.next(); let ty; - if (s.kind === TokenKind.Colon) { + if ((s.kind as TokenKind) === TokenKind.Colon) { s.next(); ty = parseType(s); } @@ -104,7 +133,7 @@ export function parseVarDef(s: ITokenStream): Cst.Node { * FnDef = "@" IDENT Params [":" Type] Block * ``` */ -export function parseFnDef(s: ITokenStream): Cst.Node { +function parseFnDef(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.At); s.expect(TokenKind.Identifier); @@ -134,7 +163,7 @@ export function parseFnDef(s: ITokenStream): Cst.Node { * Out = "<:" Expr * ``` */ -export function parseOut(s: ITokenStream): Cst.Node { +function parseOut(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Out); const expr = parseExpr(s); return NODE('identifier', { @@ -149,7 +178,7 @@ export function parseOut(s: ITokenStream): Cst.Node { * / "each" "(" "let" IDENT [","] Expr ")" BlockOrStatement * ``` */ -export function parseEach(s: ITokenStream): Cst.Node { +function parseEach(s: ITokenStream): Cst.Node { let hasParen = false; s.nextWith(TokenKind.EachKeyword); @@ -184,7 +213,7 @@ export function parseEach(s: ITokenStream): Cst.Node { }); } -export function parseFor(s: ITokenStream): Cst.Node { +function parseFor(s: ITokenStream): Cst.Node { let hasParen = false; s.nextWith(TokenKind.ForKeyword); @@ -247,7 +276,7 @@ export function parseFor(s: ITokenStream): Cst.Node { * Return = "return" Expr * ``` */ -export function parseReturn(s: ITokenStream): Cst.Node { +function parseReturn(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.ReturnKeyword); const expr = parseExpr(s); return NODE('return', { expr }); @@ -258,7 +287,7 @@ export function parseReturn(s: ITokenStream): Cst.Node { * StatementWithAttr = *Attr Statement * ``` */ -export function parseStatementWithAttr(s: ITokenStream): Cst.Node { +function parseStatementWithAttr(s: ITokenStream): Cst.Node { const attrs: Cst.Attribute[] = []; while (s.kind === TokenKind.OpenSharpBracket) { attrs.push(parseAttr(s) as Cst.Attribute); @@ -302,7 +331,7 @@ function parseAttr(s: ITokenStream): Cst.Node { * Loop = "loop" Block * ``` */ -export function parseLoop(s: ITokenStream): Cst.Node { +function parseLoop(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.LoopKeyword); const statements = parseBlock(s); return NODE('loop', { statements }); @@ -313,7 +342,7 @@ export function parseLoop(s: ITokenStream): Cst.Node { * Assign = Expr ("=" / "+=" / "-=") Expr * ``` */ -export function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { +function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { // Assign switch (s.kind) { case TokenKind.Eq: { @@ -336,17 +365,3 @@ export function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | unde } } } - -/** - * ```abnf - * BlockOrStatement = Block / Statement - * ``` -*/ -export function parseBlockOrStatement(s: ITokenStream): Cst.Node { - if (s.kind === TokenKind.OpenBrace) { - const statements = parseBlock(s); - return NODE('block', { statements }); - } else { - return parseStatement(s); - } -} diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index d22ecda2..68706206 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -1,6 +1,7 @@ import { NODE } from '../node.js'; import { TokenKind } from '../token.js'; -import { parseFnDef, parseStatement, parseVarDef } from './statements.js'; +import { parseDefStatement, parseStatement } from './statements.js'; +import { parseStaticLiteral } from './common.js'; import type * as Cst from '../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; @@ -14,7 +15,7 @@ export function parseTopLevel(s: ITokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; while (s.kind !== TokenKind.EOF) { - switch (s.token.kind) { + switch (s.kind) { case TokenKind.Colon2: { nodes.push(parseNamespace(s)); break; @@ -48,14 +49,11 @@ export function parseNamespace(s: ITokenStream): Cst.Node { const members: Cst.Node[] = []; s.nextWith(TokenKind.OpenBrace); while (s.kind !== TokenKind.CloseBrace) { - switch (s.token.kind) { + switch (s.kind) { case TokenKind.VarKeyword: - case TokenKind.LetKeyword: { - members.push(parseVarDef(s)); - break; - } + case TokenKind.LetKeyword: case TokenKind.At: { - members.push(parseFnDef(s)); + members.push(parseDefStatement(s)); break; } case TokenKind.Colon2: { @@ -87,19 +85,3 @@ export function parseMeta(s: ITokenStream): Cst.Node { return NODE('meta', { name, value }); } - -//#region Static Literal - -export function parseStaticLiteral(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -export function parseStaticArray(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -export function parseStaticObject(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -//#endregion Static Literal From 09b0abd0e5edd168cafe4d5676d29d6fabc5a2bd Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 11:14:27 +0900 Subject: [PATCH 082/126] fix out --- src/parser/syntaxes/statements.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index bcd187db..496672b1 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -1,5 +1,5 @@ import { AiScriptSyntaxError } from '../../error.js'; -import { NODE } from '../node.js'; +import { CALL_NODE, NODE } from '../node.js'; import { TokenKind } from '../token.js'; import { parseBlock, parseParams, parseType } from './common.js'; import { parseExpr } from './expressions.js'; @@ -166,10 +166,7 @@ function parseFnDef(s: ITokenStream): Cst.Node { function parseOut(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Out); const expr = parseExpr(s); - return NODE('identifier', { - name: 'print', - chain: [NODE('callChain', { args: [expr] })], - }); + return CALL_NODE('print', [expr]); } /** From 9b042443a77d41eec70b51d7f86e15c08f9fa4d3 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 11:20:21 +0900 Subject: [PATCH 083/126] parser: separators --- src/parser/syntaxes/common.ts | 2 +- src/parser/syntaxes/expressions.ts | 51 +++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 3b56c2f3..117bb278 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -7,7 +7,7 @@ import type * as Cst from '../node.js'; /** * ```abnf - * Params = "(" [IDENT *(SEP IDENT)] ")" + * Params = "(" [IDENT *(("," / SPACE) IDENT)] ")" * ``` */ export function parseParams(s: ITokenStream): { name: string }[] { diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 9b65aad7..f3441e12 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -253,7 +253,7 @@ function parseAtom(s: ITokenStream): Cst.Node { } /** - * Call = "(" [Expr *(SEP Expr)] ")" + * Call = "(" [Expr *(("," / SPACE) Expr)] ")" */ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { const items: Cst.Node[] = []; @@ -373,13 +373,48 @@ function parseReference(s: ITokenStream): Cst.Node { return NODE('identifier', { name: segs.join(':') }); } +/** + * ```abnf + * Object = "{" [IDENT ":" Expr *(("," / ";" / SPACE) IDENT ":" Expr) ["," / ";"]] "}" + * ``` +*/ function parseObject(s: ITokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.OpenBrace); + + const map = new Map(); + while (s.kind !== TokenKind.CloseBrace) { + s.expect(TokenKind.Identifier); + const k = s.token.value!; + s.next(); + + s.nextWith(TokenKind.Colon); + + const v = parseExpr(s); + + map.set(k, v); + + // separator + if ((s.kind as TokenKind) === TokenKind.CloseBrace) { + break; + } else if (s.kind === TokenKind.Comma) { + s.next(); + } else if (s.kind === TokenKind.SemiColon) { + s.next(); + } else { + if (!s.token.spaceSkipped) { + throw new AiScriptSyntaxError('separator token expected'); + } + } + } + + s.nextWith(TokenKind.CloseBrace); + + return NODE('obj', { value: map }); } /** * ```abnf - * Array = "[" *(Expr [","]) "]" + * Array = "[" [Expr *(("," / SPACE) Expr) [","]] "]" * ``` */ function parseArray(s: ITokenStream): Cst.Node { @@ -388,8 +423,16 @@ function parseArray(s: ITokenStream): Cst.Node { const value = []; while (s.kind !== TokenKind.CloseBracket) { value.push(parseExpr(s)); - if (s.kind === TokenKind.Comma) { + + // separator + if ((s.kind as TokenKind) === TokenKind.CloseBracket) { + break; + } else if (s.kind === TokenKind.Comma) { s.next(); + } else { + if (!s.token.spaceSkipped) { + throw new AiScriptSyntaxError('separator token expected'); + } } } From 3c812c868b8b5495be9ff29890f4a0ec167a63a7 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 21:19:20 +0900 Subject: [PATCH 084/126] update test --- test/index.ts | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/test/index.ts b/test/index.ts index f7367d3e..45cf2f8c 100644 --- a/test/index.ts +++ b/test/index.ts @@ -299,8 +299,8 @@ describe('Infix expression', () => { test.concurrent('syntax symbols vs infix operators', async () => { const res = await exe(` <: match true { - 1 == 1 => "true" - 1 < 1 => "false" + case 1 == 1 => "true" + case 1 < 1 => "false" } `); eq(res, STR('true')); @@ -313,8 +313,8 @@ describe('Infix expression', () => { test.concurrent('number + match expression', async () => { const res = await exe(` <: 1 + match 2 == 2 { - true => 3 - false => 4 + case true => 3 + case false => 4 } `); eq(res, NUM(4)); @@ -1494,9 +1494,9 @@ describe('match', () => { test.concurrent('Basic', async () => { const res = await exe(` <: match 2 { - 1 => "a" - 2 => "b" - 3 => "c" + case 1 => "a" + case 2 => "b" + case 3 => "c" } `); eq(res, STR('b')); @@ -1505,9 +1505,9 @@ describe('match', () => { test.concurrent('When default not provided, returns null', async () => { const res = await exe(` <: match 42 { - 1 => "a" - 2 => "b" - 3 => "c" + case 1 => "a" + case 2 => "b" + case 3 => "c" } `); eq(res, NULL); @@ -1516,10 +1516,10 @@ describe('match', () => { test.concurrent('With default', async () => { const res = await exe(` <: match 42 { - 1 => "a" - 2 => "b" - 3 => "c" - * => "d" + case 1 => "a" + case 2 => "b" + case 3 => "c" + default => "d" } `); eq(res, STR('d')); @@ -1528,13 +1528,13 @@ describe('match', () => { test.concurrent('With block', async () => { const res = await exe(` <: match 2 { - 1 => 1 - 2 => { + case 1 => 1 + case 2 => { let a = 1 let b = 2 (a + b) } - 3 => 3 + case 3 => 3 } `); eq(res, NUM(3)); @@ -1544,7 +1544,7 @@ describe('match', () => { const res = await exe(` @f(x) { match x { - 1 => { + case 1 => { return "ai" } } From 7141ff00a918527ec5ed46e25a0e021f6ad38da5 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 21:21:00 +0900 Subject: [PATCH 085/126] lint --- src/parser/syntaxes/statements.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 496672b1..0b8cb5bb 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -60,7 +60,7 @@ export function parseStatement(s: ITokenStream): Cst.Node { return expr; } -export function parseDefStatement(s: ITokenStream) { +export function parseDefStatement(s: ITokenStream): Cst.Node { switch (s.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { From b9d733e3624dee3449d0c8a3d38318b3e6e79da9 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 21:25:11 +0900 Subject: [PATCH 086/126] parser: match --- src/parser/plugins/validate-keyword.ts | 2 ++ src/parser/scanner.ts | 6 +++++ src/parser/syntaxes/expressions.ts | 32 ++++++++++++++++++++++++-- src/parser/token.ts | 2 ++ 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/parser/plugins/validate-keyword.ts b/src/parser/plugins/validate-keyword.ts index 3e9af586..de80d221 100644 --- a/src/parser/plugins/validate-keyword.ts +++ b/src/parser/plugins/validate-keyword.ts @@ -12,6 +12,8 @@ const reservedWord = [ 'break', 'continue', 'match', + 'case', + 'default', 'if', 'elif', 'else', diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 01f40eaa..fa27d230 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -351,6 +351,12 @@ export class Scanner implements ITokenStream { case 'match': { return TOKEN(TokenKind.MatchKeyword, spaceSkipped); } + case 'case': { + return TOKEN(TokenKind.CaseKeyword, spaceSkipped); + } + case 'default': { + return TOKEN(TokenKind.DefaultKeyword, spaceSkipped); + } case 'if': { return TOKEN(TokenKind.IfKeyword, spaceSkipped); } diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index f3441e12..f48e188d 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -322,11 +322,39 @@ function parseFnExpr(s: ITokenStream): Cst.Node { const body = parseBlock(s); - return NODE('fn', { args: params ?? [], retType: undefined, children: body ?? [] }); + return NODE('fn', { args: params, retType: undefined, children: body }); } +/** + * ```abnf + * Match = "match" Expr "{" *("case" Expr "=>" BlockOrStatement) ["default" "=>" BlockOrStatement] "}" + * ``` +*/ function parseMatch(s: ITokenStream): Cst.Node { - throw new Error('todo'); + s.nextWith(TokenKind.MatchKeyword); + const about = parseExpr(s); + + s.nextWith(TokenKind.OpenBrace); + + const qs: { q: Cst.Node, a: Cst.Node }[] = []; + while (s.kind !== TokenKind.DefaultKeyword && s.kind !== TokenKind.CloseBrace) { + s.nextWith(TokenKind.CaseKeyword); + const q = parseExpr(s); + s.nextWith(TokenKind.Arrow); + const a = parseBlockOrStatement(s); + qs.push({ q, a }); + } + + let x; + if (s.kind === TokenKind.DefaultKeyword) { + s.next(); + s.nextWith(TokenKind.Arrow); + x = parseBlockOrStatement(s); + } + + s.nextWith(TokenKind.CloseBrace); + + return NODE('match', { about, qs, default: x }); } /** diff --git a/src/parser/token.ts b/src/parser/token.ts index 787270ab..1f38c9c9 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -21,6 +21,8 @@ export enum TokenKind { BreakKeyword, ContinueKeyword, MatchKeyword, + CaseKeyword, + DefaultKeyword, IfKeyword, ElifKeyword, ElseKeyword, From 54c48921e4a4574ee3c07c0462aa37dde8139fc2 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 30 Sep 2023 21:42:11 +0900 Subject: [PATCH 087/126] clean --- debug.ais | 1 - 1 file changed, 1 deletion(-) delete mode 100644 debug.ais diff --git a/debug.ais b/debug.ais deleted file mode 100644 index 492fc438..00000000 --- a/debug.ais +++ /dev/null @@ -1 +0,0 @@ -var x = 1 From 9986d08e2d6e1badde820fd0ee5369d59116b6f6 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 1 Oct 2023 14:51:07 +0900 Subject: [PATCH 088/126] update CST nodes --- src/parser/node.ts | 91 +++++--------------------- src/parser/plugins/validate-keyword.ts | 3 +- src/parser/visit.ts | 24 ------- 3 files changed, 19 insertions(+), 99 deletions(-) diff --git a/src/parser/node.ts b/src/parser/node.ts index 161abd06..97783536 100644 --- a/src/parser/node.ts +++ b/src/parser/node.ts @@ -6,7 +6,7 @@ * この処理結果がプラグインによって処理されるとASTノードとなります。 */ -export type Node = Namespace | Meta | Statement | Expression | ChainMember | TypeSource; +export type Node = Namespace | Meta | Statement | Expression | Attribute | TypeSource; export function NODE(type: string, params: Record): Node { const node: Record = { type }; @@ -30,7 +30,6 @@ export function CALL_NODE(name: string, args: Node[]): Node { export type Statement = Definition | Return | - Attribute | // AST Each | For | Loop | @@ -48,7 +47,6 @@ export function isStatement(x: Node): x is Statement { } export type Expression = - Infix | Not | And | Or | @@ -65,12 +63,12 @@ export type Expression = Obj | Arr | Identifier | - Call | // IR - Index | // IR - Prop; // IR + Call | + Index | + Prop; const expressionTypes = [ - 'infix', 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'identifier', 'call', 'index', 'prop', + 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'identifier', 'call', 'index', 'prop', ]; export function isExpression(x: Node): x is Expression { return expressionTypes.includes(x.type); @@ -102,7 +100,7 @@ export type Definition = NodeBase & { varType?: TypeSource; expr: Expression; mut: boolean; - attr?: Attribute[]; // IR + attr?: Attribute[]; }; export type Attribute = NodeBase & { @@ -163,14 +161,6 @@ export type Assign = NodeBase & { expr: Expression; }; -export type InfixOperator = '||' | '&&' | '==' | '!=' | '<=' | '>=' | '<' | '>' | '+' | '-' | '*' | '^' | '/' | '%'; - -export type Infix = NodeBase & { - type: 'infix'; - operands: Expression[]; - operators: InfixOperator[]; -}; - export type Not = NodeBase & { type: 'not'; expr: Expression; @@ -199,7 +189,7 @@ export type If = NodeBase & { else?: Statement | Expression; }; -export type Fn = NodeBase & ChainProp & { +export type Fn = NodeBase & { type: 'fn'; args: { name: string; @@ -209,7 +199,7 @@ export type Fn = NodeBase & ChainProp & { children: (Statement | Expression)[]; }; -export type Match = NodeBase & ChainProp & { +export type Match = NodeBase & { type: 'match'; about: Expression; qs: { @@ -219,118 +209,73 @@ export type Match = NodeBase & ChainProp & { default?: Statement | Expression; }; -export type Block = NodeBase & ChainProp & { +export type Block = NodeBase & { type: 'block'; statements: (Statement | Expression)[]; }; -export type Exists = NodeBase & ChainProp & { +export type Exists = NodeBase & { type: 'exists'; identifier: Identifier; }; -export type Tmpl = NodeBase & ChainProp & { +export type Tmpl = NodeBase & { type: 'tmpl'; tmpl: (string | Expression)[]; }; -export type Str = NodeBase & ChainProp & { +export type Str = NodeBase & { type: 'str'; value: string; }; -export type Num = NodeBase & ChainProp & { +export type Num = NodeBase & { type: 'num'; value: number; }; -export type Bool = NodeBase & ChainProp & { +export type Bool = NodeBase & { type: 'bool'; value: boolean; }; -export type Null = NodeBase & ChainProp & { +export type Null = NodeBase & { type: 'null'; }; -export type Obj = NodeBase & ChainProp & { +export type Obj = NodeBase & { type: 'obj'; value: Map; }; -export type Arr = NodeBase & ChainProp & { +export type Arr = NodeBase & { type: 'arr'; value: Expression[]; }; -export type Identifier = NodeBase & ChainProp & { +export type Identifier = NodeBase & { type: 'identifier'; name: string; }; -// AST -type ChainProp = { - chain?: ChainMember[]; -}; - -// AST -export function hasChainProp(x: T): x is T & ChainProp { - return 'chain' in x && x.chain !== null; -} - -// AST -export type ChainMember = CallChain | IndexChain | PropChain; - -// AST -export type CallChain = NodeBase & { - type: 'callChain'; - args: Expression[]; -}; - -// AST -export type IndexChain = NodeBase & { - type: 'indexChain'; - index: Expression; -}; - -// AST -export type PropChain = NodeBase & { - type: 'propChain'; - name: string; -}; - -// IR export type Call = NodeBase & { type: 'call'; target: Expression; args: Expression[]; }; -export function CALL(target: Call['target'], args: Call['args'], loc?: { start: number, end: number }): Call { - return { type: 'call', target, args, loc } as Call; -} -// IR export type Index = NodeBase & { type: 'index'; target: Expression; index: Expression; }; -export function INDEX(target: Index['target'], index: Index['index'], loc?: { start: number, end: number }): Index { - return { type: 'index', target, index, loc } as Index; -} - -// IR export type Prop = NodeBase & { type: 'prop'; target: Expression; name: string; }; -export function PROP(target: Prop['target'], name: Prop['name'], loc?: { start: number, end: number }): Prop { - return { type: 'prop', target, name, loc } as Prop; -} - // Type source export type TypeSource = NamedTypeSource | FnTypeSource; diff --git a/src/parser/plugins/validate-keyword.ts b/src/parser/plugins/validate-keyword.ts index de80d221..f2ad4bb5 100644 --- a/src/parser/plugins/validate-keyword.ts +++ b/src/parser/plugins/validate-keyword.ts @@ -53,8 +53,7 @@ function validateNode(node: Cst.Node): Cst.Node { case 'def': case 'attr': case 'ns': - case 'identifier': - case 'propChain': { + case 'identifier': { if (reservedWord.includes(node.name)) { throwReservedWordError(node.name); } diff --git a/src/parser/visit.ts b/src/parser/visit.ts index db617de2..03b6d1e2 100644 --- a/src/parser/visit.ts +++ b/src/parser/visit.ts @@ -44,12 +44,6 @@ export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst result.dest = visitNode(result.dest, fn) as Cst.Assign['dest']; break; } - case 'infix': { - for (let i = 0; i < result.operands.length; i++) { - result.operands[i] = visitNode(result.operands[i]!, fn) as Cst.Infix['operands'][number]; - } - break; - } case 'not': { result.expr = visitNode(result.expr, fn) as Cst.Return['expr']; break; @@ -114,16 +108,6 @@ export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst } break; } - case 'callChain': { - for (let i = 0; i < result.args.length; i++) { - result.args[i] = visitNode(result.args[i]!, fn) as Cst.Call['args'][number]; - } - break; - } - case 'indexChain': { - result.index = visitNode(result.index, fn) as Cst.Index['index']; - break; - } case 'call': { result.target = visitNode(result.target, fn) as Cst.Call['target']; for (let i = 0; i < result.args.length; i++) { @@ -155,13 +139,5 @@ export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst } } - if (Cst.hasChainProp(result)) { - if (result.chain != null) { - for (let i = 0; i < result.chain.length; i++) { - result.chain[i] = visitNode(result.chain[i]!, fn) as Cst.ChainMember; - } - } - } - return result; } From 64d5f349ffa37ae0b84f1516a9fc7d74555079a4 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 1 Oct 2023 15:24:39 +0900 Subject: [PATCH 089/126] update node --- src/node.ts | 105 +++++++++++++++++++++------------------------ src/parser/node.ts | 84 ++++++++++++++++-------------------- 2 files changed, 85 insertions(+), 104 deletions(-) diff --git a/src/node.ts b/src/node.ts index 79153c10..eb97e936 100644 --- a/src/node.ts +++ b/src/node.ts @@ -9,7 +9,25 @@ export type Loc = { end: number; }; -export type Node = Namespace | Meta | Statement | Expression | TypeSource; +export type Node = Namespace | Meta | Statement | Expression | TypeSource | Attribute; + +type NodeBase = { + loc?: Loc; // コード位置 +}; + +export type Namespace = NodeBase & { + type: 'ns'; // 名前空間 + name: string; // 空間名 + members: (Definition | Namespace)[]; // メンバー +}; + +export type Meta = NodeBase & { + type: 'meta'; // メタデータ定義 + name: string | null; // 名 + value: Expression; // 値 +}; + +// statement export type Statement = Definition | @@ -30,53 +48,6 @@ export function isStatement(x: Node): x is Statement { return statementTypes.includes(x.type); } -export type Expression = - If | - Fn | - Match | - Block | - Exists | - Tmpl | - Str | - Num | - Bool | - Null | - Obj | - Arr | - Not | - And | - Or | - Identifier | - Call | - Index | - Prop; - -const expressionTypes = [ - 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'identifier', 'call', 'index', 'prop', -]; -export function isExpression(x: Node): x is Expression { - return expressionTypes.includes(x.type); -} - -type NodeBase = { - loc?: { // コード位置 - start: number; - end: number; - }; -}; - -export type Namespace = NodeBase & { - type: 'ns'; // 名前空間 - name: string; // 空間名 - members: (Definition | Namespace)[]; // メンバー -}; - -export type Meta = NodeBase & { - type: 'meta'; // メタデータ定義 - name: string | null; // 名 - value: Expression; // 値 -}; - export type Definition = NodeBase & { type: 'def'; // 変数宣言文 name: string; // 変数名 @@ -144,6 +115,36 @@ export type Assign = NodeBase & { expr: Expression; // 式 }; +// expressions + +export type Expression = + If | + Fn | + Match | + Block | + Exists | + Tmpl | + Str | + Num | + Bool | + Null | + Obj | + Arr | + Not | + And | + Or | + Identifier | + Call | + Index | + Prop; + +const expressionTypes = [ + 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'not', 'and', 'or', 'identifier', 'call', 'index', 'prop', +]; +export function isExpression(x: Node): x is Expression { + return expressionTypes.includes(x.type); +} + export type Not = NodeBase & { type: 'not'; // 否定 expr: Expression; // 式 @@ -241,14 +242,6 @@ export type Identifier = NodeBase & { name: string; // 変数名 }; -// chain node example: -// call > fn -// call > var(fn) -// index > arr -// index > var(arr) -// prop > prop(obj) > var(obj) -// call > prop(fn) > obj - export type Call = NodeBase & { type: 'call'; // 関数呼び出し target: Expression; // 対象 diff --git a/src/parser/node.ts b/src/parser/node.ts index 97783536..941f0452 100644 --- a/src/parser/node.ts +++ b/src/parser/node.ts @@ -6,7 +6,7 @@ * この処理結果がプラグインによって処理されるとASTノードとなります。 */ -export type Node = Namespace | Meta | Statement | Expression | Attribute | TypeSource; +export type Node = Namespace | Meta | Statement | Expression | TypeSource | Attribute; export function NODE(type: string, params: Record): Node { const node: Record = { type }; @@ -27,53 +27,6 @@ export function CALL_NODE(name: string, args: Node[]): Node { }); } -export type Statement = - Definition | - Return | - Each | - For | - Loop | - Break | - Continue | - Assign | - AddAssign | - SubAssign; - -const statementTypes = [ - 'def', 'return', 'attr', 'each', 'for', 'loop', 'break', 'continue', 'assign', 'addAssign', 'subAssign', -]; -export function isStatement(x: Node): x is Statement { - return statementTypes.includes(x.type); -} - -export type Expression = - Not | - And | - Or | - If | - Fn | - Match | - Block | - Exists | - Tmpl | - Str | - Num | - Bool | - Null | - Obj | - Arr | - Identifier | - Call | - Index | - Prop; - -const expressionTypes = [ - 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'identifier', 'call', 'index', 'prop', -]; -export function isExpression(x: Node): x is Expression { - return expressionTypes.includes(x.type); -} - type NodeBase = { __AST_NODE: never; // phantom type loc?: { @@ -94,6 +47,18 @@ export type Meta = NodeBase & { value: Expression; }; +export type Statement = + Definition | + Return | + Each | + For | + Loop | + Break | + Continue | + Assign | + AddAssign | + SubAssign; + export type Definition = NodeBase & { type: 'def'; name: string; @@ -161,6 +126,29 @@ export type Assign = NodeBase & { expr: Expression; }; +// expressions + +export type Expression = + If | + Fn | + Match | + Block | + Exists | + Tmpl | + Str | + Num | + Bool | + Null | + Obj | + Arr | + Not | + And | + Or | + Identifier | + Call | + Index | + Prop; + export type Not = NodeBase & { type: 'not'; expr: Expression; From 97de71d335e73ba4bc733596a8d176ec44aa151a Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 1 Oct 2023 21:51:42 +0900 Subject: [PATCH 090/126] debug --- parse.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.js b/parse.js index dea19f39..74a859cd 100644 --- a/parse.js +++ b/parse.js @@ -1,6 +1,6 @@ import fs from 'fs'; import { Parser } from '@syuilo/aiscript'; -const script = fs.readFileSync('./debug.ais', 'utf8'); +const script = fs.readFileSync('./test.is', 'utf8'); const ast = Parser.parse(script); console.log(JSON.stringify(ast, null, 2)); From dd42375b539907fb29c15fdea05c5ddc9e72c884 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 1 Oct 2023 22:03:18 +0900 Subject: [PATCH 091/126] parser wip --- src/parser/scanner.ts | 174 ++++++++++++++++------------- src/parser/streams/token-stream.ts | 6 +- src/parser/syntaxes/common.ts | 4 +- src/parser/syntaxes/expressions.ts | 18 ++- src/parser/syntaxes/statements.ts | 16 ++- src/parser/syntaxes/toplevel.ts | 9 ++ src/parser/token.ts | 7 +- test/parser.ts | 44 ++++---- 8 files changed, 164 insertions(+), 114 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index fa27d230..cf50d56f 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -5,7 +5,8 @@ import { TOKEN, TokenKind } from './token.js'; import type { ITokenStream } from './streams/token-stream.js'; import type { Token } from './token.js'; -const spacingChars = [' ', '\t', '\r', '\n']; +const spaceChars = [' ', '\t']; +const lineBreakChars = ['\r', '\n']; const digit = /^[0-9]$/; const wordChar = /^[A-Za-z0-9_]$/; @@ -15,6 +16,7 @@ const wordChar = /^[A-Za-z0-9_]$/; export class Scanner implements ITokenStream { private stream: CharStream; private _tokens: Token[] = []; + private firstRead: boolean; constructor(source: string) constructor(stream: CharStream) @@ -28,6 +30,7 @@ export class Scanner implements ITokenStream { } public init(): void { + this.firstRead = true; this._tokens.push(this.readToken()); } @@ -51,6 +54,11 @@ export class Scanner implements ITokenStream { throw new Error('scanner is not initialized yet'); } + // 現在のトークンがEOFだったら次のトークンに進まない + if (this._tokens[0]!.kind === TokenKind.EOF) { + return; + } + this._tokens.shift(); if (this._tokens.length === 0) { @@ -83,16 +91,29 @@ export class Scanner implements ITokenStream { private readToken(): Token { let token; - let spaceSkipped = false; + let hasLeftSpacing = false; + let lineBegin = false; + + if (this.firstRead) { + lineBegin = true; + this.firstRead = false; + } + while (true) { if (this.stream.eof) { - token = TOKEN(TokenKind.EOF, spaceSkipped); + token = TOKEN(TokenKind.EOF, { hasLeftSpacing, lineBegin }); break; } // skip spasing - if (spacingChars.includes(this.stream.char)) { + if (spaceChars.includes(this.stream.char)) { + this.stream.next(); + hasLeftSpacing = true; + continue; + } + if (lineBreakChars.includes(this.stream.char)) { this.stream.next(); - spaceSkipped = true; + hasLeftSpacing = true; + lineBegin = true; continue; } switch (this.stream.char) { @@ -100,14 +121,15 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.NotEq, spaceSkipped); + token = TOKEN(TokenKind.NotEq, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Not, spaceSkipped); + token = TOKEN(TokenKind.Not, { hasLeftSpacing, lineBegin }); } break; } - case '"': { - token = this.readStringLiteral(spaceSkipped); + case '"': + case '\'': { + token = this.readStringLiteral(hasLeftSpacing, lineBegin); break; } case '#': { @@ -116,72 +138,72 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '#') { this.stream.next(); - token = TOKEN(TokenKind.Sharp3, spaceSkipped); + token = TOKEN(TokenKind.Sharp3, { hasLeftSpacing, lineBegin }); } } else if ((this.stream.char as string) === '[') { this.stream.next(); - token = TOKEN(TokenKind.OpenSharpBracket, spaceSkipped); + token = TOKEN(TokenKind.OpenSharpBracket, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Sharp, spaceSkipped); + token = TOKEN(TokenKind.Sharp, { hasLeftSpacing, lineBegin }); } break; } case '%': { this.stream.next(); - token = TOKEN(TokenKind.Percent, spaceSkipped); + token = TOKEN(TokenKind.Percent, { hasLeftSpacing, lineBegin }); break; } case '&': { this.stream.next(); if ((this.stream.char as string) === '&') { this.stream.next(); - token = TOKEN(TokenKind.And2, spaceSkipped); + token = TOKEN(TokenKind.And2, { hasLeftSpacing, lineBegin }); } break; } case '(': { this.stream.next(); - token = TOKEN(TokenKind.OpenParen, spaceSkipped); + token = TOKEN(TokenKind.OpenParen, { hasLeftSpacing, lineBegin }); break; } case ')': { this.stream.next(); - token = TOKEN(TokenKind.CloseParen, spaceSkipped); + token = TOKEN(TokenKind.CloseParen, { hasLeftSpacing, lineBegin }); break; } case '*': { this.stream.next(); - token = TOKEN(TokenKind.Asterisk, spaceSkipped); + token = TOKEN(TokenKind.Asterisk, { hasLeftSpacing, lineBegin }); break; } case '+': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.PlusEq, spaceSkipped); + token = TOKEN(TokenKind.PlusEq, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Plus, spaceSkipped); + token = TOKEN(TokenKind.Plus, { hasLeftSpacing, lineBegin }); } break; } case ',': { this.stream.next(); - token = TOKEN(TokenKind.Comma, spaceSkipped); + token = TOKEN(TokenKind.Comma, { hasLeftSpacing, lineBegin }); break; } case '-': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.MinusEq, spaceSkipped); + token = TOKEN(TokenKind.MinusEq, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Minus, spaceSkipped); + token = TOKEN(TokenKind.Minus, { hasLeftSpacing, lineBegin }); } break; } case '.': { this.stream.next(); - token = TOKEN(TokenKind.Dot, spaceSkipped); + token = TOKEN(TokenKind.Dot, { hasLeftSpacing, lineBegin }); break; } case '/': { @@ -195,7 +217,7 @@ export class Scanner implements ITokenStream { this.skipCommentLine(); continue; } else { - token = TOKEN(TokenKind.Slash, spaceSkipped); + token = TOKEN(TokenKind.Slash, { hasLeftSpacing, lineBegin }); } break; } @@ -203,27 +225,27 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Colon2, spaceSkipped); + token = TOKEN(TokenKind.Colon2, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Colon, spaceSkipped); + token = TOKEN(TokenKind.Colon, { hasLeftSpacing, lineBegin }); } break; } case ';': { this.stream.next(); - token = TOKEN(TokenKind.SemiColon, spaceSkipped); + token = TOKEN(TokenKind.SemiColon, { hasLeftSpacing, lineBegin }); break; } case '<': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.LtEq, spaceSkipped); + token = TOKEN(TokenKind.LtEq, { hasLeftSpacing, lineBegin }); } else if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Out, spaceSkipped); + token = TOKEN(TokenKind.Out, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Lt, spaceSkipped); + token = TOKEN(TokenKind.Lt, { hasLeftSpacing, lineBegin }); } break; } @@ -231,12 +253,12 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.Eq2, spaceSkipped); + token = TOKEN(TokenKind.Eq2, { hasLeftSpacing, lineBegin }); } else if ((this.stream.char as string) === '>') { this.stream.next(); - token = TOKEN(TokenKind.Arrow, spaceSkipped); + token = TOKEN(TokenKind.Arrow, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Eq, spaceSkipped); + token = TOKEN(TokenKind.Eq, { hasLeftSpacing, lineBegin }); } break; } @@ -244,63 +266,63 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.GtEq, spaceSkipped); + token = TOKEN(TokenKind.GtEq, { hasLeftSpacing, lineBegin }); } else { - token = TOKEN(TokenKind.Gt, spaceSkipped); + token = TOKEN(TokenKind.Gt, { hasLeftSpacing, lineBegin }); } break; } case '@': { this.stream.next(); - token = TOKEN(TokenKind.At, spaceSkipped); + token = TOKEN(TokenKind.At, { hasLeftSpacing, lineBegin }); break; } case '[': { this.stream.next(); - token = TOKEN(TokenKind.OpenBracket, spaceSkipped); + token = TOKEN(TokenKind.OpenBracket, { hasLeftSpacing, lineBegin }); break; } case ']': { this.stream.next(); - token = TOKEN(TokenKind.CloseBracket, spaceSkipped); + token = TOKEN(TokenKind.CloseBracket, { hasLeftSpacing, lineBegin }); break; } case '^': { this.stream.next(); - token = TOKEN(TokenKind.Hat, spaceSkipped); + token = TOKEN(TokenKind.Hat, { hasLeftSpacing, lineBegin }); break; } case '`': { this.stream.next(); - token = this.readTemplate(spaceSkipped); + token = this.readTemplate(hasLeftSpacing, lineBegin); break; } case '{': { this.stream.next(); - token = TOKEN(TokenKind.OpenBrace, spaceSkipped); + token = TOKEN(TokenKind.OpenBrace, { hasLeftSpacing, lineBegin }); break; } case '|': { this.stream.next(); if ((this.stream.char as string) === '|') { this.stream.next(); - token = TOKEN(TokenKind.Or2, spaceSkipped); + token = TOKEN(TokenKind.Or2, { hasLeftSpacing, lineBegin }); } break; } case '}': { this.stream.next(); - token = TOKEN(TokenKind.CloseBrace, spaceSkipped); + token = TOKEN(TokenKind.CloseBrace, { hasLeftSpacing, lineBegin }); break; } } if (token == null) { - const digitToken = this.tryReadDigits(spaceSkipped); + const digitToken = this.tryReadDigits(hasLeftSpacing, lineBegin); if (digitToken) { token = digitToken; break; } - const wordToken = this.tryReadWord(spaceSkipped); + const wordToken = this.tryReadWord(hasLeftSpacing, lineBegin); if (wordToken) { token = wordToken; break; @@ -312,7 +334,7 @@ export class Scanner implements ITokenStream { return token; } - private tryReadWord(spaceSkipped: boolean): Token | undefined { + private tryReadWord(hasLeftSpacing: boolean, lineBegin: boolean): Token | undefined { // read a word let value = ''; while (!this.stream.eof && wordChar.test(this.stream.char)) { @@ -325,69 +347,69 @@ export class Scanner implements ITokenStream { // check word kind switch (value) { case 'null': { - return TOKEN(TokenKind.NullKeyword, spaceSkipped); + return TOKEN(TokenKind.NullKeyword, { hasLeftSpacing, lineBegin }); } case 'true': { - return TOKEN(TokenKind.TrueKeyword, spaceSkipped); + return TOKEN(TokenKind.TrueKeyword, { hasLeftSpacing, lineBegin }); } case 'false': { - return TOKEN(TokenKind.FalseKeyword, spaceSkipped); + return TOKEN(TokenKind.FalseKeyword, { hasLeftSpacing, lineBegin }); } case 'each': { - return TOKEN(TokenKind.EachKeyword, spaceSkipped); + return TOKEN(TokenKind.EachKeyword, { hasLeftSpacing, lineBegin }); } case 'for': { - return TOKEN(TokenKind.ForKeyword, spaceSkipped); + return TOKEN(TokenKind.ForKeyword, { hasLeftSpacing, lineBegin }); } case 'loop': { - return TOKEN(TokenKind.LoopKeyword, spaceSkipped); + return TOKEN(TokenKind.LoopKeyword, { hasLeftSpacing, lineBegin }); } case 'break': { - return TOKEN(TokenKind.BreakKeyword, spaceSkipped); + return TOKEN(TokenKind.BreakKeyword, { hasLeftSpacing, lineBegin }); } case 'continue': { - return TOKEN(TokenKind.ContinueKeyword, spaceSkipped); + return TOKEN(TokenKind.ContinueKeyword, { hasLeftSpacing, lineBegin }); } case 'match': { - return TOKEN(TokenKind.MatchKeyword, spaceSkipped); + return TOKEN(TokenKind.MatchKeyword, { hasLeftSpacing, lineBegin }); } case 'case': { - return TOKEN(TokenKind.CaseKeyword, spaceSkipped); + return TOKEN(TokenKind.CaseKeyword, { hasLeftSpacing, lineBegin }); } case 'default': { - return TOKEN(TokenKind.DefaultKeyword, spaceSkipped); + return TOKEN(TokenKind.DefaultKeyword, { hasLeftSpacing, lineBegin }); } case 'if': { - return TOKEN(TokenKind.IfKeyword, spaceSkipped); + return TOKEN(TokenKind.IfKeyword, { hasLeftSpacing, lineBegin }); } case 'elif': { - return TOKEN(TokenKind.ElifKeyword, spaceSkipped); + return TOKEN(TokenKind.ElifKeyword, { hasLeftSpacing, lineBegin }); } case 'else': { - return TOKEN(TokenKind.ElseKeyword, spaceSkipped); + return TOKEN(TokenKind.ElseKeyword, { hasLeftSpacing, lineBegin }); } case 'return': { - return TOKEN(TokenKind.ReturnKeyword, spaceSkipped); + return TOKEN(TokenKind.ReturnKeyword, { hasLeftSpacing, lineBegin }); } case 'eval': { - return TOKEN(TokenKind.EvalKeyword, spaceSkipped); + return TOKEN(TokenKind.EvalKeyword, { hasLeftSpacing, lineBegin }); } case 'var': { - return TOKEN(TokenKind.VarKeyword, spaceSkipped); + return TOKEN(TokenKind.VarKeyword, { hasLeftSpacing, lineBegin }); } case 'let': { - return TOKEN(TokenKind.LetKeyword, spaceSkipped); + return TOKEN(TokenKind.LetKeyword, { hasLeftSpacing, lineBegin }); } case 'exists': { - return TOKEN(TokenKind.ExistsKeyword, spaceSkipped); + return TOKEN(TokenKind.ExistsKeyword, { hasLeftSpacing, lineBegin }); } default: { - return TOKEN(TokenKind.Identifier, spaceSkipped, { value }); + return TOKEN(TokenKind.Identifier, { hasLeftSpacing, lineBegin, value }); } } } - private tryReadDigits(spaceSkipped: boolean): Token | undefined { + private tryReadDigits(hasLeftSpacing: boolean, lineBegin: boolean): Token | undefined { // TODO: float number let value = ''; while (!this.stream.eof && digit.test(this.stream.char)) { @@ -397,10 +419,10 @@ export class Scanner implements ITokenStream { if (value.length === 0) { return; } - return TOKEN(TokenKind.NumberLiteral, spaceSkipped, { value }); + return TOKEN(TokenKind.NumberLiteral, { hasLeftSpacing, lineBegin, value }); } - private readStringLiteral(spaceSkipped: boolean): Token { + private readStringLiteral(hasLeftSpacing: boolean, lineBegin: boolean): Token { let value = ''; const literalMark = this.stream.char; @@ -417,10 +439,10 @@ export class Scanner implements ITokenStream { value += this.stream.char; this.stream.next(); } - return TOKEN(TokenKind.StringLiteral, spaceSkipped, { value }); + return TOKEN(TokenKind.StringLiteral, { hasLeftSpacing, lineBegin, value }); } - private readTemplate(spaceSkipped: boolean): Token { + private readTemplate(hasLeftSpacing: boolean, lineBegin: boolean): Token { const elements: Token[] = []; let buf = ''; let tokenBuf: Token[] = []; @@ -437,7 +459,7 @@ export class Scanner implements ITokenStream { if (this.stream.char === '`') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, spaceSkipped, { value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, lineBegin, value: buf })); } state = 'finish'; break; @@ -446,7 +468,7 @@ export class Scanner implements ITokenStream { if (this.stream.char === '{') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, spaceSkipped, { value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, lineBegin, value: buf })); buf = ''; } state = 'expr'; @@ -462,14 +484,14 @@ export class Scanner implements ITokenStream { throw new AiScriptSyntaxError('unexpected EOF'); } // skip spasing - if (spacingChars.includes(this.stream.char)) { + if (spaceChars.includes(this.stream.char)) { this.stream.next(); continue; } // 埋め込み式の終了 if ((this.stream.char as string) === '}') { this.stream.next(); - elements.push(TOKEN(TokenKind.TemplateExprElement, spaceSkipped, { children: tokenBuf })); + elements.push(TOKEN(TokenKind.TemplateExprElement, { hasLeftSpacing, lineBegin, children: tokenBuf })); tokenBuf = []; state = 'string'; break; @@ -481,7 +503,7 @@ export class Scanner implements ITokenStream { } } - return TOKEN(TokenKind.Template, spaceSkipped, { children: elements }); + return TOKEN(TokenKind.Template, { hasLeftSpacing, lineBegin, children: elements }); } private skipCommentLine(): void { diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index c4ca6093..f38b3dd3 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -42,7 +42,7 @@ export class TokenStream implements ITokenStream { throw new Error('stream is not initialized yet'); } if (this.eof) { - throw new Error('end of stream'); + return TOKEN(TokenKind.EOF); } return this._token; } @@ -62,7 +62,7 @@ export class TokenStream implements ITokenStream { if (this.index + offset < this.source.length) { return this.source[this.index + offset]!; } else { - return TOKEN(TokenKind.EOF, false); + return TOKEN(TokenKind.EOF); } } @@ -79,7 +79,7 @@ export class TokenStream implements ITokenStream { private load(): void { if (this.eof) { - this._token = TOKEN(TokenKind.EOF, false); + this._token = TOKEN(TokenKind.EOF); } else { this._token = this.source[this.index]; } diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 117bb278..6d57dcfc 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -20,8 +20,8 @@ export function parseParams(s: ITokenStream): { name: string }[] { if (items.length > 0) { if (s.kind === TokenKind.Comma) { s.next(); - } else if (!s.token.spaceSkipped) { - throw new AiScriptSyntaxError('separator token expected'); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); } } diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index f48e188d..152c6525 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -246,6 +246,12 @@ function parseAtom(s: ITokenStream): Cst.Node { case TokenKind.Identifier: { return parseReference(s); } + case TokenKind.OpenParen: { + s.next(); + const expr = parseExpr(s); + s.nextWith(TokenKind.CloseParen); + return expr; + } default: { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); } @@ -265,8 +271,8 @@ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { if (items.length > 0) { if (s.kind === TokenKind.Comma) { s.next(); - } else if (!s.token.spaceSkipped) { - throw new AiScriptSyntaxError('separator token expected'); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); } } @@ -429,8 +435,8 @@ function parseObject(s: ITokenStream): Cst.Node { } else if (s.kind === TokenKind.SemiColon) { s.next(); } else { - if (!s.token.spaceSkipped) { - throw new AiScriptSyntaxError('separator token expected'); + if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); } } } @@ -458,8 +464,8 @@ function parseArray(s: ITokenStream): Cst.Node { } else if (s.kind === TokenKind.Comma) { s.next(); } else { - if (!s.token.spaceSkipped) { - throw new AiScriptSyntaxError('separator token expected'); + if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); } } } diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 0b8cb5bb..b6530116 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -14,6 +14,10 @@ import type { ITokenStream } from '../streams/token-stream.js'; * ``` */ export function parseStatement(s: ITokenStream): Cst.Node { + if (!s.token.lineBegin) { + throw new AiScriptSyntaxError('Statement must be at the beginning of the line.'); + } + switch (s.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { @@ -171,8 +175,8 @@ function parseOut(s: ITokenStream): Cst.Node { /** * ```abnf - * Each = "each" "let" IDENT [","] Expr BlockOrStatement - * / "each" "(" "let" IDENT [","] Expr ")" BlockOrStatement + * Each = "each" "let" IDENT ("," / SPACE) Expr BlockOrStatement + * / "each" "(" "let" IDENT ("," / SPACE) Expr ")" BlockOrStatement * ``` */ function parseEach(s: ITokenStream): Cst.Node { @@ -193,6 +197,8 @@ function parseEach(s: ITokenStream): Cst.Node { if (s.kind === TokenKind.Comma) { s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); } const items = parseExpr(s); @@ -236,6 +242,12 @@ function parseFor(s: ITokenStream): Cst.Node { _from = NODE('num', { value: 0 }); } + if ((s.kind as TokenKind) === TokenKind.Comma) { + s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + const to = parseExpr(s); if (hasParen) { diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index 68706206..d58ec6f8 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -5,6 +5,7 @@ import { parseStaticLiteral } from './common.js'; import type * as Cst from '../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; +import { AiScriptSyntaxError } from '../../error.js'; /** * ```abnf @@ -40,6 +41,10 @@ export function parseTopLevel(s: ITokenStream): Cst.Node[] { * ``` */ export function parseNamespace(s: ITokenStream): Cst.Node { + if (!s.token.lineBegin) { + throw new AiScriptSyntaxError('Statement must be at the beginning of the line.'); + } + s.nextWith(TokenKind.Colon2); s.expect(TokenKind.Identifier); @@ -73,6 +78,10 @@ export function parseNamespace(s: ITokenStream): Cst.Node { * ``` */ export function parseMeta(s: ITokenStream): Cst.Node { + if (!s.token.lineBegin) { + throw new AiScriptSyntaxError('Statement must be at the beginning of the line.'); + } + s.nextWith(TokenKind.Sharp3); let name; diff --git a/src/parser/token.ts b/src/parser/token.ts index 1f38c9c9..a2f846f4 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -107,7 +107,8 @@ export enum TokenKind { export class Token { constructor( public kind: TokenKind, - public spaceSkipped: boolean, + public hasLeftSpacing: boolean = false, + public lineBegin: boolean = false, /** for number literal, string literal */ public value?: string, /** for template syntax */ @@ -119,6 +120,6 @@ export class Token { * - opts.value: for number literal, string literal * - opts.children: for template syntax */ -export function TOKEN(kind: TokenKind, spaceSkipped: boolean, opts?: { value?: Token['value'], children?: Token['children'] }): Token { - return new Token(kind, spaceSkipped, opts?.value, opts?.children); +export function TOKEN(kind: TokenKind, opts?: { hasLeftSpacing?: boolean, lineBegin?: boolean, value?: Token['value'], children?: Token['children'] }): Token { + return new Token(kind, opts?.hasLeftSpacing, opts?.lineBegin, opts?.value, opts?.children); } diff --git a/test/parser.ts b/test/parser.ts index d13abbe2..606453e2 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -59,8 +59,8 @@ describe('Scanner', () => { stream.init(); return stream; } - function next(stream: Scanner, kind: TokenKind, spaceSkipped: boolean, value?: string) { - assert.deepStrictEqual(stream.token, TOKEN(kind, spaceSkipped, { value })); + function next(stream: Scanner, kind: TokenKind, opts: { hasLeftSpacing?: boolean, lineBegin?: boolean, value?: string }) { + assert.deepStrictEqual(stream.token, TOKEN(kind, opts)); stream.next(); } @@ -77,20 +77,20 @@ describe('Scanner', () => { test.concurrent('eof', async () => { const source = ''; const stream = init(source); - next(stream, TokenKind.EOF, false); - next(stream, TokenKind.EOF, false); + next(stream, TokenKind.EOF, { lineBegin: true }); + next(stream, TokenKind.EOF, { lineBegin: true }); }); test.concurrent('keyword', async () => { const source = 'if'; const stream = init(source); - next(stream, TokenKind.IfKeyword, false); - next(stream, TokenKind.EOF, false); + next(stream, TokenKind.IfKeyword, { lineBegin: true }); + next(stream, TokenKind.EOF, {}); }); test.concurrent('identifier', async () => { const source = 'xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, false, 'xyz'); - next(stream, TokenKind.EOF, false); + next(stream, TokenKind.Identifier, { lineBegin: true, value: 'xyz' }); + next(stream, TokenKind.EOF, {}); }); test.concurrent('invalid token', async () => { const source = '$'; @@ -103,27 +103,27 @@ describe('Scanner', () => { test.concurrent('words', async () => { const source = 'abc xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, false, 'abc'); - next(stream, TokenKind.Identifier, true, 'xyz'); - next(stream, TokenKind.EOF, false); + next(stream, TokenKind.Identifier, { lineBegin: true, value: 'abc' }); + next(stream, TokenKind.Identifier, { hasLeftSpacing: true, value: 'xyz' }); + next(stream, TokenKind.EOF, {}); }); test.concurrent('stream', async () => { const source = '@abc() { }'; const stream = init(source); - next(stream, TokenKind.At, false); - next(stream, TokenKind.Identifier, false, 'abc'); - next(stream, TokenKind.OpenParen, false); - next(stream, TokenKind.CloseParen, false); - next(stream, TokenKind.OpenBrace, true); - next(stream, TokenKind.CloseBrace, true); - next(stream, TokenKind.EOF, false); + next(stream, TokenKind.At, { lineBegin: true }); + next(stream, TokenKind.Identifier, { value: 'abc' }); + next(stream, TokenKind.OpenParen, {}); + next(stream, TokenKind.CloseParen, {}); + next(stream, TokenKind.OpenBrace, { hasLeftSpacing: true }); + next(stream, TokenKind.CloseBrace, { hasLeftSpacing: true }); + next(stream, TokenKind.EOF, {}); }); test.concurrent('lookahead', async () => { const source = '@abc() { }'; const stream = init(source); - assert.deepStrictEqual(stream.lookahead(1), TOKEN(TokenKind.Identifier, false, { value: 'abc' })); - next(stream, TokenKind.At, false); - next(stream, TokenKind.Identifier, false, 'abc'); - next(stream, TokenKind.OpenParen, false); + assert.deepStrictEqual(stream.lookahead(1), TOKEN(TokenKind.Identifier, { value: 'abc' })); + next(stream, TokenKind.At, { lineBegin: true }); + next(stream, TokenKind.Identifier, { value: 'abc' }); + next(stream, TokenKind.OpenParen, { }); }); }); From 3160e9137f176dd53acc55424348a46620c31eee Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 2 Oct 2023 20:14:08 +0900 Subject: [PATCH 092/126] fix multi statement --- src/parser/syntaxes/common.ts | 5 +++++ src/parser/syntaxes/statements.ts | 4 ---- src/parser/syntaxes/toplevel.ts | 13 +++++-------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 6d57dcfc..f6d42dce 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -45,6 +45,11 @@ export function parseBlock(s: ITokenStream): Cst.Node[] { const steps: Cst.Node[] = []; while (s.kind !== TokenKind.CloseBrace) { + if (steps.length > 0) { + if (!s.token.lineBegin) { + throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); + } + } steps.push(parseStatement(s)); } diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index b6530116..41e87a32 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -14,10 +14,6 @@ import type { ITokenStream } from '../streams/token-stream.js'; * ``` */ export function parseStatement(s: ITokenStream): Cst.Node { - if (!s.token.lineBegin) { - throw new AiScriptSyntaxError('Statement must be at the beginning of the line.'); - } - switch (s.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index d58ec6f8..c39a8769 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -16,6 +16,11 @@ export function parseTopLevel(s: ITokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; while (s.kind !== TokenKind.EOF) { + if (nodes.length > 0) { + if (!s.token.lineBegin) { + throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); + } + } switch (s.kind) { case TokenKind.Colon2: { nodes.push(parseNamespace(s)); @@ -41,10 +46,6 @@ export function parseTopLevel(s: ITokenStream): Cst.Node[] { * ``` */ export function parseNamespace(s: ITokenStream): Cst.Node { - if (!s.token.lineBegin) { - throw new AiScriptSyntaxError('Statement must be at the beginning of the line.'); - } - s.nextWith(TokenKind.Colon2); s.expect(TokenKind.Identifier); @@ -78,10 +79,6 @@ export function parseNamespace(s: ITokenStream): Cst.Node { * ``` */ export function parseMeta(s: ITokenStream): Cst.Node { - if (!s.token.lineBegin) { - throw new AiScriptSyntaxError('Statement must be at the beginning of the line.'); - } - s.nextWith(TokenKind.Sharp3); let name; From 05de92cad482a5de61dfd5203d7e94c4fd1547e0 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 2 Oct 2023 20:47:46 +0900 Subject: [PATCH 093/126] backslash support --- src/parser/scanner.ts | 5 +++++ src/parser/syntaxes/expressions.ts | 20 ++++++++++++++++++-- src/parser/token.ts | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index cf50d56f..e4847aa1 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -282,6 +282,11 @@ export class Scanner implements ITokenStream { token = TOKEN(TokenKind.OpenBracket, { hasLeftSpacing, lineBegin }); break; } + case '\\': { + this.stream.next(); + token = TOKEN(TokenKind.BackSlash, { hasLeftSpacing, lineBegin }); + break; + } case ']': { this.stream.next(); token = TOKEN(TokenKind.CloseBracket, { hasLeftSpacing, lineBegin }); diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 152c6525..e8b15c49 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -50,6 +50,12 @@ const operators: OpInfo[] = [ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { const op = s.kind; s.next(); + + // 改行ができなかった頃の下位互換性を維持 + if (s.kind === TokenKind.BackSlash) { + s.next(); + } + const expr = parsePratt(s, minBp); switch (op) { @@ -84,8 +90,14 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { const op = s.kind; - if (op === TokenKind.Dot) { + s.next(); + + // 改行ができなかった頃の下位互換性を維持 + if (s.kind === TokenKind.BackSlash) { s.next(); + } + + if (op === TokenKind.Dot) { s.expect(TokenKind.Identifier); const name = s.token.value!; s.next(); @@ -95,7 +107,6 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { name, }); } else { - s.next(); const right = parsePratt(s, minBp); switch (op) { @@ -497,6 +508,11 @@ function parsePratt(s: ITokenStream, minBp: number): Cst.Node { } while (true) { + // 下位互換性を維持 + if (s.kind === TokenKind.BackSlash) { + s.next(); + } + const tokenKind = s.kind; const postfix = operators.find((x): x is PostfixInfo => x.opKind === 'postfix' && x.kind === tokenKind); diff --git a/src/parser/token.ts b/src/parser/token.ts index a2f846f4..44ea512d 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -92,6 +92,8 @@ export enum TokenKind { At, /** "[" */ OpenBracket, + /** "\\" */ + BackSlash, /** "]" */ CloseBracket, /** "^" */ From 6d473393738b54faecd30a8e69314a685e49b673 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 2 Oct 2023 20:53:19 +0900 Subject: [PATCH 094/126] lint --- src/parser/syntaxes/toplevel.ts | 2 +- src/parser/token.ts | 4 ++-- src/parser/visit.ts | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index c39a8769..e97b3d81 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -1,11 +1,11 @@ import { NODE } from '../node.js'; import { TokenKind } from '../token.js'; +import { AiScriptSyntaxError } from '../../error.js'; import { parseDefStatement, parseStatement } from './statements.js'; import { parseStaticLiteral } from './common.js'; import type * as Cst from '../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; -import { AiScriptSyntaxError } from '../../error.js'; /** * ```abnf diff --git a/src/parser/token.ts b/src/parser/token.ts index 44ea512d..bed7df35 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -109,8 +109,8 @@ export enum TokenKind { export class Token { constructor( public kind: TokenKind, - public hasLeftSpacing: boolean = false, - public lineBegin: boolean = false, + public hasLeftSpacing = false, + public lineBegin = false, /** for number literal, string literal */ public value?: string, /** for template syntax */ diff --git a/src/parser/visit.ts b/src/parser/visit.ts index 03b6d1e2..b49257bb 100644 --- a/src/parser/visit.ts +++ b/src/parser/visit.ts @@ -1,4 +1,4 @@ -import * as Cst from './node.js'; +import type * as Cst from './node.js'; export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst.Node { const result = fn(node); From 54a16e8233ac639e024c3304d8a646d28886250f Mon Sep 17 00:00:00 2001 From: marihachi Date: Tue, 3 Oct 2023 01:05:00 +0900 Subject: [PATCH 095/126] newline token --- src/parser/scanner.ts | 153 +++++++++++++++----------------- src/parser/syntaxes/common.ts | 16 ++-- src/parser/syntaxes/toplevel.ts | 28 ++++-- src/parser/token.ts | 6 +- test/parser.ts | 28 +++--- 5 files changed, 124 insertions(+), 107 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index e4847aa1..a1e57cf9 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -92,16 +92,10 @@ export class Scanner implements ITokenStream { private readToken(): Token { let token; let hasLeftSpacing = false; - let lineBegin = false; - - if (this.firstRead) { - lineBegin = true; - this.firstRead = false; - } while (true) { if (this.stream.eof) { - token = TOKEN(TokenKind.EOF, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.EOF, { hasLeftSpacing }); break; } // skip spasing @@ -112,24 +106,23 @@ export class Scanner implements ITokenStream { } if (lineBreakChars.includes(this.stream.char)) { this.stream.next(); - hasLeftSpacing = true; - lineBegin = true; - continue; + token = TOKEN(TokenKind.NewLine, { hasLeftSpacing }); + return token; } switch (this.stream.char) { case '!': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.NotEq, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.NotEq, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Not, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Not, { hasLeftSpacing }); } break; } case '"': case '\'': { - token = this.readStringLiteral(hasLeftSpacing, lineBegin); + token = this.readStringLiteral(hasLeftSpacing); break; } case '#': { @@ -138,72 +131,72 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '#') { this.stream.next(); - token = TOKEN(TokenKind.Sharp3, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Sharp3, { hasLeftSpacing }); } } else if ((this.stream.char as string) === '[') { this.stream.next(); - token = TOKEN(TokenKind.OpenSharpBracket, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.OpenSharpBracket, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Sharp, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Sharp, { hasLeftSpacing }); } break; } case '%': { this.stream.next(); - token = TOKEN(TokenKind.Percent, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Percent, { hasLeftSpacing }); break; } case '&': { this.stream.next(); if ((this.stream.char as string) === '&') { this.stream.next(); - token = TOKEN(TokenKind.And2, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.And2, { hasLeftSpacing }); } break; } case '(': { this.stream.next(); - token = TOKEN(TokenKind.OpenParen, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.OpenParen, { hasLeftSpacing }); break; } case ')': { this.stream.next(); - token = TOKEN(TokenKind.CloseParen, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.CloseParen, { hasLeftSpacing }); break; } case '*': { this.stream.next(); - token = TOKEN(TokenKind.Asterisk, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Asterisk, { hasLeftSpacing }); break; } case '+': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.PlusEq, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.PlusEq, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Plus, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Plus, { hasLeftSpacing }); } break; } case ',': { this.stream.next(); - token = TOKEN(TokenKind.Comma, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Comma, { hasLeftSpacing }); break; } case '-': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.MinusEq, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.MinusEq, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Minus, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Minus, { hasLeftSpacing }); } break; } case '.': { this.stream.next(); - token = TOKEN(TokenKind.Dot, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Dot, { hasLeftSpacing }); break; } case '/': { @@ -217,7 +210,7 @@ export class Scanner implements ITokenStream { this.skipCommentLine(); continue; } else { - token = TOKEN(TokenKind.Slash, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Slash, { hasLeftSpacing }); } break; } @@ -225,27 +218,27 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Colon2, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Colon2, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Colon, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Colon, { hasLeftSpacing }); } break; } case ';': { this.stream.next(); - token = TOKEN(TokenKind.SemiColon, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.SemiColon, { hasLeftSpacing }); break; } case '<': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.LtEq, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.LtEq, { hasLeftSpacing }); } else if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Out, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Out, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Lt, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Lt, { hasLeftSpacing }); } break; } @@ -253,12 +246,12 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.Eq2, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Eq2, { hasLeftSpacing }); } else if ((this.stream.char as string) === '>') { this.stream.next(); - token = TOKEN(TokenKind.Arrow, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Arrow, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Eq, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Eq, { hasLeftSpacing }); } break; } @@ -266,68 +259,68 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.GtEq, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.GtEq, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Gt, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Gt, { hasLeftSpacing }); } break; } case '@': { this.stream.next(); - token = TOKEN(TokenKind.At, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.At, { hasLeftSpacing }); break; } case '[': { this.stream.next(); - token = TOKEN(TokenKind.OpenBracket, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.OpenBracket, { hasLeftSpacing }); break; } case '\\': { this.stream.next(); - token = TOKEN(TokenKind.BackSlash, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.BackSlash, { hasLeftSpacing }); break; } case ']': { this.stream.next(); - token = TOKEN(TokenKind.CloseBracket, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.CloseBracket, { hasLeftSpacing }); break; } case '^': { this.stream.next(); - token = TOKEN(TokenKind.Hat, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Hat, { hasLeftSpacing }); break; } case '`': { this.stream.next(); - token = this.readTemplate(hasLeftSpacing, lineBegin); + token = this.readTemplate(hasLeftSpacing); break; } case '{': { this.stream.next(); - token = TOKEN(TokenKind.OpenBrace, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.OpenBrace, { hasLeftSpacing }); break; } case '|': { this.stream.next(); if ((this.stream.char as string) === '|') { this.stream.next(); - token = TOKEN(TokenKind.Or2, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.Or2, { hasLeftSpacing }); } break; } case '}': { this.stream.next(); - token = TOKEN(TokenKind.CloseBrace, { hasLeftSpacing, lineBegin }); + token = TOKEN(TokenKind.CloseBrace, { hasLeftSpacing }); break; } } if (token == null) { - const digitToken = this.tryReadDigits(hasLeftSpacing, lineBegin); + const digitToken = this.tryReadDigits(hasLeftSpacing); if (digitToken) { token = digitToken; break; } - const wordToken = this.tryReadWord(hasLeftSpacing, lineBegin); + const wordToken = this.tryReadWord(hasLeftSpacing); if (wordToken) { token = wordToken; break; @@ -339,7 +332,7 @@ export class Scanner implements ITokenStream { return token; } - private tryReadWord(hasLeftSpacing: boolean, lineBegin: boolean): Token | undefined { + private tryReadWord(hasLeftSpacing: boolean): Token | undefined { // read a word let value = ''; while (!this.stream.eof && wordChar.test(this.stream.char)) { @@ -352,69 +345,69 @@ export class Scanner implements ITokenStream { // check word kind switch (value) { case 'null': { - return TOKEN(TokenKind.NullKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.NullKeyword, { hasLeftSpacing }); } case 'true': { - return TOKEN(TokenKind.TrueKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.TrueKeyword, { hasLeftSpacing }); } case 'false': { - return TOKEN(TokenKind.FalseKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.FalseKeyword, { hasLeftSpacing }); } case 'each': { - return TOKEN(TokenKind.EachKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.EachKeyword, { hasLeftSpacing }); } case 'for': { - return TOKEN(TokenKind.ForKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.ForKeyword, { hasLeftSpacing }); } case 'loop': { - return TOKEN(TokenKind.LoopKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.LoopKeyword, { hasLeftSpacing }); } case 'break': { - return TOKEN(TokenKind.BreakKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.BreakKeyword, { hasLeftSpacing }); } case 'continue': { - return TOKEN(TokenKind.ContinueKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.ContinueKeyword, { hasLeftSpacing }); } case 'match': { - return TOKEN(TokenKind.MatchKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.MatchKeyword, { hasLeftSpacing }); } case 'case': { - return TOKEN(TokenKind.CaseKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.CaseKeyword, { hasLeftSpacing }); } case 'default': { - return TOKEN(TokenKind.DefaultKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.DefaultKeyword, { hasLeftSpacing }); } case 'if': { - return TOKEN(TokenKind.IfKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.IfKeyword, { hasLeftSpacing }); } case 'elif': { - return TOKEN(TokenKind.ElifKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.ElifKeyword, { hasLeftSpacing }); } case 'else': { - return TOKEN(TokenKind.ElseKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.ElseKeyword, { hasLeftSpacing }); } case 'return': { - return TOKEN(TokenKind.ReturnKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.ReturnKeyword, { hasLeftSpacing }); } case 'eval': { - return TOKEN(TokenKind.EvalKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.EvalKeyword, { hasLeftSpacing }); } case 'var': { - return TOKEN(TokenKind.VarKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.VarKeyword, { hasLeftSpacing }); } case 'let': { - return TOKEN(TokenKind.LetKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.LetKeyword, { hasLeftSpacing }); } case 'exists': { - return TOKEN(TokenKind.ExistsKeyword, { hasLeftSpacing, lineBegin }); + return TOKEN(TokenKind.ExistsKeyword, { hasLeftSpacing }); } default: { - return TOKEN(TokenKind.Identifier, { hasLeftSpacing, lineBegin, value }); + return TOKEN(TokenKind.Identifier, { hasLeftSpacing, value }); } } } - private tryReadDigits(hasLeftSpacing: boolean, lineBegin: boolean): Token | undefined { + private tryReadDigits(hasLeftSpacing: boolean): Token | undefined { // TODO: float number let value = ''; while (!this.stream.eof && digit.test(this.stream.char)) { @@ -424,10 +417,10 @@ export class Scanner implements ITokenStream { if (value.length === 0) { return; } - return TOKEN(TokenKind.NumberLiteral, { hasLeftSpacing, lineBegin, value }); + return TOKEN(TokenKind.NumberLiteral, { hasLeftSpacing, value }); } - private readStringLiteral(hasLeftSpacing: boolean, lineBegin: boolean): Token { + private readStringLiteral(hasLeftSpacing: boolean): Token { let value = ''; const literalMark = this.stream.char; @@ -444,10 +437,10 @@ export class Scanner implements ITokenStream { value += this.stream.char; this.stream.next(); } - return TOKEN(TokenKind.StringLiteral, { hasLeftSpacing, lineBegin, value }); + return TOKEN(TokenKind.StringLiteral, { hasLeftSpacing, value }); } - private readTemplate(hasLeftSpacing: boolean, lineBegin: boolean): Token { + private readTemplate(hasLeftSpacing: boolean): Token { const elements: Token[] = []; let buf = ''; let tokenBuf: Token[] = []; @@ -464,7 +457,7 @@ export class Scanner implements ITokenStream { if (this.stream.char === '`') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, lineBegin, value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, value: buf })); } state = 'finish'; break; @@ -473,7 +466,7 @@ export class Scanner implements ITokenStream { if (this.stream.char === '{') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, lineBegin, value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, value: buf })); buf = ''; } state = 'expr'; @@ -496,7 +489,7 @@ export class Scanner implements ITokenStream { // 埋め込み式の終了 if ((this.stream.char as string) === '}') { this.stream.next(); - elements.push(TOKEN(TokenKind.TemplateExprElement, { hasLeftSpacing, lineBegin, children: tokenBuf })); + elements.push(TOKEN(TokenKind.TemplateExprElement, { hasLeftSpacing, children: tokenBuf })); tokenBuf = []; state = 'string'; break; @@ -508,7 +501,7 @@ export class Scanner implements ITokenStream { } } - return TOKEN(TokenKind.Template, { hasLeftSpacing, lineBegin, children: elements }); + return TOKEN(TokenKind.Template, { hasLeftSpacing, children: elements }); } private skipCommentLine(): void { diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index f6d42dce..57b0d460 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -43,14 +43,20 @@ export function parseParams(s: ITokenStream): { name: string }[] { export function parseBlock(s: ITokenStream): Cst.Node[] { s.nextWith(TokenKind.OpenBrace); + while (s.kind === TokenKind.NewLine) { + s.next(); + } + const steps: Cst.Node[] = []; while (s.kind !== TokenKind.CloseBrace) { - if (steps.length > 0) { - if (!s.token.lineBegin) { - throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); - } - } steps.push(parseStatement(s)); + + if ((s.kind as TokenKind) !== TokenKind.NewLine && (s.kind as TokenKind) !== TokenKind.CloseBrace) { + throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); + } + while ((s.kind as TokenKind) === TokenKind.NewLine) { + s.next(); + } } s.nextWith(TokenKind.CloseBrace); diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index e97b3d81..e43ce6ec 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -15,12 +15,11 @@ import type { ITokenStream } from '../streams/token-stream.js'; export function parseTopLevel(s: ITokenStream): Cst.Node[] { const nodes: Cst.Node[] = []; + while (s.kind === TokenKind.NewLine) { + s.next(); + } + while (s.kind !== TokenKind.EOF) { - if (nodes.length > 0) { - if (!s.token.lineBegin) { - throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); - } - } switch (s.kind) { case TokenKind.Colon2: { nodes.push(parseNamespace(s)); @@ -35,6 +34,13 @@ export function parseTopLevel(s: ITokenStream): Cst.Node[] { break; } } + + if ((s.kind as TokenKind) !== TokenKind.NewLine && (s.kind as TokenKind) !== TokenKind.EOF) { + throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); + } + while ((s.kind as TokenKind) === TokenKind.NewLine) { + s.next(); + } } return nodes; @@ -54,6 +60,11 @@ export function parseNamespace(s: ITokenStream): Cst.Node { const members: Cst.Node[] = []; s.nextWith(TokenKind.OpenBrace); + + while (s.kind === TokenKind.NewLine) { + s.next(); + } + while (s.kind !== TokenKind.CloseBrace) { switch (s.kind) { case TokenKind.VarKeyword: @@ -67,6 +78,13 @@ export function parseNamespace(s: ITokenStream): Cst.Node { break; } } + + if ((s.kind as TokenKind) !== TokenKind.NewLine && (s.kind as TokenKind) !== TokenKind.CloseBrace) { + throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); + } + while ((s.kind as TokenKind) === TokenKind.NewLine) { + s.next(); + } } s.nextWith(TokenKind.CloseBrace); diff --git a/src/parser/token.ts b/src/parser/token.ts index bed7df35..7c771de2 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -1,5 +1,6 @@ export enum TokenKind { EOF, + NewLine, Identifier, // literal @@ -110,7 +111,6 @@ export class Token { constructor( public kind: TokenKind, public hasLeftSpacing = false, - public lineBegin = false, /** for number literal, string literal */ public value?: string, /** for template syntax */ @@ -122,6 +122,6 @@ export class Token { * - opts.value: for number literal, string literal * - opts.children: for template syntax */ -export function TOKEN(kind: TokenKind, opts?: { hasLeftSpacing?: boolean, lineBegin?: boolean, value?: Token['value'], children?: Token['children'] }): Token { - return new Token(kind, opts?.hasLeftSpacing, opts?.lineBegin, opts?.value, opts?.children); +export function TOKEN(kind: TokenKind, opts?: { hasLeftSpacing?: boolean, value?: Token['value'], children?: Token['children'] }): Token { + return new Token(kind, opts?.hasLeftSpacing, opts?.value, opts?.children); } diff --git a/test/parser.ts b/test/parser.ts index 606453e2..871e8b2f 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -59,7 +59,7 @@ describe('Scanner', () => { stream.init(); return stream; } - function next(stream: Scanner, kind: TokenKind, opts: { hasLeftSpacing?: boolean, lineBegin?: boolean, value?: string }) { + function next(stream: Scanner, kind: TokenKind, opts: { hasLeftSpacing?: boolean, value?: string }) { assert.deepStrictEqual(stream.token, TOKEN(kind, opts)); stream.next(); } @@ -77,20 +77,20 @@ describe('Scanner', () => { test.concurrent('eof', async () => { const source = ''; const stream = init(source); - next(stream, TokenKind.EOF, { lineBegin: true }); - next(stream, TokenKind.EOF, { lineBegin: true }); + next(stream, TokenKind.EOF, { }); + next(stream, TokenKind.EOF, { }); }); test.concurrent('keyword', async () => { const source = 'if'; const stream = init(source); - next(stream, TokenKind.IfKeyword, { lineBegin: true }); - next(stream, TokenKind.EOF, {}); + next(stream, TokenKind.IfKeyword, { }); + next(stream, TokenKind.EOF, { }); }); test.concurrent('identifier', async () => { const source = 'xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, { lineBegin: true, value: 'xyz' }); - next(stream, TokenKind.EOF, {}); + next(stream, TokenKind.Identifier, { value: 'xyz' }); + next(stream, TokenKind.EOF, { }); }); test.concurrent('invalid token', async () => { const source = '$'; @@ -103,26 +103,26 @@ describe('Scanner', () => { test.concurrent('words', async () => { const source = 'abc xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, { lineBegin: true, value: 'abc' }); + next(stream, TokenKind.Identifier, { value: 'abc' }); next(stream, TokenKind.Identifier, { hasLeftSpacing: true, value: 'xyz' }); - next(stream, TokenKind.EOF, {}); + next(stream, TokenKind.EOF, { }); }); test.concurrent('stream', async () => { const source = '@abc() { }'; const stream = init(source); - next(stream, TokenKind.At, { lineBegin: true }); + next(stream, TokenKind.At, { }); next(stream, TokenKind.Identifier, { value: 'abc' }); - next(stream, TokenKind.OpenParen, {}); - next(stream, TokenKind.CloseParen, {}); + next(stream, TokenKind.OpenParen, { }); + next(stream, TokenKind.CloseParen, { }); next(stream, TokenKind.OpenBrace, { hasLeftSpacing: true }); next(stream, TokenKind.CloseBrace, { hasLeftSpacing: true }); - next(stream, TokenKind.EOF, {}); + next(stream, TokenKind.EOF, { }); }); test.concurrent('lookahead', async () => { const source = '@abc() { }'; const stream = init(source); assert.deepStrictEqual(stream.lookahead(1), TOKEN(TokenKind.Identifier, { value: 'abc' })); - next(stream, TokenKind.At, { lineBegin: true }); + next(stream, TokenKind.At, { }); next(stream, TokenKind.Identifier, { value: 'abc' }); next(stream, TokenKind.OpenParen, { }); }); From 0a45d7f6d67d7725f61dd6615b25c6d9fa0a46ec Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 5 Oct 2023 20:05:36 +0900 Subject: [PATCH 096/126] test --- test/index.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/index.ts b/test/index.ts index c29788ef..dcf2c754 100644 --- a/test/index.ts +++ b/test/index.ts @@ -16,9 +16,13 @@ const exe = (program: string): Promise => new Promise((ok, err) => { maxStep: 9999, }); - const parser = new Parser(); - const ast = parser.parse(program); - aiscript.exec(ast).catch(err); + try { + const parser = new Parser(); + const ast = parser.parse(program); + aiscript.exec(ast).catch(err); + } catch (e) { + err(e); + } }); const getMeta = (program: string) => { From 971847487e213a175492958e9b4a7e9bd1060880 Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 5 Oct 2023 20:07:25 +0900 Subject: [PATCH 097/126] debug --- parse.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parse.js b/parse.js index 74a859cd..ade1f9cb 100644 --- a/parse.js +++ b/parse.js @@ -1,6 +1,7 @@ import fs from 'fs'; import { Parser } from '@syuilo/aiscript'; +import { inspect } from 'util'; const script = fs.readFileSync('./test.is', 'utf8'); const ast = Parser.parse(script); -console.log(JSON.stringify(ast, null, 2)); +console.log(inspect(ast, { depth: 10 })); From 9ba1655572f22c49bb9ab445262a5aa674159a69 Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 5 Oct 2023 20:07:43 +0900 Subject: [PATCH 098/126] parser: newline --- src/parser/syntaxes/expressions.ts | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index e8b15c49..76800ffb 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -352,6 +352,7 @@ function parseMatch(s: ITokenStream): Cst.Node { const about = parseExpr(s); s.nextWith(TokenKind.OpenBrace); + s.nextWith(TokenKind.NewLine); const qs: { q: Cst.Node, a: Cst.Node }[] = []; while (s.kind !== TokenKind.DefaultKeyword && s.kind !== TokenKind.CloseBrace) { @@ -359,6 +360,7 @@ function parseMatch(s: ITokenStream): Cst.Node { const q = parseExpr(s); s.nextWith(TokenKind.Arrow); const a = parseBlockOrStatement(s); + s.nextWith(TokenKind.NewLine); qs.push({ q, a }); } @@ -367,6 +369,7 @@ function parseMatch(s: ITokenStream): Cst.Node { s.next(); s.nextWith(TokenKind.Arrow); x = parseBlockOrStatement(s); + s.nextWith(TokenKind.NewLine); } s.nextWith(TokenKind.CloseBrace); @@ -426,6 +429,10 @@ function parseReference(s: ITokenStream): Cst.Node { function parseObject(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.OpenBrace); + if (s.kind === TokenKind.NewLine) { + s.next(); + } + const map = new Map(); while (s.kind !== TokenKind.CloseBrace) { s.expect(TokenKind.Identifier); @@ -445,11 +452,17 @@ function parseObject(s: ITokenStream): Cst.Node { s.next(); } else if (s.kind === TokenKind.SemiColon) { s.next(); + } else if (s.kind === TokenKind.NewLine) { + // noop } else { if (!s.token.hasLeftSpacing) { throw new AiScriptSyntaxError('separator expected'); } } + + if (s.kind === TokenKind.NewLine) { + s.next(); + } } s.nextWith(TokenKind.CloseBrace); @@ -465,6 +478,10 @@ function parseObject(s: ITokenStream): Cst.Node { function parseArray(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.OpenBracket); + if (s.kind === TokenKind.NewLine) { + s.next(); + } + const value = []; while (s.kind !== TokenKind.CloseBracket) { value.push(parseExpr(s)); @@ -474,11 +491,17 @@ function parseArray(s: ITokenStream): Cst.Node { break; } else if (s.kind === TokenKind.Comma) { s.next(); + } else if (s.kind === TokenKind.NewLine) { + // noop } else { if (!s.token.hasLeftSpacing) { throw new AiScriptSyntaxError('separator expected'); } } + + if (s.kind === TokenKind.NewLine) { + s.next(); + } } s.nextWith(TokenKind.CloseBracket); From 0246f7f88e869d5748c437801b482815c0f9621c Mon Sep 17 00:00:00 2001 From: marihachi Date: Thu, 5 Oct 2023 20:39:28 +0900 Subject: [PATCH 099/126] parser: float --- src/parser/scanner.ts | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index a1e57cf9..c2bbb908 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -408,15 +408,31 @@ export class Scanner implements ITokenStream { } private tryReadDigits(hasLeftSpacing: boolean): Token | undefined { - // TODO: float number - let value = ''; + let wholeNumber = ''; + let fractional = ''; while (!this.stream.eof && digit.test(this.stream.char)) { - value += this.stream.char; + wholeNumber += this.stream.char; this.stream.next(); } - if (value.length === 0) { + if (wholeNumber.length === 0) { return; } + if (!this.stream.eof && this.stream.char === '.') { + this.stream.next(); + while (!this.stream.eof && digit.test(this.stream.char)) { + fractional += this.stream.char; + this.stream.next(); + } + if (fractional.length === 0) { + throw new AiScriptSyntaxError('digit expected'); + } + } + let value; + if (fractional.length > 0) { + value = wholeNumber + '.' + fractional; + } else { + value = wholeNumber; + } return TOKEN(TokenKind.NumberLiteral, { hasLeftSpacing, value }); } From d77a2e82bbb3dff4b9d09320a163817ef35253fa Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 11:37:35 +0900 Subject: [PATCH 100/126] escape chars for template --- src/parser/scanner.ts | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index c2bbb908..ae4a510c 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -460,7 +460,7 @@ export class Scanner implements ITokenStream { const elements: Token[] = []; let buf = ''; let tokenBuf: Token[] = []; - let state: 'string' | 'expr' | 'finish' = 'string'; + let state: 'string' | 'escape' | 'expr' | 'finish' = 'string'; while (state !== 'finish') { switch (state) { @@ -469,6 +469,12 @@ export class Scanner implements ITokenStream { if (this.stream.eof) { throw new AiScriptSyntaxError('unexpected EOF'); } + // エスケープ + if (this.stream.char === '\\') { + this.stream.next(); + state = 'escape'; + break; + } // テンプレートの終了 if (this.stream.char === '`') { this.stream.next(); @@ -492,6 +498,21 @@ export class Scanner implements ITokenStream { this.stream.next(); break; } + case 'escape': { + // エスケープ文字が無いままEOFに達した + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + // エスケープ対象かどうか確認 + if (!['`', '{', '}', '\\'].includes(this.stream.char)) { + throw new AiScriptSyntaxError(`unexpected char: ${this.stream.char}`); + } + buf += this.stream.char; + this.stream.next(); + // 通常の文字列に戻る + state = 'string'; + break; + } case 'expr': { // 埋め込み式の終端記号が無いままEOFに達した if (this.stream.eof) { From 7215bdc15ebc2aa5a7cbc1ecce901285f2424420 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 11:37:47 +0900 Subject: [PATCH 101/126] escape newlines for pratt parser --- src/parser/syntaxes/expressions.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 76800ffb..807ab5d1 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -51,9 +51,10 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { const op = s.kind; s.next(); - // 改行ができなかった頃の下位互換性を維持 + // 改行のエスケープ if (s.kind === TokenKind.BackSlash) { s.next(); + s.nextWith(TokenKind.NewLine); } const expr = parsePratt(s, minBp); @@ -92,9 +93,10 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { const op = s.kind; s.next(); - // 改行ができなかった頃の下位互換性を維持 + // 改行のエスケープ if (s.kind === TokenKind.BackSlash) { s.next(); + s.nextWith(TokenKind.NewLine); } if (op === TokenKind.Dot) { @@ -531,9 +533,10 @@ function parsePratt(s: ITokenStream, minBp: number): Cst.Node { } while (true) { - // 下位互換性を維持 + // 改行のエスケープ if (s.kind === TokenKind.BackSlash) { s.next(); + s.nextWith(TokenKind.NewLine); } const tokenKind = s.kind; From 1d41979592eecae649bd6958dabe1bb5faf2786b Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 11:45:23 +0900 Subject: [PATCH 102/126] update escape for template --- src/parser/scanner.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index ae4a510c..2d2d4a33 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -499,14 +499,11 @@ export class Scanner implements ITokenStream { break; } case 'escape': { - // エスケープ文字が無いままEOFに達した + // エスケープ対象の文字が無いままEOFに達した if (this.stream.eof) { throw new AiScriptSyntaxError('unexpected EOF'); } - // エスケープ対象かどうか確認 - if (!['`', '{', '}', '\\'].includes(this.stream.char)) { - throw new AiScriptSyntaxError(`unexpected char: ${this.stream.char}`); - } + // 普通の文字として取り込み buf += this.stream.char; this.stream.next(); // 通常の文字列に戻る From 81b60fe73eccc25ae613def0ee41c81c6bbb7321 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 12:20:08 +0900 Subject: [PATCH 103/126] fix postfix ops --- src/parser/syntaxes/expressions.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 807ab5d1..15ae515a 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -547,8 +547,12 @@ function parsePratt(s: ITokenStream, minBp: number): Cst.Node { break; } - left = parsePostfix(s, left); - continue; + if ([TokenKind.OpenBracket, TokenKind.OpenParen].includes(tokenKind) && s.token.hasLeftSpacing) { + // 前にスペースがある場合は後置演算子として処理しない + } else { + left = parsePostfix(s, left); + continue; + } } const infix = operators.find((x): x is InfixInfo => x.opKind === 'infix' && x.kind === tokenKind); From 6e517b34fad5911943bdd96881353693c4d0790c Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 12:36:03 +0900 Subject: [PATCH 104/126] modify test: disallow line break in pratt parser --- test/index.ts | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/index.ts b/test/index.ts index dcf2c754..7f46da85 100644 --- a/test/index.ts +++ b/test/index.ts @@ -328,11 +328,17 @@ describe('Infix expression', () => { eq(await exe('<: eval { 1 } + eval { 1 }'), NUM(2)); }); - test.concurrent('allow line break', async () => { - eq(await exe(` + test.concurrent('disallow line break', async () => { + try { + await exe(` <: 1 + 1 + 1 - `), NUM(3)); + `); + } catch (e) { + assert.ok(true); + return; + } + assert.fail(); }); test.concurrent('escaped line break', async () => { From 2437640bc51cd31c05e3fc84129eb24c229cd7f6 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 12:43:01 +0900 Subject: [PATCH 105/126] escape for string literal --- src/parser/scanner.ts | 44 ++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 2d2d4a33..41b78b37 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -291,7 +291,6 @@ export class Scanner implements ITokenStream { break; } case '`': { - this.stream.next(); token = this.readTemplate(hasLeftSpacing); break; } @@ -438,20 +437,41 @@ export class Scanner implements ITokenStream { private readStringLiteral(hasLeftSpacing: boolean): Token { let value = ''; - const literalMark = this.stream.char; + let state: 'string' | 'escape' | 'finish' = 'string'; + this.stream.next(); - while (true) { - if (this.stream.eof) { - throw new AiScriptSyntaxError('unexpected EOF'); - } - if (this.stream.char === literalMark) { - this.stream.next(); - break; + while (state !== 'finish') { + switch (state) { + case 'string': { + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + if (this.stream.char === '\\') { + this.stream.next(); + state = 'escape'; + break; + } + if (this.stream.char === literalMark) { + this.stream.next(); + state = 'finish'; + break; + } + value += this.stream.char; + this.stream.next(); + break; + } + case 'escape': { + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + value += this.stream.char; + this.stream.next(); + state = 'string'; + break; + } } - value += this.stream.char; - this.stream.next(); } return TOKEN(TokenKind.StringLiteral, { hasLeftSpacing, value }); } @@ -462,6 +482,8 @@ export class Scanner implements ITokenStream { let tokenBuf: Token[] = []; let state: 'string' | 'escape' | 'expr' | 'finish' = 'string'; + this.stream.next(); + while (state !== 'finish') { switch (state) { case 'string': { From 9cd41a2ca56f07bcfb324e28748b8aaf361fe8e0 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 14:33:28 +0900 Subject: [PATCH 106/126] clean --- src/parser/scanner.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 41b78b37..ee243542 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -16,7 +16,6 @@ const wordChar = /^[A-Za-z0-9_]$/; export class Scanner implements ITokenStream { private stream: CharStream; private _tokens: Token[] = []; - private firstRead: boolean; constructor(source: string) constructor(stream: CharStream) @@ -30,7 +29,6 @@ export class Scanner implements ITokenStream { } public init(): void { - this.firstRead = true; this._tokens.push(this.readToken()); } From 1e693bee396fb1462b1c6426ed4f3af7e24077f7 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 15:05:31 +0900 Subject: [PATCH 107/126] remove stream init, node pos wip --- src/parser/index.ts | 1 - src/parser/scanner.ts | 15 --------------- src/parser/streams/char-stream.ts | 19 +++++++++---------- test/parser.ts | 19 +------------------ 4 files changed, 10 insertions(+), 44 deletions(-) diff --git a/src/parser/index.ts b/src/parser/index.ts index ff44b753..ef0ce7f0 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -51,7 +51,6 @@ export class Parser { let nodes: Cst.Node[]; const scanner = new Scanner(input); - scanner.init(); nodes = parseTopLevel(scanner); // validate the node tree diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index ee243542..e3084e87 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -22,13 +22,9 @@ export class Scanner implements ITokenStream { constructor(x: string | CharStream) { if (typeof x === 'string') { this.stream = new CharStream(x); - this.stream.init(); } else { this.stream = x; } - } - - public init(): void { this._tokens.push(this.readToken()); } @@ -37,9 +33,6 @@ export class Scanner implements ITokenStream { } public get token(): Token { - if (this._tokens.length === 0) { - throw new Error('scanner is not initialized yet'); - } return this._tokens[0]!; } @@ -48,10 +41,6 @@ export class Scanner implements ITokenStream { } public next(): void { - if (this._tokens.length === 0) { - throw new Error('scanner is not initialized yet'); - } - // 現在のトークンがEOFだったら次のトークンに進まない if (this._tokens[0]!.kind === TokenKind.EOF) { return; @@ -65,10 +54,6 @@ export class Scanner implements ITokenStream { } public lookahead(offset: number): Token { - if (this._tokens.length === 0) { - throw new Error('scanner is not initialized yet'); - } - while (this._tokens.length <= offset) { this._tokens.push(this.readToken()); } diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index d0b30849..8b8e4e79 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -7,18 +7,21 @@ export class CharStream { private lastPageIndex: number; private pageIndex: number; private address: number; - private _char?: string; + private _char: string; + /** zero-based number */ + private line: number; + /** zero-based number */ + private column: number; - constructor(source: string) { + constructor(source: string, opts?: { line?: number, column?: number }) { this.pages = new Map(); this.pages.set(0, source); this.firstPageIndex = 0; this.lastPageIndex = 0; this.pageIndex = 0; this.address = 0; - } - - public init(): void { + this.line = opts?.line ?? 0; + this.column = opts?.column ?? 0; this.loadChar(); } @@ -30,10 +33,6 @@ export class CharStream { if (this.eof) { throw new Error('end of stream'); } - if (this._char == null) { - // EOFではない時にnullだったらinitされていない - throw new Error('stream is not initialized yet'); - } return this._char; } @@ -72,7 +71,7 @@ export class CharStream { private loadChar(): void { if (!this.eof) { - this._char = this.pages.get(this.pageIndex)![this.address]; + this._char = this.pages.get(this.pageIndex)![this.address]!; } } } diff --git a/test/parser.ts b/test/parser.ts index 871e8b2f..4e4d3abb 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -7,14 +7,12 @@ describe('CharStream', () => { test.concurrent('char', async () => { const source = 'abc'; const stream = new CharStream(source); - stream.init(); assert.strictEqual('a', stream.char); }); test.concurrent('next', async () => { const source = 'abc'; const stream = new CharStream(source); - stream.init(); stream.next(); assert.strictEqual('b', stream.char); }); @@ -23,7 +21,6 @@ describe('CharStream', () => { test.concurrent('move', async () => { const source = 'abc'; const stream = new CharStream(source); - stream.init(); stream.next(); assert.strictEqual('b', stream.char); stream.prev(); @@ -33,7 +30,6 @@ describe('CharStream', () => { test.concurrent('境界外には移動しない', async () => { const source = 'abc'; const stream = new CharStream(source); - stream.init(); stream.prev(); assert.strictEqual('a', stream.char); }); @@ -42,7 +38,6 @@ describe('CharStream', () => { test.concurrent('eof', async () => { const source = 'abc'; const stream = new CharStream(source); - stream.init(); assert.strictEqual(false, stream.eof); stream.next(); assert.strictEqual(false, stream.eof); @@ -56,7 +51,6 @@ describe('CharStream', () => { describe('Scanner', () => { function init(source: string) { const stream = new Scanner(source); - stream.init(); return stream; } function next(stream: Scanner, kind: TokenKind, opts: { hasLeftSpacing?: boolean, value?: string }) { @@ -64,16 +58,6 @@ describe('Scanner', () => { stream.next(); } - test.concurrent('can get a token after init', async () => { - const source = ''; - const stream = new Scanner(source); - try { - stream.token; - assert.fail(); - } catch (e) { } - stream.init(); - stream.token; - }); test.concurrent('eof', async () => { const source = ''; const stream = init(source); @@ -94,9 +78,8 @@ describe('Scanner', () => { }); test.concurrent('invalid token', async () => { const source = '$'; - const stream = new Scanner(source); try { - stream.init(); + const stream = new Scanner(source); assert.fail(); } catch (e) { } }); From 755a333353c3b8ef0e965554e5d4a48cc9839bff Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 17:17:36 +0900 Subject: [PATCH 108/126] code location --- src/node.ts | 4 +- src/parser/node.ts | 17 ++-- src/parser/scanner.ts | 145 ++++++++++++++++------------- src/parser/streams/char-stream.ts | 27 +++++- src/parser/streams/token-stream.ts | 6 +- src/parser/syntaxes/expressions.ts | 95 ++++++++++++------- src/parser/syntaxes/statements.ts | 56 +++++++---- src/parser/syntaxes/toplevel.ts | 8 +- src/parser/token.ts | 7 +- test/index.ts | 4 +- test/parser.ts | 53 ++++++----- 11 files changed, 259 insertions(+), 163 deletions(-) diff --git a/src/node.ts b/src/node.ts index eb97e936..ad75e28f 100644 --- a/src/node.ts +++ b/src/node.ts @@ -5,8 +5,8 @@ */ export type Loc = { - start: number; - end: number; + line: number; + column: number; }; export type Node = Namespace | Meta | Statement | Expression | TypeSource | Attribute; diff --git a/src/parser/node.ts b/src/parser/node.ts index 941f0452..0919aab4 100644 --- a/src/parser/node.ts +++ b/src/parser/node.ts @@ -8,30 +8,29 @@ export type Node = Namespace | Meta | Statement | Expression | TypeSource | Attribute; -export function NODE(type: string, params: Record): Node { +export function NODE(type: string, params: Record, loc: { column: number, line: number }): Node { const node: Record = { type }; - //params.children; for (const key of Object.keys(params)) { if (params[key] !== undefined) { node[key] = params[key]; } } - //node.loc = { start, end }; + node.loc = loc; return node as Node; } -export function CALL_NODE(name: string, args: Node[]): Node { +export function CALL_NODE(name: string, args: Node[], loc: { column: number, line: number }): Node { return NODE('call', { - target: NODE('identifier', { name }), + target: NODE('identifier', { name }, loc), args, - }); + }, loc); } type NodeBase = { __AST_NODE: never; // phantom type - loc?: { - start: number; - end: number; + loc: { + line: number; + column: number; }; }; diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index e3084e87..e972c888 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -78,7 +78,7 @@ export class Scanner implements ITokenStream { while (true) { if (this.stream.eof) { - token = TOKEN(TokenKind.EOF, { hasLeftSpacing }); + token = TOKEN(TokenKind.EOF, this.stream.getPos(), { hasLeftSpacing }); break; } // skip spasing @@ -87,9 +87,13 @@ export class Scanner implements ITokenStream { hasLeftSpacing = true; continue; } + + // トークン位置を記憶 + const loc = this.stream.getPos(); + if (lineBreakChars.includes(this.stream.char)) { this.stream.next(); - token = TOKEN(TokenKind.NewLine, { hasLeftSpacing }); + token = TOKEN(TokenKind.NewLine, loc, { hasLeftSpacing }); return token; } switch (this.stream.char) { @@ -97,9 +101,9 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.NotEq, { hasLeftSpacing }); + token = TOKEN(TokenKind.NotEq, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Not, { hasLeftSpacing }); + token = TOKEN(TokenKind.Not, loc, { hasLeftSpacing }); } break; } @@ -114,72 +118,72 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '#') { this.stream.next(); - token = TOKEN(TokenKind.Sharp3, { hasLeftSpacing }); + token = TOKEN(TokenKind.Sharp3, loc, { hasLeftSpacing }); } } else if ((this.stream.char as string) === '[') { this.stream.next(); - token = TOKEN(TokenKind.OpenSharpBracket, { hasLeftSpacing }); + token = TOKEN(TokenKind.OpenSharpBracket, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Sharp, { hasLeftSpacing }); + token = TOKEN(TokenKind.Sharp, loc, { hasLeftSpacing }); } break; } case '%': { this.stream.next(); - token = TOKEN(TokenKind.Percent, { hasLeftSpacing }); + token = TOKEN(TokenKind.Percent, loc, { hasLeftSpacing }); break; } case '&': { this.stream.next(); if ((this.stream.char as string) === '&') { this.stream.next(); - token = TOKEN(TokenKind.And2, { hasLeftSpacing }); + token = TOKEN(TokenKind.And2, loc, { hasLeftSpacing }); } break; } case '(': { this.stream.next(); - token = TOKEN(TokenKind.OpenParen, { hasLeftSpacing }); + token = TOKEN(TokenKind.OpenParen, loc, { hasLeftSpacing }); break; } case ')': { this.stream.next(); - token = TOKEN(TokenKind.CloseParen, { hasLeftSpacing }); + token = TOKEN(TokenKind.CloseParen, loc, { hasLeftSpacing }); break; } case '*': { this.stream.next(); - token = TOKEN(TokenKind.Asterisk, { hasLeftSpacing }); + token = TOKEN(TokenKind.Asterisk, loc, { hasLeftSpacing }); break; } case '+': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.PlusEq, { hasLeftSpacing }); + token = TOKEN(TokenKind.PlusEq, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Plus, { hasLeftSpacing }); + token = TOKEN(TokenKind.Plus, loc, { hasLeftSpacing }); } break; } case ',': { this.stream.next(); - token = TOKEN(TokenKind.Comma, { hasLeftSpacing }); + token = TOKEN(TokenKind.Comma, loc, { hasLeftSpacing }); break; } case '-': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.MinusEq, { hasLeftSpacing }); + token = TOKEN(TokenKind.MinusEq, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Minus, { hasLeftSpacing }); + token = TOKEN(TokenKind.Minus, loc, { hasLeftSpacing }); } break; } case '.': { this.stream.next(); - token = TOKEN(TokenKind.Dot, { hasLeftSpacing }); + token = TOKEN(TokenKind.Dot, loc, { hasLeftSpacing }); break; } case '/': { @@ -193,7 +197,7 @@ export class Scanner implements ITokenStream { this.skipCommentLine(); continue; } else { - token = TOKEN(TokenKind.Slash, { hasLeftSpacing }); + token = TOKEN(TokenKind.Slash, loc, { hasLeftSpacing }); } break; } @@ -201,27 +205,27 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Colon2, { hasLeftSpacing }); + token = TOKEN(TokenKind.Colon2, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Colon, { hasLeftSpacing }); + token = TOKEN(TokenKind.Colon, loc, { hasLeftSpacing }); } break; } case ';': { this.stream.next(); - token = TOKEN(TokenKind.SemiColon, { hasLeftSpacing }); + token = TOKEN(TokenKind.SemiColon, loc, { hasLeftSpacing }); break; } case '<': { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.LtEq, { hasLeftSpacing }); + token = TOKEN(TokenKind.LtEq, loc, { hasLeftSpacing }); } else if ((this.stream.char as string) === ':') { this.stream.next(); - token = TOKEN(TokenKind.Out, { hasLeftSpacing }); + token = TOKEN(TokenKind.Out, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Lt, { hasLeftSpacing }); + token = TOKEN(TokenKind.Lt, loc, { hasLeftSpacing }); } break; } @@ -229,12 +233,12 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.Eq2, { hasLeftSpacing }); + token = TOKEN(TokenKind.Eq2, loc, { hasLeftSpacing }); } else if ((this.stream.char as string) === '>') { this.stream.next(); - token = TOKEN(TokenKind.Arrow, { hasLeftSpacing }); + token = TOKEN(TokenKind.Arrow, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Eq, { hasLeftSpacing }); + token = TOKEN(TokenKind.Eq, loc, { hasLeftSpacing }); } break; } @@ -242,35 +246,35 @@ export class Scanner implements ITokenStream { this.stream.next(); if ((this.stream.char as string) === '=') { this.stream.next(); - token = TOKEN(TokenKind.GtEq, { hasLeftSpacing }); + token = TOKEN(TokenKind.GtEq, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Gt, { hasLeftSpacing }); + token = TOKEN(TokenKind.Gt, loc, { hasLeftSpacing }); } break; } case '@': { this.stream.next(); - token = TOKEN(TokenKind.At, { hasLeftSpacing }); + token = TOKEN(TokenKind.At, loc, { hasLeftSpacing }); break; } case '[': { this.stream.next(); - token = TOKEN(TokenKind.OpenBracket, { hasLeftSpacing }); + token = TOKEN(TokenKind.OpenBracket, loc, { hasLeftSpacing }); break; } case '\\': { this.stream.next(); - token = TOKEN(TokenKind.BackSlash, { hasLeftSpacing }); + token = TOKEN(TokenKind.BackSlash, loc, { hasLeftSpacing }); break; } case ']': { this.stream.next(); - token = TOKEN(TokenKind.CloseBracket, { hasLeftSpacing }); + token = TOKEN(TokenKind.CloseBracket, loc, { hasLeftSpacing }); break; } case '^': { this.stream.next(); - token = TOKEN(TokenKind.Hat, { hasLeftSpacing }); + token = TOKEN(TokenKind.Hat, loc, { hasLeftSpacing }); break; } case '`': { @@ -279,20 +283,20 @@ export class Scanner implements ITokenStream { } case '{': { this.stream.next(); - token = TOKEN(TokenKind.OpenBrace, { hasLeftSpacing }); + token = TOKEN(TokenKind.OpenBrace, loc, { hasLeftSpacing }); break; } case '|': { this.stream.next(); if ((this.stream.char as string) === '|') { this.stream.next(); - token = TOKEN(TokenKind.Or2, { hasLeftSpacing }); + token = TOKEN(TokenKind.Or2, loc, { hasLeftSpacing }); } break; } case '}': { this.stream.next(); - token = TOKEN(TokenKind.CloseBrace, { hasLeftSpacing }); + token = TOKEN(TokenKind.CloseBrace, loc, { hasLeftSpacing }); break; } } @@ -317,6 +321,9 @@ export class Scanner implements ITokenStream { private tryReadWord(hasLeftSpacing: boolean): Token | undefined { // read a word let value = ''; + + const loc = this.stream.getPos(); + while (!this.stream.eof && wordChar.test(this.stream.char)) { value += this.stream.char; this.stream.next(); @@ -327,64 +334,64 @@ export class Scanner implements ITokenStream { // check word kind switch (value) { case 'null': { - return TOKEN(TokenKind.NullKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.NullKeyword, loc, { hasLeftSpacing }); } case 'true': { - return TOKEN(TokenKind.TrueKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.TrueKeyword, loc, { hasLeftSpacing }); } case 'false': { - return TOKEN(TokenKind.FalseKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.FalseKeyword, loc, { hasLeftSpacing }); } case 'each': { - return TOKEN(TokenKind.EachKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.EachKeyword, loc, { hasLeftSpacing }); } case 'for': { - return TOKEN(TokenKind.ForKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.ForKeyword, loc, { hasLeftSpacing }); } case 'loop': { - return TOKEN(TokenKind.LoopKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.LoopKeyword, loc, { hasLeftSpacing }); } case 'break': { - return TOKEN(TokenKind.BreakKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.BreakKeyword, loc, { hasLeftSpacing }); } case 'continue': { - return TOKEN(TokenKind.ContinueKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.ContinueKeyword, loc, { hasLeftSpacing }); } case 'match': { - return TOKEN(TokenKind.MatchKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.MatchKeyword, loc, { hasLeftSpacing }); } case 'case': { - return TOKEN(TokenKind.CaseKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.CaseKeyword, loc, { hasLeftSpacing }); } case 'default': { - return TOKEN(TokenKind.DefaultKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.DefaultKeyword, loc, { hasLeftSpacing }); } case 'if': { - return TOKEN(TokenKind.IfKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.IfKeyword, loc, { hasLeftSpacing }); } case 'elif': { - return TOKEN(TokenKind.ElifKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.ElifKeyword, loc, { hasLeftSpacing }); } case 'else': { - return TOKEN(TokenKind.ElseKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.ElseKeyword, loc, { hasLeftSpacing }); } case 'return': { - return TOKEN(TokenKind.ReturnKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.ReturnKeyword, loc, { hasLeftSpacing }); } case 'eval': { - return TOKEN(TokenKind.EvalKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.EvalKeyword, loc, { hasLeftSpacing }); } case 'var': { - return TOKEN(TokenKind.VarKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.VarKeyword, loc, { hasLeftSpacing }); } case 'let': { - return TOKEN(TokenKind.LetKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.LetKeyword, loc, { hasLeftSpacing }); } case 'exists': { - return TOKEN(TokenKind.ExistsKeyword, { hasLeftSpacing }); + return TOKEN(TokenKind.ExistsKeyword, loc, { hasLeftSpacing }); } default: { - return TOKEN(TokenKind.Identifier, { hasLeftSpacing, value }); + return TOKEN(TokenKind.Identifier, loc, { hasLeftSpacing, value }); } } } @@ -392,6 +399,9 @@ export class Scanner implements ITokenStream { private tryReadDigits(hasLeftSpacing: boolean): Token | undefined { let wholeNumber = ''; let fractional = ''; + + const loc = this.stream.getPos(); + while (!this.stream.eof && digit.test(this.stream.char)) { wholeNumber += this.stream.char; this.stream.next(); @@ -415,7 +425,7 @@ export class Scanner implements ITokenStream { } else { value = wholeNumber; } - return TOKEN(TokenKind.NumberLiteral, { hasLeftSpacing, value }); + return TOKEN(TokenKind.NumberLiteral, loc, { hasLeftSpacing, value }); } private readStringLiteral(hasLeftSpacing: boolean): Token { @@ -423,6 +433,7 @@ export class Scanner implements ITokenStream { const literalMark = this.stream.char; let state: 'string' | 'escape' | 'finish' = 'string'; + const loc = this.stream.getPos(); this.stream.next(); while (state !== 'finish') { @@ -456,7 +467,7 @@ export class Scanner implements ITokenStream { } } } - return TOKEN(TokenKind.StringLiteral, { hasLeftSpacing, value }); + return TOKEN(TokenKind.StringLiteral, loc, { hasLeftSpacing, value }); } private readTemplate(hasLeftSpacing: boolean): Token { @@ -465,6 +476,8 @@ export class Scanner implements ITokenStream { let tokenBuf: Token[] = []; let state: 'string' | 'escape' | 'expr' | 'finish' = 'string'; + const loc = this.stream.getPos(); + let elementLoc = loc; this.stream.next(); while (state !== 'finish') { @@ -484,7 +497,7 @@ export class Scanner implements ITokenStream { if (this.stream.char === '`') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, elementLoc, { hasLeftSpacing, value: buf })); } state = 'finish'; break; @@ -493,9 +506,11 @@ export class Scanner implements ITokenStream { if (this.stream.char === '{') { this.stream.next(); if (buf.length > 0) { - elements.push(TOKEN(TokenKind.TemplateStringElement, { hasLeftSpacing, value: buf })); + elements.push(TOKEN(TokenKind.TemplateStringElement, elementLoc, { hasLeftSpacing, value: buf })); buf = ''; } + // ここから式エレメントになるので位置を更新 + elementLoc = this.stream.getPos(); state = 'expr'; break; } @@ -528,8 +543,10 @@ export class Scanner implements ITokenStream { // 埋め込み式の終了 if ((this.stream.char as string) === '}') { this.stream.next(); - elements.push(TOKEN(TokenKind.TemplateExprElement, { hasLeftSpacing, children: tokenBuf })); + elements.push(TOKEN(TokenKind.TemplateExprElement, elementLoc, { hasLeftSpacing, children: tokenBuf })); tokenBuf = []; + // ここから文字列エレメントになるので位置を更新 + elementLoc = this.stream.getPos(); state = 'string'; break; } @@ -540,7 +557,7 @@ export class Scanner implements ITokenStream { } } - return TOKEN(TokenKind.Template, { hasLeftSpacing, children: elements }); + return TOKEN(TokenKind.Template, loc, { hasLeftSpacing, children: elements }); } private skipCommentLine(): void { diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index 8b8e4e79..9a418d17 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -7,7 +7,7 @@ export class CharStream { private lastPageIndex: number; private pageIndex: number; private address: number; - private _char: string; + private _char?: string; /** zero-based number */ private line: number; /** zero-based number */ @@ -33,7 +33,14 @@ export class CharStream { if (this.eof) { throw new Error('end of stream'); } - return this._char; + return this._char!; + } + + public getPos(): { line: number, column: number } { + return { + line: (this.line + 1), + column: (this.column + 1), + }; } public next(): void { @@ -44,6 +51,18 @@ export class CharStream { this.address = 0; } this.loadChar(); + + // column, line + if (!this.eof) { + if (this._char === '\n') { + this.line++; + this.column = 0; + } else if (this._char !== '\r') { + this.column++; + } + } else { + this.column++; + } } public prev(): void { @@ -70,7 +89,9 @@ export class CharStream { } private loadChar(): void { - if (!this.eof) { + if (this.eof) { + this._char = undefined; + } else { this._char = this.pages.get(this.pageIndex)![this.address]!; } } diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index f38b3dd3..7159d09c 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -42,7 +42,7 @@ export class TokenStream implements ITokenStream { throw new Error('stream is not initialized yet'); } if (this.eof) { - return TOKEN(TokenKind.EOF); + return TOKEN(TokenKind.EOF, { line: -1, column: -1 }); } return this._token; } @@ -62,7 +62,7 @@ export class TokenStream implements ITokenStream { if (this.index + offset < this.source.length) { return this.source[this.index + offset]!; } else { - return TOKEN(TokenKind.EOF); + return TOKEN(TokenKind.EOF, { line: -1, column: -1 }); } } @@ -79,7 +79,7 @@ export class TokenStream implements ITokenStream { private load(): void { if (this.eof) { - this._token = TOKEN(TokenKind.EOF); + this._token = TOKEN(TokenKind.EOF, { line: -1, column: -1 }); } else { this._token = this.source[this.index]; } diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 15ae515a..12bafae8 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -48,6 +48,7 @@ const operators: OpInfo[] = [ ]; function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { + const loc = s.token.loc; const op = s.kind; s.next(); @@ -63,25 +64,25 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { case TokenKind.Plus: { // 数値リテラル以外は非サポート if (expr.type === 'num') { - return expr; + return NODE('num', { value: expr.value }, loc); } else { throw new AiScriptSyntaxError('currently, sign is only supported for number literal.'); } // TODO: 将来的にサポートされる式を拡張 - // return NODE('plus', { expr }); + // return NODE('plus', { expr }, loc); } case TokenKind.Minus: { // 数値リテラル以外は非サポート if (expr.type === 'num') { - return NODE('num', { value: -1 * expr.value }); + return NODE('num', { value: -1 * expr.value }, loc); } else { throw new AiScriptSyntaxError('currently, sign is only supported for number literal.'); } // TODO: 将来的にサポートされる式を拡張 - // return NODE('minus', { expr }); + // return NODE('minus', { expr }, loc); } case TokenKind.Not: { - return NODE('not', { expr }); + return NODE('not', { expr }, loc); } default: { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); @@ -90,6 +91,7 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { } function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { + const loc = s.token.loc; const op = s.kind; s.next(); @@ -107,52 +109,52 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { return NODE('prop', { target: left, name, - }); + }, loc); } else { const right = parsePratt(s, minBp); switch (op) { case TokenKind.Hat: { - return CALL_NODE('Core:pow', [left, right]); + return CALL_NODE('Core:pow', [left, right], loc); } case TokenKind.Asterisk: { - return CALL_NODE('Core:mul', [left, right]); + return CALL_NODE('Core:mul', [left, right], loc); } case TokenKind.Slash: { - return CALL_NODE('Core:div', [left, right]); + return CALL_NODE('Core:div', [left, right], loc); } case TokenKind.Percent: { - return CALL_NODE('Core:mod', [left, right]); + return CALL_NODE('Core:mod', [left, right], loc); } case TokenKind.Plus: { - return CALL_NODE('Core:add', [left, right]); + return CALL_NODE('Core:add', [left, right], loc); } case TokenKind.Minus: { - return CALL_NODE('Core:sub', [left, right]); + return CALL_NODE('Core:sub', [left, right], loc); } case TokenKind.Lt: { - return CALL_NODE('Core:lt', [left, right]); + return CALL_NODE('Core:lt', [left, right], loc); } case TokenKind.LtEq: { - return CALL_NODE('Core:lteq', [left, right]); + return CALL_NODE('Core:lteq', [left, right], loc); } case TokenKind.Gt: { - return CALL_NODE('Core:gt', [left, right]); + return CALL_NODE('Core:gt', [left, right], loc); } case TokenKind.GtEq: { - return CALL_NODE('Core:gteq', [left, right]); + return CALL_NODE('Core:gteq', [left, right], loc); } case TokenKind.Eq2: { - return CALL_NODE('Core:eq', [left, right]); + return CALL_NODE('Core:eq', [left, right], loc); } case TokenKind.NotEq: { - return CALL_NODE('Core:neq', [left, right]); + return CALL_NODE('Core:neq', [left, right], loc); } case TokenKind.And2: { - return NODE('and', { left, right }); + return NODE('and', { left, right }, loc); } case TokenKind.Or2: { - return NODE('or', { left, right }); + return NODE('or', { left, right }, loc); } default: { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); @@ -162,7 +164,9 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { } function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { + const loc = s.token.loc; const op = s.kind; + switch (op) { case TokenKind.OpenParen: { return parseCall(s, expr); @@ -175,7 +179,7 @@ function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { return NODE('index', { target: expr, index, - }); + }, loc); } default: { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); @@ -184,6 +188,8 @@ function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { } function parseAtom(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + switch (s.kind) { case TokenKind.IfKeyword: { return parseIf(s); @@ -206,7 +212,7 @@ function parseAtom(s: ITokenStream): Cst.Node { for (const element of s.token.children!) { switch (element.kind) { case TokenKind.TemplateStringElement: { - values.push(NODE('str', { value: element.value! })); + values.push(NODE('str', { value: element.value! }, element.loc)); break; } case TokenKind.TemplateExprElement: { @@ -227,28 +233,28 @@ function parseAtom(s: ITokenStream): Cst.Node { } s.next(); - return NODE('tmpl', { tmpl: values }); + return NODE('tmpl', { tmpl: values }, loc); } case TokenKind.StringLiteral: { const value = s.token.value!; s.next(); - return NODE('str', { value }); + return NODE('str', { value }, loc); } case TokenKind.NumberLiteral: { // TODO: validate number value const value = Number(s.token.value!); s.next(); - return NODE('num', { value }); + return NODE('num', { value }, loc); } case TokenKind.TrueKeyword: case TokenKind.FalseKeyword: { const value = (s.kind === TokenKind.TrueKeyword); s.next(); - return NODE('bool', { value }); + return NODE('bool', { value }, loc); } case TokenKind.NullKeyword: { s.next(); - return NODE('null', { }); + return NODE('null', { }, loc); } case TokenKind.OpenBrace: { return parseObject(s); @@ -275,6 +281,7 @@ function parseAtom(s: ITokenStream): Cst.Node { * Call = "(" [Expr *(("," / SPACE) Expr)] ")" */ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { + const loc = s.token.loc; const items: Cst.Node[] = []; s.nextWith(TokenKind.OpenParen); @@ -297,7 +304,7 @@ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { return NODE('call', { target, args: items, - }); + }, loc); } /** @@ -306,6 +313,8 @@ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { * ``` */ function parseIf(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.IfKeyword); const cond = parseExpr(s); const then = parseBlockOrStatement(s); @@ -324,7 +333,7 @@ function parseIf(s: ITokenStream): Cst.Node { _else = parseBlockOrStatement(s); } - return NODE('if', { cond, then, elseif, else: _else }); + return NODE('if', { cond, then, elseif, else: _else }, loc); } /** @@ -333,6 +342,8 @@ function parseIf(s: ITokenStream): Cst.Node { * ``` */ function parseFnExpr(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.At); const params = parseParams(s); @@ -341,7 +352,7 @@ function parseFnExpr(s: ITokenStream): Cst.Node { const body = parseBlock(s); - return NODE('fn', { args: params, retType: undefined, children: body }); + return NODE('fn', { args: params, retType: undefined, children: body }, loc); } /** @@ -350,6 +361,8 @@ function parseFnExpr(s: ITokenStream): Cst.Node { * ``` */ function parseMatch(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.MatchKeyword); const about = parseExpr(s); @@ -376,7 +389,7 @@ function parseMatch(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.CloseBrace); - return NODE('match', { about, qs, default: x }); + return NODE('match', { about, qs, default: x }, loc); } /** @@ -385,9 +398,11 @@ function parseMatch(s: ITokenStream): Cst.Node { * ``` */ function parseEval(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.EvalKeyword); const statements = parseBlock(s); - return NODE('block', { statements }); + return NODE('block', { statements }, loc); } /** @@ -396,9 +411,11 @@ function parseEval(s: ITokenStream): Cst.Node { * ``` */ function parseExists(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.ExistsKeyword); const identifier = parseReference(s); - return NODE('exists', { identifier }); + return NODE('exists', { identifier }, loc); } /** @@ -407,6 +424,8 @@ function parseExists(s: ITokenStream): Cst.Node { * ``` */ function parseReference(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + const segs: string[] = []; while (true) { if (segs.length > 0) { @@ -420,7 +439,7 @@ function parseReference(s: ITokenStream): Cst.Node { segs.push(s.token.value!); s.next(); } - return NODE('identifier', { name: segs.join(':') }); + return NODE('identifier', { name: segs.join(':') }, loc); } /** @@ -429,6 +448,8 @@ function parseReference(s: ITokenStream): Cst.Node { * ``` */ function parseObject(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.OpenBrace); if (s.kind === TokenKind.NewLine) { @@ -469,7 +490,7 @@ function parseObject(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.CloseBrace); - return NODE('obj', { value: map }); + return NODE('obj', { value: map }, loc); } /** @@ -478,6 +499,8 @@ function parseObject(s: ITokenStream): Cst.Node { * ``` */ function parseArray(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.OpenBracket); if (s.kind === TokenKind.NewLine) { @@ -508,7 +531,7 @@ function parseArray(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.CloseBracket); - return NODE('arr', { value }); + return NODE('arr', { value }, loc); } //#region Pratt parsing diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 41e87a32..c58a1bda 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -14,6 +14,8 @@ import type { ITokenStream } from '../streams/token-stream.js'; * ``` */ export function parseStatement(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + switch (s.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { @@ -45,11 +47,11 @@ export function parseStatement(s: ITokenStream): Cst.Node { } case TokenKind.BreakKeyword: { s.next(); - return NODE('break', {}); + return NODE('break', {}, loc); } case TokenKind.ContinueKeyword: { s.next(); - return NODE('continue', {}); + return NODE('continue', {}, loc); } } const expr = parseExpr(s); @@ -81,9 +83,11 @@ export function parseDefStatement(s: ITokenStream): Cst.Node { * ``` */ export function parseBlockOrStatement(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + if (s.kind === TokenKind.OpenBrace) { const statements = parseBlock(s); - return NODE('block', { statements }); + return NODE('block', { statements }, loc); } else { return parseStatement(s); } @@ -95,6 +99,8 @@ export function parseBlockOrStatement(s: ITokenStream): Cst.Node { * ``` */ function parseVarDef(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + let mut; switch (s.kind) { case TokenKind.LetKeyword: { @@ -125,7 +131,7 @@ function parseVarDef(s: ITokenStream): Cst.Node { const expr = parseExpr(s); - return NODE('def', { name, varType: ty, expr, mut, attr: [] }); + return NODE('def', { name, varType: ty, expr, mut, attr: [] }, loc); } /** @@ -134,6 +140,8 @@ function parseVarDef(s: ITokenStream): Cst.Node { * ``` */ function parseFnDef(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.At); s.expect(TokenKind.Identifier); @@ -152,10 +160,10 @@ function parseFnDef(s: ITokenStream): Cst.Node { args: params, retType: undefined, // TODO: type children: body, - }), + }, loc), mut: false, attr: [], - }); + }, loc); } /** @@ -164,9 +172,11 @@ function parseFnDef(s: ITokenStream): Cst.Node { * ``` */ function parseOut(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.Out); const expr = parseExpr(s); - return CALL_NODE('print', [expr]); + return CALL_NODE('print', [expr], loc); } /** @@ -176,6 +186,7 @@ function parseOut(s: ITokenStream): Cst.Node { * ``` */ function parseEach(s: ITokenStream): Cst.Node { + const loc = s.token.loc; let hasParen = false; s.nextWith(TokenKind.EachKeyword); @@ -209,10 +220,11 @@ function parseEach(s: ITokenStream): Cst.Node { var: name, items: items, for: body, - }); + }, loc); } function parseFor(s: ITokenStream): Cst.Node { + const loc = s.token.loc; let hasParen = false; s.nextWith(TokenKind.ForKeyword); @@ -226,6 +238,8 @@ function parseFor(s: ITokenStream): Cst.Node { // range syntax s.next(); + const identLoc = s.token.loc; + s.expect(TokenKind.Identifier); const name = s.token.value!; s.next(); @@ -235,7 +249,7 @@ function parseFor(s: ITokenStream): Cst.Node { s.next(); _from = parseExpr(s); } else { - _from = NODE('num', { value: 0 }); + _from = NODE('num', { value: 0 }, identLoc); } if ((s.kind as TokenKind) === TokenKind.Comma) { @@ -257,7 +271,7 @@ function parseFor(s: ITokenStream): Cst.Node { from: _from, to, for: body, - }); + }, loc); } else { // times syntax @@ -272,7 +286,7 @@ function parseFor(s: ITokenStream): Cst.Node { return NODE('for', { times, for: body, - }); + }, loc); } } @@ -282,9 +296,11 @@ function parseFor(s: ITokenStream): Cst.Node { * ``` */ function parseReturn(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.ReturnKeyword); const expr = parseExpr(s); - return NODE('return', { expr }); + return NODE('return', { expr }, loc); } /** @@ -318,6 +334,8 @@ function parseStatementWithAttr(s: ITokenStream): Cst.Node { * ``` */ function parseAttr(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.OpenSharpBracket); s.expect(TokenKind.Identifier); @@ -328,7 +346,7 @@ function parseAttr(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.CloseBracket); - return NODE('attr', { name, value: undefined }); + return NODE('attr', { name, value: undefined }, loc); } /** @@ -337,9 +355,11 @@ function parseAttr(s: ITokenStream): Cst.Node { * ``` */ function parseLoop(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.LoopKeyword); const statements = parseBlock(s); - return NODE('loop', { statements }); + return NODE('loop', { statements }, loc); } /** @@ -348,22 +368,24 @@ function parseLoop(s: ITokenStream): Cst.Node { * ``` */ function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { + const loc = s.token.loc; + // Assign switch (s.kind) { case TokenKind.Eq: { s.next(); const expr = parseExpr(s); - return NODE('assign', { dest, expr }); + return NODE('assign', { dest, expr }, loc); } case TokenKind.PlusEq: { s.next(); const expr = parseExpr(s); - return NODE('addAssign', { dest, expr }); + return NODE('addAssign', { dest, expr }, loc); } case TokenKind.MinusEq: { s.next(); const expr = parseExpr(s); - return NODE('subAssign', { dest, expr }); + return NODE('subAssign', { dest, expr }, loc); } default: { return; diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index e43ce6ec..e1ae076f 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -52,6 +52,8 @@ export function parseTopLevel(s: ITokenStream): Cst.Node[] { * ``` */ export function parseNamespace(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.Colon2); s.expect(TokenKind.Identifier); @@ -88,7 +90,7 @@ export function parseNamespace(s: ITokenStream): Cst.Node { } s.nextWith(TokenKind.CloseBrace); - return NODE('ns', { name, members }); + return NODE('ns', { name, members }, loc); } /** @@ -97,6 +99,8 @@ export function parseNamespace(s: ITokenStream): Cst.Node { * ``` */ export function parseMeta(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + s.nextWith(TokenKind.Sharp3); let name; @@ -107,5 +111,5 @@ export function parseMeta(s: ITokenStream): Cst.Node { const value = parseStaticLiteral(s); - return NODE('meta', { name, value }); + return NODE('meta', { name, value }, loc); } diff --git a/src/parser/token.ts b/src/parser/token.ts index 7c771de2..02c0eedc 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -107,9 +107,12 @@ export enum TokenKind { CloseBrace, } +export type TokenLocation = { column: number, line: number }; + export class Token { constructor( public kind: TokenKind, + public loc: { column: number, line: number }, public hasLeftSpacing = false, /** for number literal, string literal */ public value?: string, @@ -122,6 +125,6 @@ export class Token { * - opts.value: for number literal, string literal * - opts.children: for template syntax */ -export function TOKEN(kind: TokenKind, opts?: { hasLeftSpacing?: boolean, value?: Token['value'], children?: Token['children'] }): Token { - return new Token(kind, opts?.hasLeftSpacing, opts?.value, opts?.children); +export function TOKEN(kind: TokenKind, loc: TokenLocation, opts?: { hasLeftSpacing?: boolean, value?: Token['value'], children?: Token['children'] }): Token { + return new Token(kind, loc, opts?.hasLeftSpacing, opts?.value, opts?.children); } diff --git a/test/index.ts b/test/index.ts index 7f46da85..4b1b1d95 100644 --- a/test/index.ts +++ b/test/index.ts @@ -2270,12 +2270,12 @@ describe('Location', () => { let node: Ast.Node; const parser = new Parser(); const nodes = parser.parse(` - @f(a) { a } + @f(a) { a } `); assert.equal(nodes.length, 1); node = nodes[0]; if (!node.loc) assert.fail(); - assert.deepEqual(node.loc, { start: 3, end: 13 }); + assert.deepEqual(node.loc, { line: 2, column: 4 }); }); }); diff --git a/test/parser.ts b/test/parser.ts index 4e4d3abb..1e009b07 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -1,6 +1,6 @@ import * as assert from 'assert'; import { Scanner } from '../src/parser/scanner'; -import { TOKEN, TokenKind } from '../src/parser/token'; +import { TOKEN, TokenKind, TokenLocation } from '../src/parser/token'; import { CharStream } from '../src/parser/streams/char-stream'; describe('CharStream', () => { @@ -53,28 +53,28 @@ describe('Scanner', () => { const stream = new Scanner(source); return stream; } - function next(stream: Scanner, kind: TokenKind, opts: { hasLeftSpacing?: boolean, value?: string }) { - assert.deepStrictEqual(stream.token, TOKEN(kind, opts)); + function next(stream: Scanner, kind: TokenKind, loc: TokenLocation, opts: { hasLeftSpacing?: boolean, value?: string }) { + assert.deepStrictEqual(stream.token, TOKEN(kind, loc, opts)); stream.next(); } test.concurrent('eof', async () => { const source = ''; const stream = init(source); - next(stream, TokenKind.EOF, { }); - next(stream, TokenKind.EOF, { }); + next(stream, TokenKind.EOF, { line: 1, column: 1 }, { }); + next(stream, TokenKind.EOF, { line: 1, column: 1 }, { }); }); test.concurrent('keyword', async () => { const source = 'if'; const stream = init(source); - next(stream, TokenKind.IfKeyword, { }); - next(stream, TokenKind.EOF, { }); + next(stream, TokenKind.IfKeyword, { line: 1, column: 1 }, { }); + next(stream, TokenKind.EOF, { line: 1, column: 3 }, { }); }); test.concurrent('identifier', async () => { const source = 'xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, { value: 'xyz' }); - next(stream, TokenKind.EOF, { }); + next(stream, TokenKind.Identifier, { line: 1, column: 1 }, { value: 'xyz' }); + next(stream, TokenKind.EOF, { line: 1, column: 4 }, { }); }); test.concurrent('invalid token', async () => { const source = '$'; @@ -86,27 +86,34 @@ describe('Scanner', () => { test.concurrent('words', async () => { const source = 'abc xyz'; const stream = init(source); - next(stream, TokenKind.Identifier, { value: 'abc' }); - next(stream, TokenKind.Identifier, { hasLeftSpacing: true, value: 'xyz' }); - next(stream, TokenKind.EOF, { }); + next(stream, TokenKind.Identifier, { line: 1, column: 1 }, { value: 'abc' }); + next(stream, TokenKind.Identifier, { line: 1, column: 5 }, { hasLeftSpacing: true, value: 'xyz' }); + next(stream, TokenKind.EOF, { line: 1, column: 8 }, { }); }); test.concurrent('stream', async () => { const source = '@abc() { }'; const stream = init(source); - next(stream, TokenKind.At, { }); - next(stream, TokenKind.Identifier, { value: 'abc' }); - next(stream, TokenKind.OpenParen, { }); - next(stream, TokenKind.CloseParen, { }); - next(stream, TokenKind.OpenBrace, { hasLeftSpacing: true }); - next(stream, TokenKind.CloseBrace, { hasLeftSpacing: true }); - next(stream, TokenKind.EOF, { }); + next(stream, TokenKind.At, { line: 1, column: 1 }, { }); + next(stream, TokenKind.Identifier, { line: 1, column: 2 }, { value: 'abc' }); + next(stream, TokenKind.OpenParen, { line: 1, column: 5 }, { }); + next(stream, TokenKind.CloseParen, { line: 1, column: 6 }, { }); + next(stream, TokenKind.OpenBrace, { line: 1, column: 8 }, { hasLeftSpacing: true }); + next(stream, TokenKind.CloseBrace, { line: 1, column: 10 }, { hasLeftSpacing: true }); + next(stream, TokenKind.EOF, { line: 1, column: 11 }, { }); + }); + test.concurrent('multi-lines', async () => { + const source = 'aaa\nbbb'; + const stream = init(source); + next(stream, TokenKind.Identifier, { line: 1, column: 1 }, { value: 'aaa' }); + next(stream, TokenKind.Identifier, { line: 2, column: 1 }, { value: 'bbb' }); + next(stream, TokenKind.EOF, { line: 2, column: 4 }, { }); }); test.concurrent('lookahead', async () => { const source = '@abc() { }'; const stream = init(source); - assert.deepStrictEqual(stream.lookahead(1), TOKEN(TokenKind.Identifier, { value: 'abc' })); - next(stream, TokenKind.At, { }); - next(stream, TokenKind.Identifier, { value: 'abc' }); - next(stream, TokenKind.OpenParen, { }); + assert.deepStrictEqual(stream.lookahead(1), TOKEN(TokenKind.Identifier, { line: 1, column: 2 }, { value: 'abc' })); + next(stream, TokenKind.At, { line: 1, column: 1 }, { }); + next(stream, TokenKind.Identifier, { line: 1, column: 2 }, { value: 'abc' }); + next(stream, TokenKind.OpenParen, { line: 1, column: 5 }, { }); }); }); From 0bd0c003d0e598adba73d4efac8b2b9ba4731547 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 18:30:30 +0900 Subject: [PATCH 109/126] fix newline --- src/parser/streams/char-stream.ts | 74 +++++++++++++++++++++---------- test/parser.ts | 1 + 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index 9a418d17..2eefc9bd 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -22,7 +22,7 @@ export class CharStream { this.address = 0; this.line = opts?.line ?? 0; this.column = opts?.column ?? 0; - this.loadChar(); + this.moveNext(); } public get eof(): boolean { @@ -44,35 +44,19 @@ export class CharStream { } public next(): void { - if (!this.endOfPage) { - this.address++; - } else if (!this.isLastPage) { - this.pageIndex++; - this.address = 0; - } - this.loadChar(); - - // column, line - if (!this.eof) { - if (this._char === '\n') { - this.line++; - this.column = 0; - } else if (this._char !== '\r') { - this.column++; - } + if (!this.eof && this._char === '\n') { + this.line++; + this.column = 0; } else { this.column++; } + this.incAddr(); + this.moveNext(); } public prev(): void { - if (this.address > 0) { - this.address--; - } else if (!this.isFirstPage) { - this.pageIndex--; - this.address = this.pages.get(this.pageIndex)!.length - 1; - } - this.loadChar(); + this.decAddr(); + this.movePrev(); } private get isFirstPage(): boolean { @@ -88,6 +72,48 @@ export class CharStream { return (this.address >= page.length); } + private moveNext() { + this.loadChar(); + while (true) { + if (!this.eof && this._char === '\r') { + this.incAddr(); + this.loadChar(); + continue; + } + break; + } + } + + private incAddr() { + if (!this.endOfPage) { + this.address++; + } else if (!this.isLastPage) { + this.pageIndex++; + this.address = 0; + } + } + + private movePrev() { + this.loadChar(); + while (true) { + if (!this.eof && this._char === '\r') { + this.decAddr(); + this.loadChar(); + continue; + } + break; + } + } + + private decAddr() { + if (this.address > 0) { + this.address--; + } else if (!this.isFirstPage) { + this.pageIndex--; + this.address = this.pages.get(this.pageIndex)!.length - 1; + } + } + private loadChar(): void { if (this.eof) { this._char = undefined; diff --git a/test/parser.ts b/test/parser.ts index 1e009b07..e202dadb 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -105,6 +105,7 @@ describe('Scanner', () => { const source = 'aaa\nbbb'; const stream = init(source); next(stream, TokenKind.Identifier, { line: 1, column: 1 }, { value: 'aaa' }); + next(stream, TokenKind.NewLine, { line: 1, column: 4 }, { }); next(stream, TokenKind.Identifier, { line: 2, column: 1 }, { value: 'bbb' }); next(stream, TokenKind.EOF, { line: 2, column: 4 }, { }); }); From 3b59ec3865be405f4e8619d829abf5d7c4ba3f61 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 19:05:16 +0900 Subject: [PATCH 110/126] refactor --- src/parser/scanner.ts | 6 +----- src/parser/streams/token-stream.ts | 14 +++----------- src/parser/syntaxes/expressions.ts | 1 - src/parser/token.ts | 2 -- test/parser.ts | 30 ++++++++++++++++++++++++++++-- 5 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index e972c888..f34c3929 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -28,10 +28,6 @@ export class Scanner implements ITokenStream { this._tokens.push(this.readToken()); } - public get eof(): boolean { - return this.stream.eof; - } - public get token(): Token { return this._tokens[0]!; } @@ -124,7 +120,7 @@ export class Scanner implements ITokenStream { this.stream.next(); token = TOKEN(TokenKind.OpenSharpBracket, loc, { hasLeftSpacing }); } else { - token = TOKEN(TokenKind.Sharp, loc, { hasLeftSpacing }); + throw new AiScriptSyntaxError(`invalid character: "#"`); } break; } diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index 7159d09c..872e2e68 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -6,7 +6,6 @@ import type { Token } from '../token.js'; * トークンの読み取りに関するインターフェース */ export interface ITokenStream { - get eof(): boolean; get token(): Token; get kind(): TokenKind; next(): void; @@ -21,26 +20,19 @@ export interface ITokenStream { export class TokenStream implements ITokenStream { private source: Token[]; private index: number; - private _token?: Token; + private _token: Token; constructor(source: TokenStream['source']) { this.source = source; this.index = 0; - } - - public init(): void { this.load(); } - public get eof(): boolean { + private get eof(): boolean { return (this.index >= this.source.length); } public get token(): Token { - if (this._token == null) { - // EOFトークンさえも入っていなかったらinitされていない - throw new Error('stream is not initialized yet'); - } if (this.eof) { return TOKEN(TokenKind.EOF, { line: -1, column: -1 }); } @@ -81,7 +73,7 @@ export class TokenStream implements ITokenStream { if (this.eof) { this._token = TOKEN(TokenKind.EOF, { line: -1, column: -1 }); } else { - this._token = this.source[this.index]; + this._token = this.source[this.index]!; } } } diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 12bafae8..31a73e8a 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -218,7 +218,6 @@ function parseAtom(s: ITokenStream): Cst.Node { case TokenKind.TemplateExprElement: { // スキャナで埋め込み式として事前に読み取っておいたトークン列をパースする const exprStream = new TokenStream(element.children!); - exprStream.init(); const expr = parseExpr(exprStream); if (exprStream.kind !== TokenKind.EOF) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[exprStream.token.kind]}`); diff --git a/src/parser/token.ts b/src/parser/token.ts index 02c0eedc..67aca6b6 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -37,8 +37,6 @@ export enum TokenKind { Not, /** "!=" */ NotEq, - /** "#" */ - Sharp, /** "#[" */ OpenSharpBracket, /** "###" */ diff --git a/test/parser.ts b/test/parser.ts index e202dadb..893fff91 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -46,6 +46,30 @@ describe('CharStream', () => { stream.next(); assert.strictEqual(true, stream.eof); }); + + test.concurrent('EOFでcharを参照するとエラー', async () => { + const source = ''; + const stream = new CharStream(source); + assert.strictEqual(true, stream.eof); + try { + stream.char; + } catch (e) { + return; + } + assert.fail(); + }); + + test.concurrent('CRは読み飛ばされる', async () => { + const source = 'a\r\nb'; + const stream = new CharStream(source); + assert.strictEqual('a', stream.char); + stream.next(); + assert.strictEqual('\n', stream.char); + stream.next(); + assert.strictEqual('b', stream.char); + stream.next(); + assert.strictEqual(true, stream.eof); + }); }); describe('Scanner', () => { @@ -80,8 +104,10 @@ describe('Scanner', () => { const source = '$'; try { const stream = new Scanner(source); - assert.fail(); - } catch (e) { } + } catch (e) { + return; + } + assert.fail(); }); test.concurrent('words', async () => { const source = 'abc xyz'; From 79b0cb29505479509abce11589e353fdbfa4ee01 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 19:13:09 +0900 Subject: [PATCH 111/126] test --- test/index.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/index.ts b/test/index.ts index 4b1b1d95..48cb8a6b 100644 --- a/test/index.ts +++ b/test/index.ts @@ -478,6 +478,20 @@ describe('Cannot put multiple statements in a line', () => { } assert.fail(); }); + + test.concurrent('var def in block', async () => { + try { + await exe(` + eval { + let a = 42 let b = 11 + } + `); + } catch (e) { + assert.ok(true); + return; + } + assert.fail(); + }); }); test.concurrent('empty function', async () => { From 3278f61e49863418e52814cea53ea619a50fd78c Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 19:56:45 +0900 Subject: [PATCH 112/126] meta --- src/parser/syntaxes/common.ts | 16 --------- src/parser/syntaxes/expressions.ts | 52 ++++++++++++++++++------------ src/parser/syntaxes/statements.ts | 22 ++++++------- src/parser/syntaxes/toplevel.ts | 10 +++--- 4 files changed, 47 insertions(+), 53 deletions(-) diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 57b0d460..8919009f 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -64,22 +64,6 @@ export function parseBlock(s: ITokenStream): Cst.Node[] { return steps; } -//#region Static Literal - -export function parseStaticLiteral(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -export function parseStaticArray(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -export function parseStaticObject(s: ITokenStream): Cst.Node { - throw new Error('todo'); -} - -//#endregion Static Literal - //#region Type export function parseType(s: ITokenStream): Cst.Node { diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 31a73e8a..f21eccc3 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -8,8 +8,12 @@ import { parseBlockOrStatement } from './statements.js'; import type * as Cst from '../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; -export function parseExpr(s: ITokenStream): Cst.Node { - return parsePratt(s, 0); +export function parseExpr(s: ITokenStream, isStatic: boolean): Cst.Node { + if (isStatic) { + return parseAtom(s, true); + } else { + return parsePratt(s, 0); + } } // NOTE: infix(中置演算子)ではlbpを大きくすると右結合、rbpを大きくすると左結合の演算子になります。 @@ -173,7 +177,7 @@ function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { } case TokenKind.OpenBracket: { s.next(); - const index = parseExpr(s); + const index = parseExpr(s, false); s.nextWith(TokenKind.CloseBracket); return NODE('index', { @@ -187,28 +191,35 @@ function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { } } -function parseAtom(s: ITokenStream): Cst.Node { +function parseAtom(s: ITokenStream, isStatic: boolean): Cst.Node { const loc = s.token.loc; switch (s.kind) { case TokenKind.IfKeyword: { + if (isStatic) break; return parseIf(s); } case TokenKind.At: { + if (isStatic) break; return parseFnExpr(s); } case TokenKind.MatchKeyword: { + if (isStatic) break; return parseMatch(s); } case TokenKind.EvalKeyword: { + if (isStatic) break; return parseEval(s); } case TokenKind.ExistsKeyword: { + if (isStatic) break; return parseExists(s); } case TokenKind.Template: { const values: (string | Cst.Node)[] = []; + if (isStatic) break; + for (const element of s.token.children!) { switch (element.kind) { case TokenKind.TemplateStringElement: { @@ -218,7 +229,7 @@ function parseAtom(s: ITokenStream): Cst.Node { case TokenKind.TemplateExprElement: { // スキャナで埋め込み式として事前に読み取っておいたトークン列をパースする const exprStream = new TokenStream(element.children!); - const expr = parseExpr(exprStream); + const expr = parseExpr(exprStream, false); if (exprStream.kind !== TokenKind.EOF) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[exprStream.token.kind]}`); } @@ -256,24 +267,23 @@ function parseAtom(s: ITokenStream): Cst.Node { return NODE('null', { }, loc); } case TokenKind.OpenBrace: { - return parseObject(s); + return parseObject(s, isStatic); } case TokenKind.OpenBracket: { - return parseArray(s); + return parseArray(s, isStatic); } case TokenKind.Identifier: { + if (isStatic) break; return parseReference(s); } case TokenKind.OpenParen: { s.next(); - const expr = parseExpr(s); + const expr = parseExpr(s, isStatic); s.nextWith(TokenKind.CloseParen); return expr; } - default: { - throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); - } } + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); } /** @@ -295,7 +305,7 @@ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { } } - items.push(parseExpr(s)); + items.push(parseExpr(s, false)); } s.nextWith(TokenKind.CloseParen); @@ -315,13 +325,13 @@ function parseIf(s: ITokenStream): Cst.Node { const loc = s.token.loc; s.nextWith(TokenKind.IfKeyword); - const cond = parseExpr(s); + const cond = parseExpr(s, false); const then = parseBlockOrStatement(s); const elseif: { cond: Cst.Node, then: Cst.Node }[] = []; while (s.kind === TokenKind.ElifKeyword) { s.next(); - const elifCond = parseExpr(s); + const elifCond = parseExpr(s, false); const elifThen = parseBlockOrStatement(s); elseif.push({ cond: elifCond, then: elifThen }); } @@ -363,7 +373,7 @@ function parseMatch(s: ITokenStream): Cst.Node { const loc = s.token.loc; s.nextWith(TokenKind.MatchKeyword); - const about = parseExpr(s); + const about = parseExpr(s, false); s.nextWith(TokenKind.OpenBrace); s.nextWith(TokenKind.NewLine); @@ -371,7 +381,7 @@ function parseMatch(s: ITokenStream): Cst.Node { const qs: { q: Cst.Node, a: Cst.Node }[] = []; while (s.kind !== TokenKind.DefaultKeyword && s.kind !== TokenKind.CloseBrace) { s.nextWith(TokenKind.CaseKeyword); - const q = parseExpr(s); + const q = parseExpr(s, false); s.nextWith(TokenKind.Arrow); const a = parseBlockOrStatement(s); s.nextWith(TokenKind.NewLine); @@ -446,7 +456,7 @@ function parseReference(s: ITokenStream): Cst.Node { * Object = "{" [IDENT ":" Expr *(("," / ";" / SPACE) IDENT ":" Expr) ["," / ";"]] "}" * ``` */ -function parseObject(s: ITokenStream): Cst.Node { +function parseObject(s: ITokenStream, isStatic: boolean): Cst.Node { const loc = s.token.loc; s.nextWith(TokenKind.OpenBrace); @@ -463,7 +473,7 @@ function parseObject(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Colon); - const v = parseExpr(s); + const v = parseExpr(s, isStatic); map.set(k, v); @@ -497,7 +507,7 @@ function parseObject(s: ITokenStream): Cst.Node { * Array = "[" [Expr *(("," / SPACE) Expr) [","]] "]" * ``` */ -function parseArray(s: ITokenStream): Cst.Node { +function parseArray(s: ITokenStream, isStatic: boolean): Cst.Node { const loc = s.token.loc; s.nextWith(TokenKind.OpenBracket); @@ -508,7 +518,7 @@ function parseArray(s: ITokenStream): Cst.Node { const value = []; while (s.kind !== TokenKind.CloseBracket) { - value.push(parseExpr(s)); + value.push(parseExpr(s, isStatic)); // separator if ((s.kind as TokenKind) === TokenKind.CloseBracket) { @@ -551,7 +561,7 @@ function parsePratt(s: ITokenStream, minBp: number): Cst.Node { if (prefix != null) { left = parsePrefix(s, prefix.bp); } else { - left = parseAtom(s); + left = parseAtom(s, false); } while (true) { diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index c58a1bda..4beebb17 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -54,7 +54,7 @@ export function parseStatement(s: ITokenStream): Cst.Node { return NODE('continue', {}, loc); } } - const expr = parseExpr(s); + const expr = parseExpr(s, false); const assign = tryParseAssign(s, expr); if (assign) { return assign; @@ -129,7 +129,7 @@ function parseVarDef(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Eq); - const expr = parseExpr(s); + const expr = parseExpr(s, false); return NODE('def', { name, varType: ty, expr, mut, attr: [] }, loc); } @@ -175,7 +175,7 @@ function parseOut(s: ITokenStream): Cst.Node { const loc = s.token.loc; s.nextWith(TokenKind.Out); - const expr = parseExpr(s); + const expr = parseExpr(s, false); return CALL_NODE('print', [expr], loc); } @@ -208,7 +208,7 @@ function parseEach(s: ITokenStream): Cst.Node { throw new AiScriptSyntaxError('separator expected'); } - const items = parseExpr(s); + const items = parseExpr(s, false); if (hasParen) { s.nextWith(TokenKind.CloseParen); @@ -247,7 +247,7 @@ function parseFor(s: ITokenStream): Cst.Node { let _from; if ((s.kind as TokenKind) === TokenKind.Eq) { s.next(); - _from = parseExpr(s); + _from = parseExpr(s, false); } else { _from = NODE('num', { value: 0 }, identLoc); } @@ -258,7 +258,7 @@ function parseFor(s: ITokenStream): Cst.Node { throw new AiScriptSyntaxError('separator expected'); } - const to = parseExpr(s); + const to = parseExpr(s, false); if (hasParen) { s.nextWith(TokenKind.CloseParen); @@ -275,7 +275,7 @@ function parseFor(s: ITokenStream): Cst.Node { } else { // times syntax - const times = parseExpr(s); + const times = parseExpr(s, false); if (hasParen) { s.nextWith(TokenKind.CloseParen); @@ -299,7 +299,7 @@ function parseReturn(s: ITokenStream): Cst.Node { const loc = s.token.loc; s.nextWith(TokenKind.ReturnKeyword); - const expr = parseExpr(s); + const expr = parseExpr(s, false); return NODE('return', { expr }, loc); } @@ -374,17 +374,17 @@ function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { switch (s.kind) { case TokenKind.Eq: { s.next(); - const expr = parseExpr(s); + const expr = parseExpr(s, false); return NODE('assign', { dest, expr }, loc); } case TokenKind.PlusEq: { s.next(); - const expr = parseExpr(s); + const expr = parseExpr(s, false); return NODE('addAssign', { dest, expr }, loc); } case TokenKind.MinusEq: { s.next(); - const expr = parseExpr(s); + const expr = parseExpr(s, false); return NODE('subAssign', { dest, expr }, loc); } default: { diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index e1ae076f..7b23a676 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -2,7 +2,7 @@ import { NODE } from '../node.js'; import { TokenKind } from '../token.js'; import { AiScriptSyntaxError } from '../../error.js'; import { parseDefStatement, parseStatement } from './statements.js'; -import { parseStaticLiteral } from './common.js'; +import { parseExpr } from './expressions.js'; import type * as Cst from '../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; @@ -95,7 +95,7 @@ export function parseNamespace(s: ITokenStream): Cst.Node { /** * ```abnf - * Meta = "###" [IDENT] StaticLiteral + * Meta = "###" [IDENT] StaticExpr * ``` */ export function parseMeta(s: ITokenStream): Cst.Node { @@ -103,13 +103,13 @@ export function parseMeta(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Sharp3); - let name; + let name = null; if (s.kind === TokenKind.Identifier) { - name = s.token.value; + name = s.token.value!; s.next(); } - const value = parseStaticLiteral(s); + const value = parseExpr(s, true); return NODE('meta', { name, value }, loc); } From 5ed881e1669bc2ba202dffa897de0be72bb5f31c Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 19:56:59 +0900 Subject: [PATCH 113/126] lint --- src/parser/scanner.ts | 4 ++-- src/parser/streams/char-stream.ts | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index f34c3929..c2b86d11 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -120,7 +120,7 @@ export class Scanner implements ITokenStream { this.stream.next(); token = TOKEN(TokenKind.OpenSharpBracket, loc, { hasLeftSpacing }); } else { - throw new AiScriptSyntaxError(`invalid character: "#"`); + throw new AiScriptSyntaxError('invalid character: "#"'); } break; } @@ -407,7 +407,7 @@ export class Scanner implements ITokenStream { } if (!this.stream.eof && this.stream.char === '.') { this.stream.next(); - while (!this.stream.eof && digit.test(this.stream.char)) { + while (!this.stream.eof as boolean && digit.test(this.stream.char as string)) { fractional += this.stream.char; this.stream.next(); } diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index 2eefc9bd..80d47f62 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -72,7 +72,7 @@ export class CharStream { return (this.address >= page.length); } - private moveNext() { + private moveNext(): void { this.loadChar(); while (true) { if (!this.eof && this._char === '\r') { @@ -84,7 +84,7 @@ export class CharStream { } } - private incAddr() { + private incAddr(): void { if (!this.endOfPage) { this.address++; } else if (!this.isLastPage) { @@ -93,7 +93,7 @@ export class CharStream { } } - private movePrev() { + private movePrev(): void { this.loadChar(); while (true) { if (!this.eof && this._char === '\r') { @@ -105,7 +105,7 @@ export class CharStream { } } - private decAddr() { + private decAddr(): void { if (this.address > 0) { this.address--; } else if (!this.isFirstPage) { From e39ebc0c73fb37bbb07db1a7b034455eea77f2c0 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 20:32:50 +0900 Subject: [PATCH 114/126] var def, attr, if --- src/parser/syntaxes/expressions.ts | 7 +++++++ src/parser/syntaxes/statements.ts | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index f21eccc3..f0cf486e 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -328,11 +328,18 @@ function parseIf(s: ITokenStream): Cst.Node { const cond = parseExpr(s, false); const then = parseBlockOrStatement(s); + if (s.kind === TokenKind.NewLine) { + s.next(); + } + const elseif: { cond: Cst.Node, then: Cst.Node }[] = []; while (s.kind === TokenKind.ElifKeyword) { s.next(); const elifCond = parseExpr(s, false); const elifThen = parseBlockOrStatement(s); + if ((s.kind as TokenKind) === TokenKind.NewLine) { + s.next(); + } elseif.push({ cond: elifCond, then: elifThen }); } diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 4beebb17..17e2d014 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -129,6 +129,10 @@ function parseVarDef(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.Eq); + if ((s.kind as TokenKind) === TokenKind.NewLine) { + s.next(); + } + const expr = parseExpr(s, false); return NODE('def', { name, varType: ty, expr, mut, attr: [] }, loc); @@ -312,6 +316,7 @@ function parseStatementWithAttr(s: ITokenStream): Cst.Node { const attrs: Cst.Attribute[] = []; while (s.kind === TokenKind.OpenSharpBracket) { attrs.push(parseAttr(s) as Cst.Attribute); + s.nextWith(TokenKind.NewLine); } const statement = parseStatement(s); @@ -330,7 +335,7 @@ function parseStatementWithAttr(s: ITokenStream): Cst.Node { /** * ```abnf - * Attr = "#[" IDENT [StaticLiteral] "]" + * Attr = "#[" IDENT [StaticExpr] "]" * ``` */ function parseAttr(s: ITokenStream): Cst.Node { @@ -338,15 +343,21 @@ function parseAttr(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.OpenSharpBracket); + s.expect(TokenKind.Identifier); const name = s.token.value!; s.next(); - // TODO: value + let value; + if (s.kind !== TokenKind.CloseBracket) { + value = parseExpr(s, true); + } else { + value = NODE('bool', { value: true }, loc); + } s.nextWith(TokenKind.CloseBracket); - return NODE('attr', { name, value: undefined }, loc); + return NODE('attr', { name, value }, loc); } /** From 32791759a229e0d3ab2002cc2498950833bca858 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 20:34:43 +0900 Subject: [PATCH 115/126] lint --- src/parser/syntaxes/statements.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 17e2d014..4a8093b7 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -343,7 +343,6 @@ function parseAttr(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.OpenSharpBracket); - s.expect(TokenKind.Identifier); const name = s.token.value!; s.next(); From 9735ff3d93a7d6ad12d57658ce7f24b9745a3e9e Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 7 Oct 2023 23:23:08 +0900 Subject: [PATCH 116/126] if --- src/parser/syntaxes/expressions.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index f0cf486e..bee3acf2 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -328,7 +328,7 @@ function parseIf(s: ITokenStream): Cst.Node { const cond = parseExpr(s, false); const then = parseBlockOrStatement(s); - if (s.kind === TokenKind.NewLine) { + if (s.kind === TokenKind.NewLine && [TokenKind.ElifKeyword, TokenKind.ElseKeyword].includes(s.lookahead(1).kind)) { s.next(); } @@ -337,7 +337,7 @@ function parseIf(s: ITokenStream): Cst.Node { s.next(); const elifCond = parseExpr(s, false); const elifThen = parseBlockOrStatement(s); - if ((s.kind as TokenKind) === TokenKind.NewLine) { + if ((s.kind as TokenKind) === TokenKind.NewLine && [TokenKind.ElifKeyword, TokenKind.ElseKeyword].includes(s.lookahead(1).kind)) { s.next(); } elseif.push({ cond: elifCond, then: elifThen }); From 3f9c985adbd2a1c3403c1aafdf03d086bfd24019 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 09:55:50 +0900 Subject: [PATCH 117/126] types --- src/parser/syntaxes/common.ts | 77 ++++++++++++++++++++++++++---- src/parser/syntaxes/expressions.ts | 10 ++-- src/parser/syntaxes/statements.ts | 14 ++++-- 3 files changed, 85 insertions(+), 16 deletions(-) diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 8919009f..4d908403 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -1,6 +1,7 @@ import { TokenKind } from '../token.js'; import { AiScriptSyntaxError } from '../../error.js'; import { parseStatement } from './statements.js'; +import { NODE } from '../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; import type * as Cst from '../node.js'; @@ -10,8 +11,8 @@ import type * as Cst from '../node.js'; * Params = "(" [IDENT *(("," / SPACE) IDENT)] ")" * ``` */ -export function parseParams(s: ITokenStream): { name: string }[] { - const items: { name: string }[] = []; +export function parseParams(s: ITokenStream): { name: string, argType?: Cst.Node }[] { + const items: { name: string, argType?: Cst.Node }[] = []; s.nextWith(TokenKind.OpenParen); @@ -26,8 +27,16 @@ export function parseParams(s: ITokenStream): { name: string }[] { } s.expect(TokenKind.Identifier); - items.push({ name: s.token.value! }); + const name = s.token.value!; s.next(); + + let type; + if ((s.kind as TokenKind) === TokenKind.Colon) { + s.next(); + type = parseType(s); + } + + items.push({ name, argType: type }); } s.nextWith(TokenKind.CloseParen); @@ -67,15 +76,67 @@ export function parseBlock(s: ITokenStream): Cst.Node[] { //#region Type export function parseType(s: ITokenStream): Cst.Node { - throw new Error('todo'); + if (s.kind === TokenKind.At) { + return parseFnType(s); + } else { + return parseNamedType(s); + } } -export function parseFnType(s: ITokenStream): Cst.Node { - throw new Error('todo'); +/** + * ```abnf + * FnType = "@" "(" ParamTypes ")" "=>" Type + * ParamTypes = [Type *(("," / SPACE) Type)] + * ``` +*/ +function parseFnType(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.At); + s.nextWith(TokenKind.OpenParen); + + const params: Cst.Node[] = []; + while (s.kind !== TokenKind.CloseParen) { + if (params.length > 0) { + if (s.kind === TokenKind.Comma) { + s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + } + const type = parseType(s); + params.push(type); + } + + s.nextWith(TokenKind.CloseParen); + s.nextWith(TokenKind.Arrow); + + const resultType = parseType(s); + + return NODE('fnTypeSource', { args: params, result: resultType }, loc); } -export function parseNamedType(s: ITokenStream): Cst.Node { - throw new Error('todo'); +/** + * ```abnf + * NamedType = IDENT ["<" Type ">"] + * ``` +*/ +function parseNamedType(s: ITokenStream): Cst.Node { + const loc = s.token.loc; + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + // inner type + let inner = null; + if (s.kind === TokenKind.Lt) { + s.next(); + inner = parseType(s); + s.nextWith(TokenKind.Gt); + } + + return NODE('namedTypeSource', { name, inner }, loc); } //#endregion Type diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index bee3acf2..7e0614ef 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -2,7 +2,7 @@ import { AiScriptSyntaxError } from '../../error.js'; import { CALL_NODE, NODE } from '../node.js'; import { TokenStream } from '../streams/token-stream.js'; import { TokenKind } from '../token.js'; -import { parseBlock, parseParams } from './common.js'; +import { parseBlock, parseParams, parseType } from './common.js'; import { parseBlockOrStatement } from './statements.js'; import type * as Cst from '../node.js'; @@ -364,11 +364,15 @@ function parseFnExpr(s: ITokenStream): Cst.Node { const params = parseParams(s); - // type + let type; + if ((s.kind as TokenKind) === TokenKind.Colon) { + s.next(); + type = parseType(s); + } const body = parseBlock(s); - return NODE('fn', { args: params, retType: undefined, children: body }, loc); + return NODE('fn', { args: params, retType: type, children: body }, loc); } /** diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 4a8093b7..7cd88f6e 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -121,10 +121,10 @@ function parseVarDef(s: ITokenStream): Cst.Node { const name = s.token.value!; s.next(); - let ty; + let type; if ((s.kind as TokenKind) === TokenKind.Colon) { s.next(); - ty = parseType(s); + type = parseType(s); } s.nextWith(TokenKind.Eq); @@ -135,7 +135,7 @@ function parseVarDef(s: ITokenStream): Cst.Node { const expr = parseExpr(s, false); - return NODE('def', { name, varType: ty, expr, mut, attr: [] }, loc); + return NODE('def', { name, varType: type, expr, mut, attr: [] }, loc); } /** @@ -154,7 +154,11 @@ function parseFnDef(s: ITokenStream): Cst.Node { const params = parseParams(s); - // type + let type; + if ((s.kind as TokenKind) === TokenKind.Colon) { + s.next(); + type = parseType(s); + } const body = parseBlock(s); @@ -162,7 +166,7 @@ function parseFnDef(s: ITokenStream): Cst.Node { name, expr: NODE('fn', { args: params, - retType: undefined, // TODO: type + retType: type, children: body, }, loc), mut: false, From 4749274d7067a081d3d20dd072afbec8ea66f0dc Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 09:56:14 +0900 Subject: [PATCH 118/126] remove peg parser --- src/parser/parser.peggy | 596 ---------------------------------------- 1 file changed, 596 deletions(-) delete mode 100644 src/parser/parser.peggy diff --git a/src/parser/parser.peggy b/src/parser/parser.peggy deleted file mode 100644 index 01f9df8a..00000000 --- a/src/parser/parser.peggy +++ /dev/null @@ -1,596 +0,0 @@ -{ - function createNode(type, params, children) { - const node = { type }; - params.children = children; - for (const key of Object.keys(params)) { - if (params[key] !== undefined) { - node[key] = params[key]; - } - } - const loc = location(); - node.loc = { start: loc.start.offset, end: loc.end.offset - 1 }; - return node; - } -} - -// -// preprocessor -// - -Preprocess - = s:PreprocessPart* -{ return s.join(''); } - -PreprocessPart - = Tmpl { return text(); } - / Str { return text(); } - / Comment - / . - -Comment - = "//" (!EOL .)* { return ''; } - / "/*" (!"*/" .)* "*/" { return ''; } - - -// -// main parser -// - -Main - = _* content:GlobalStatements? _* -{ return content ?? []; } - -GlobalStatements - = head:GlobalStatement tails:(__* LF _* s:GlobalStatement { return s; })* -{ return [head, ...tails]; } - -NamespaceStatements - = head:NamespaceStatement tails:(__* LF _* s:NamespaceStatement { return s; })* -{ return [head, ...tails]; } - -Statements - = head:Statement tails:(__* LF _* s:Statement { return s; })* -{ return [head, ...tails]; } - -// list of global statements - -GlobalStatement - = Namespace // "::" - / Meta // "###" - / Statement - -// list of namespace statement - -NamespaceStatement - = VarDef - / FnDef - / Namespace - -// list of statement - -Statement - = VarDef // "let" NAME | "var" NAME - / FnDef // "@" - / Out // "<:" - / Return // "return" - / Attr // "+" - / Each // "each" - / For // "for" - / Loop // "loop" - / Break // "break" - / Continue // "continue" - / Assign // Expr "=" | Expr "+=" | Expr "-=" - / Expr - -// list of expression - -Expr - = Infix - / Expr2 - -Expr2 - = If // "if" - / Fn // "@(" - / Chain // Expr3 "(" | Expr3 "[" | Expr3 "." - / Expr3 - -Expr3 - = Match // "match" - / Eval // "eval" - / Exists // "exists" - / Tmpl // "`" - / Str // "\"" - / Num // "+" | "-" | "1"~"9" - / Bool // "true" | "false" - / Null // "null" - / Obj // "{" - / Arr // "[" - / Not // "!" - / Identifier // NAME_WITH_NAMESPACE - / "(" _* e:Expr _* ")" { return e; } - -// list of static literal - -StaticLiteral - = Num // "+" "1"~"9" | "-" "1"~"9" | "1"~"9" - / Str // "\"" - / Bool // "true" | "false" - / StaticArr // "[" - / StaticObj // "{" - / Null // "null" - - - -// -// global statements --------------------------------------------------------------------- -// - -// namespace statement - -Namespace - = "::" _+ name:NAME _+ "{" _* members:NamespaceStatements? _* "}" -{ return createNode('ns', { name, members }); } - -// meta statement - -Meta - = "###" __* name:NAME _* value:StaticLiteral -{ return createNode('meta', { name, value }); } - / "###" __* value:StaticLiteral -{ return createNode('meta', { name: null, value }); } - - - -// -// statements ---------------------------------------------------------------------------- -// - -// define statement - -VarDef - = "let" _+ name:NAME type:(_* ":" _* @Type)? _* "=" _* expr:Expr -{ return createNode('def', { name, varType: type, expr, mut: false, attr: [] }); } - / "var" _+ name:NAME type:(_* ":" _* @Type)? _* "=" _* expr:Expr -{ return createNode('def', { name, varType: type, expr, mut: true, attr: [] }); } - -// output statement - -// NOTE: out is syntax sugar for print(expr) -Out - = "<:" _* expr:Expr -{ - return createNode('identifier', { - name: 'print', - chain: [createNode('callChain', { args: [expr] })], - }); -} - -// attribute statement - -// Note: Attribute will be combined with def node when parsing is complete. -Attr - = "#[" _* name:NAME value:(_* @StaticLiteral)? _* "]" -{ - return createNode('attr', { - name: name, - value: value ?? createNode('bool', { value: true }) - }); -} - -// each statement - -Each - = "each" _* "(" "let" _+ varn:NAME _* ","? _* items:Expr ")" _* x:BlockOrStatement -{ - return createNode('each', { - var: varn, - items: items, - for: x, - }); -} - / "each" _+ "let" _+ varn:NAME _* ","? _* items:Expr _+ x:BlockOrStatement -{ - return createNode('each', { - var: varn, - items: items, - for: x, - }); -} - -// for statement - -For - = "for" _* "(" "let" _+ varn:NAME _* from_:("=" _* v:Expr { return v; })? ","? _* to:Expr ")" _* x:BlockOrStatement -{ - return createNode('for', { - var: varn, - from: from_ ?? createNode('num', { value: 0 }), - to: to, - for: x, - }); -} - / "for" _+ "let" _+ varn:NAME _* from_:("=" _* v:Expr { return v; })? ","? _* to:Expr _+ x:BlockOrStatement -{ - return createNode('for', { - var: varn, - from: from_ ?? createNode('num', { value: 0 }), - to: to, - for: x, - }); -} - / "for" _* "(" times:Expr ")" _* x:BlockOrStatement -{ - return createNode('for', { - times: times, - for: x, - }); -} - / "for" _+ times:Expr _+ x:BlockOrStatement -{ - return createNode('for', { - times: times, - for: x, - }); -} - -// return statement - -Return - = "return" ![A-Z0-9_:]i _* expr:Expr -{ return createNode('return', { expr }); } - -// loop statement - -Loop - = "loop" _* "{" _* s:Statements _* "}" -{ return createNode('loop', { statements: s }); } - -// break statement - -Break - = "break" ![A-Z0-9_:]i -{ return createNode('break', {}); } - -// continue statement - -Continue - = "continue" ![A-Z0-9_:]i -{ return createNode('continue', {}); } - -// assign statement - -Assign - = dest:Expr _* op:("+=" / "-=" / "=") _* expr:Expr -{ - if (op === '+=') - return createNode('addAssign', { dest, expr }); - else if (op === '-=') - return createNode('subAssign', { dest, expr }); - else - return createNode('assign', { dest, expr }); -} - - - -// -// expressions -------------------------------------------------------------------- -// - -// infix expression - -Infix - = head:Expr2 tail:(InfixSp* op:Op InfixSp* term:Expr2 { return {op, term}; })+ -{ - return createNode('infix', { - operands: [head, ...tail.map(i => i.term)], - operators: tail.map(i => i.op) - }); -} - -InfixSp - = "\\" LF - / _ - -Op - = ("||" / "&&" / "==" / "!=" / "<=" / ">=" / "<" / ">" / "+" / "-" / "*" / "^" / "/" / "%") -{ return text(); } - -Not - = "!" expr:Expr -{ - return createNode('not', { - expr: expr, - }); -} - - -// chain - -Chain - = e:Expr3 chain:(CallChain / IndexChain / PropChain)+ -{ return { ...e, chain }; } - -CallChain - = "(" _* args:CallArgs? _* ")" -{ return createNode('callChain', { args: args ?? [] }); } - -CallArgs - = head:Expr tails:(SEP expr:Expr { return expr; })* -{ return [head, ...tails]; } - -IndexChain - = "[" _* index:Expr _* "]" -{ return createNode('indexChain', { index }); } - -PropChain - = "." name:NAME -{ return createNode('propChain', { name }); } - -// if statement - -If - = "if" _+ cond:Expr _+ then:BlockOrStatement elseif:(_+ @ElseifBlocks)? elseBlock:(_+ @ElseBlock)? -{ - return createNode('if', { - cond: cond, - then: then, - elseif: elseif ?? [], - else: elseBlock - }); -} - -ElseifBlocks - = head:ElseifBlock tails:(_* @ElseifBlock)* -{ return [head, ...tails]; } - -ElseifBlock - = "elif" ![A-Z0-9_:]i _* cond:Expr _* then:BlockOrStatement -{ return { cond, then }; } - -ElseBlock - = "else" ![A-Z0-9_:]i _* then:BlockOrStatement -{ return then; } - -// match expression - -Match - = "match" ![A-Z0-9_:]i _* about:Expr _* "{" _* qs:("case" _+ q:Expr _* "=>" _* a:BlockOrStatement _* { return { q, a }; })+ x:("default" _* "=>" _* @BlockOrStatement _*)? _* "}" -{ - return createNode('match', { - about: about, - qs: qs ?? [], - default: x - }); -} - -// eval expression - -Eval - = "eval" _* "{" _* s:Statements _* "}" -{ return createNode('block', { statements: s }); } - -// exists expression - -Exists - = "exists" _+ i:Identifier -{ return createNode('exists', { identifier: i }); } - -// variable reference expression - -Identifier - = name:NAME_WITH_NAMESPACE -{ return createNode('identifier', { name }); } - - - -// -// literals ------------------------------------------------------------------------------ -// - -// template literal - -Tmpl - = "`" items:(!"`" @TmplEmbed)* "`" -{ return createNode('tmpl', { tmpl: items }); } - -TmplEmbed - = "{" __* @expr:Expr __* "}" - / str:TmplAtom+ {return str.join("")} - -TmplAtom - = TmplEsc - / [^`{] - -TmplEsc - = "\\" @[{}`] - -// string literal - -Str - = "\"" value:(!"\"" c:(StrDoubleQuoteEsc / .) { return c; })* "\"" -{ return createNode('str', { value: value.join('') }); } - / "'" value:(!"'" c:(StrSingleQuoteEsc / .) { return c; })* "'" -{ return createNode('str', { value: value.join('') }); } - -StrDoubleQuoteEsc - = "\\\"" -{ return '"'; } - -StrSingleQuoteEsc - = "\\\'" -{ return '\''; } - -// number literal -Num - = Float - / Int - -Float - = [+-]? [1-9] [0-9]+ "." [0-9]+ - { return createNode('num', { value: parseFloat(text())}); } - / [+-]? [0-9] "." [0-9]+ - { return createNode('num', { value: parseFloat(text())}); } - -Int - = [+-]? [1-9] [0-9]+ -{ return createNode('num', { value: parseInt(text(), 10) }); } - / [+-]? [0-9] -{ return createNode('num', { value: parseInt(text(), 10) }); } - -// boolean literal - -Bool - = True - / False - -True - = "true" ![A-Z0-9_:]i -{ return createNode('bool', { value: true }); } - -False - = "false" ![A-Z0-9_:]i -{ return createNode('bool', { value: false }); } - -// null literal - -Null - = "null" ![A-Z0-9_:]i -{ return createNode('null', {}); } - -// object literal - -Obj - = "{" _* kvs:(k:NAME _* ":" _+ v:Expr _* ("," / ";")? _* { return { k, v }; })* "}" -{ - const obj = new Map(); - for (const kv of kvs) { - obj.set(kv.k, kv.v); - } - return createNode('obj', { value: obj }); -} - -// array literal - -Arr - = "[" _* items:(item:Expr _* ","? _* { return item; })* _* "]" -{ return createNode('arr', { value: items }); } - - - -// -// function ------------------------------------------------------------------------------ -// - -Arg - = name:NAME type:(_* ":" _* @Type)? -{ return { name, argType: type }; } - -Args - = head:Arg tails:(SEP @Arg)* -{ return [head, ...tails]; } - -// define function statement - -FnDef - = "@" s1:__* name:NAME s2:__* "(" _* args:Args? _* ")" ret:(_* ":" _* @Type)? _* "{" _* content:Statements? _* "}" -{ - if (s1.length > 0 || s2.length > 0) { - error('Cannot use spaces before or after the function name.'); - } - return createNode('def', { - name: name, - expr: createNode('fn', { args: args ?? [], retType: ret }, content ?? []), - mut: false, - attr: [] - }); -} - -// function expression - -Fn = "@(" _* args:Args? _* ")" ret:(_* ":" _* @Type)? _* "{" _* content:Statements? _* "}" -{ return createNode('fn', { args: args ?? [], retType: ret }, content ?? []); } - - - -// -// static literal ------------------------------------------------------------------------ -// - -// array literal (static) - -StaticArr - = "[" _* items:(item:StaticLiteral _* ","? _* { return item; })* _* "]" -{ return createNode('arr', { value: items }); } - -// object literal (static) - -StaticObj - = "{" _* kvs:(k:NAME _* ":" _+ v:StaticLiteral _* ("," / ";")? _* { return { k, v }; })* "}" -{ - const obj = new Map(); - for (const kv of kvs) { - obj.set(kv.k, kv.v); - } - return createNode('obj', { value: obj }); -} - - - -// -// type ---------------------------------------------------------------------------------- -// - -Type - = FnType - / NamedType - -FnType - = "@(" _* args:ArgTypes? _* ")" _* "=>" _* result:Type -{ return createNode('fnTypeSource', { args: args ?? [], result }); } - -ArgTypes - = head:Type tails:(SEP @Type)* -{ return [head, ...tails]; } - -NamedType - = name:NAME __* "<" __* inner:Type __* ">" -{ return createNode('namedTypeSource', { name, inner }); } - / name:NAME -{ return createNode('namedTypeSource', { name, inner: null }); } - - - -// -// general ------------------------------------------------------------------------------- -// - -NAME - = [A-Z_]i [A-Z0-9_]i* -{ return text(); } - -NAME_WITH_NAMESPACE - = NAME (":" NAME)* -{ return text(); } - -SEP - = _* "," _* - / _+ - -BlockOrStatement - = "{" _* s:Statements? _* "}" -{ return createNode('block', { statements: (s ?? []) }); } - / Statement - -LF - = "\r\n" / [\r\n] - -EOL - = !. / LF - -// spacing -_ - = [ \t\r\n] - -// spacing (no linebreaks) -__ - = [ \t] From ec77c61b976b8c6b15ff892520a198924e7104d3 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 10:28:56 +0900 Subject: [PATCH 119/126] remove peg parser --- src/@types/parser.d.ts | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 src/@types/parser.d.ts diff --git a/src/@types/parser.d.ts b/src/@types/parser.d.ts deleted file mode 100644 index 6f2ff82c..00000000 --- a/src/@types/parser.d.ts +++ /dev/null @@ -1,6 +0,0 @@ -import type { Cst } from '../index.js'; - -declare module '*/parser.js' { - // FIXME: 型指定が効いていない - export const parse: (input: string, options: object) => Cst.Node[]; -} From 907abbaec8f57249fec3742b444872f387c9ed8c Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 10:29:14 +0900 Subject: [PATCH 120/126] remove CST nodes --- src/index.ts | 2 - src/node.ts | 22 +- src/parser/index.ts | 8 +- src/parser/node.ts | 280 ------------------------- src/parser/plugins/validate-keyword.ts | 6 +- src/parser/plugins/validate-type.ts | 6 +- src/parser/syntaxes/common.ts | 20 +- src/parser/syntaxes/expressions.ts | 44 ++-- src/parser/syntaxes/statements.ts | 34 +-- src/parser/syntaxes/toplevel.ts | 14 +- src/parser/visit.ts | 72 +++---- 11 files changed, 121 insertions(+), 387 deletions(-) delete mode 100644 src/parser/node.ts diff --git a/src/index.ts b/src/index.ts index a0811739..b524c404 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,7 +7,6 @@ import { Scope } from './interpreter/scope.js'; import * as utils from './interpreter/util.js'; import * as values from './interpreter/value.js'; import { Parser, ParserPlugin, PluginType } from './parser/index.js'; -import * as Cst from './parser/node.js'; import * as errors from './error.js'; import * as Ast from './node.js'; export { Interpreter }; @@ -17,6 +16,5 @@ export { values }; export { Parser }; export { ParserPlugin }; export { PluginType }; -export { Cst }; export { errors }; export { Ast }; diff --git a/src/node.ts b/src/node.ts index ad75e28f..2d110339 100644 --- a/src/node.ts +++ b/src/node.ts @@ -1,9 +1,25 @@ /** * ASTノード - * - * ASTノードはCSTノードをインタプリタ等から操作しやすい構造に変形したものです。 */ +export function NODE(type: string, params: Record, loc: { column: number, line: number }): Node { + const node: Record = { type }; + for (const key of Object.keys(params)) { + if (params[key] !== undefined) { + node[key] = params[key]; + } + } + node.loc = loc; + return node as Node; +} + +export function CALL_NODE(name: string, args: Node[], loc: { column: number, line: number }): Node { + return NODE('call', { + target: NODE('identifier', { name }, loc), + args, + }, loc); +} + export type Loc = { line: number; column: number; @@ -12,7 +28,7 @@ export type Loc = { export type Node = Namespace | Meta | Statement | Expression | TypeSource | Attribute; type NodeBase = { - loc?: Loc; // コード位置 + loc: Loc; // コード位置 }; export type Namespace = NodeBase & { diff --git a/src/parser/index.ts b/src/parser/index.ts index ef0ce7f0..07092dbb 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -3,10 +3,10 @@ import { parseTopLevel } from './syntaxes/toplevel.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; -import type * as Cst from './node.js'; + import type * as Ast from '../node.js'; -export type ParserPlugin = (nodes: Cst.Node[]) => Cst.Node[]; +export type ParserPlugin = (nodes: Ast.Node[]) => Ast.Node[]; export type PluginType = 'validate' | 'transform'; export class Parser { @@ -48,7 +48,7 @@ export class Parser { } public parse(input: string): Ast.Node[] { - let nodes: Cst.Node[]; + let nodes: Ast.Node[]; const scanner = new Scanner(input); nodes = parseTopLevel(scanner); @@ -63,6 +63,6 @@ export class Parser { nodes = plugin(nodes); } - return nodes as Ast.Node[]; + return nodes; } } diff --git a/src/parser/node.ts b/src/parser/node.ts deleted file mode 100644 index 0919aab4..00000000 --- a/src/parser/node.ts +++ /dev/null @@ -1,280 +0,0 @@ -/** - * CSTノード - * - * パーサーが生成する直接的な処理結果です。 - * パーサーが生成しやすい形式になっているため、インタプリタ等では操作しにくい構造になっていることがあります。 - * この処理結果がプラグインによって処理されるとASTノードとなります。 -*/ - -export type Node = Namespace | Meta | Statement | Expression | TypeSource | Attribute; - -export function NODE(type: string, params: Record, loc: { column: number, line: number }): Node { - const node: Record = { type }; - for (const key of Object.keys(params)) { - if (params[key] !== undefined) { - node[key] = params[key]; - } - } - node.loc = loc; - return node as Node; -} - -export function CALL_NODE(name: string, args: Node[], loc: { column: number, line: number }): Node { - return NODE('call', { - target: NODE('identifier', { name }, loc), - args, - }, loc); -} - -type NodeBase = { - __AST_NODE: never; // phantom type - loc: { - line: number; - column: number; - }; -}; - -export type Namespace = NodeBase & { - type: 'ns'; - name: string; - members: (Definition | Namespace)[]; -}; - -export type Meta = NodeBase & { - type: 'meta'; - name: string | null; - value: Expression; -}; - -export type Statement = - Definition | - Return | - Each | - For | - Loop | - Break | - Continue | - Assign | - AddAssign | - SubAssign; - -export type Definition = NodeBase & { - type: 'def'; - name: string; - varType?: TypeSource; - expr: Expression; - mut: boolean; - attr?: Attribute[]; -}; - -export type Attribute = NodeBase & { - type: 'attr'; - name: string; - value: Expression; -}; - -export type Return = NodeBase & { - type: 'return'; - expr: Expression; -}; - -export type Each = NodeBase & { - type: 'each'; - var: string; - items: Expression; - for: Statement | Expression; -}; - -export type For = NodeBase & { - type: 'for'; - var?: string; - from?: Expression; - to?: Expression; - times?: Expression; - for: Statement | Expression; -}; - -export type Loop = NodeBase & { - type: 'loop'; - statements: (Statement | Expression)[]; -}; - -export type Break = NodeBase & { - type: 'break'; -}; - -export type Continue = NodeBase & { - type: 'continue'; -}; - -export type AddAssign = NodeBase & { - type: 'addAssign'; - dest: Expression; - expr: Expression; -}; - -export type SubAssign = NodeBase & { - type: 'subAssign'; - dest: Expression; - expr: Expression; -}; - -export type Assign = NodeBase & { - type: 'assign'; - dest: Expression; - expr: Expression; -}; - -// expressions - -export type Expression = - If | - Fn | - Match | - Block | - Exists | - Tmpl | - Str | - Num | - Bool | - Null | - Obj | - Arr | - Not | - And | - Or | - Identifier | - Call | - Index | - Prop; - -export type Not = NodeBase & { - type: 'not'; - expr: Expression; -}; - -export type And = NodeBase & { - type: 'and'; - left: Expression; - right: Expression; -} - -export type Or = NodeBase & { - type: 'or'; - left: Expression; - right: Expression; -} - -export type If = NodeBase & { - type: 'if'; - cond: Expression; - then: Statement | Expression; - elseif: { - cond: Expression; - then: Statement | Expression; - }[]; - else?: Statement | Expression; -}; - -export type Fn = NodeBase & { - type: 'fn'; - args: { - name: string; - argType?: TypeSource; - }[]; - retType?: TypeSource; - children: (Statement | Expression)[]; -}; - -export type Match = NodeBase & { - type: 'match'; - about: Expression; - qs: { - q: Expression; - a: Statement | Expression; - }[]; - default?: Statement | Expression; -}; - -export type Block = NodeBase & { - type: 'block'; - statements: (Statement | Expression)[]; -}; - -export type Exists = NodeBase & { - type: 'exists'; - identifier: Identifier; -}; - -export type Tmpl = NodeBase & { - type: 'tmpl'; - tmpl: (string | Expression)[]; -}; - -export type Str = NodeBase & { - type: 'str'; - value: string; -}; - -export type Num = NodeBase & { - type: 'num'; - value: number; -}; - -export type Bool = NodeBase & { - type: 'bool'; - value: boolean; -}; - -export type Null = NodeBase & { - type: 'null'; -}; - -export type Obj = NodeBase & { - type: 'obj'; - value: Map; -}; - -export type Arr = NodeBase & { - type: 'arr'; - value: Expression[]; -}; - -export type Identifier = NodeBase & { - type: 'identifier'; - name: string; -}; - -export type Call = NodeBase & { - type: 'call'; - target: Expression; - args: Expression[]; -}; - -export type Index = NodeBase & { - type: 'index'; - target: Expression; - index: Expression; -}; - -export type Prop = NodeBase & { - type: 'prop'; - target: Expression; - name: string; -}; - -// Type source - -export type TypeSource = NamedTypeSource | FnTypeSource; - -export type NamedTypeSource = NodeBase & { - type: 'namedTypeSource'; - name: string; - inner?: TypeSource; -}; - -export type FnTypeSource = NodeBase & { - type: 'fnTypeSource'; - args: TypeSource[]; - result: TypeSource; -}; diff --git a/src/parser/plugins/validate-keyword.ts b/src/parser/plugins/validate-keyword.ts index f2ad4bb5..bf6f3980 100644 --- a/src/parser/plugins/validate-keyword.ts +++ b/src/parser/plugins/validate-keyword.ts @@ -1,6 +1,6 @@ import { AiScriptSyntaxError } from '../../error.js'; import { visitNode } from '../visit.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; const reservedWord = [ 'null', @@ -48,7 +48,7 @@ function throwReservedWordError(name: string): void { throw new AiScriptSyntaxError(`Reserved word "${name}" cannot be used as variable name.`); } -function validateNode(node: Cst.Node): Cst.Node { +function validateNode(node: Ast.Node): Ast.Node { switch (node.type) { case 'def': case 'attr': @@ -78,7 +78,7 @@ function validateNode(node: Cst.Node): Cst.Node { return node; } -export function validateKeyword(nodes: Cst.Node[]): Cst.Node[] { +export function validateKeyword(nodes: Ast.Node[]): Ast.Node[] { for (const inner of nodes) { visitNode(inner, validateNode); } diff --git a/src/parser/plugins/validate-type.ts b/src/parser/plugins/validate-type.ts index 08d5addf..3dbc0cf3 100644 --- a/src/parser/plugins/validate-type.ts +++ b/src/parser/plugins/validate-type.ts @@ -1,8 +1,8 @@ import { getTypeBySource } from '../../type.js'; import { visitNode } from '../visit.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; -function validateNode(node: Cst.Node): Cst.Node { +function validateNode(node: Ast.Node): Ast.Node { switch (node.type) { case 'def': { if (node.varType != null) { @@ -26,7 +26,7 @@ function validateNode(node: Cst.Node): Cst.Node { return node; } -export function validateType(nodes: Cst.Node[]): Cst.Node[] { +export function validateType(nodes: Ast.Node[]): Ast.Node[] { for (const node of nodes) { visitNode(node, validateNode); } diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index 4d908403..affd6334 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -1,18 +1,18 @@ import { TokenKind } from '../token.js'; import { AiScriptSyntaxError } from '../../error.js'; +import { NODE } from '../../node.js'; import { parseStatement } from './statements.js'; -import { NODE } from '../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; /** * ```abnf * Params = "(" [IDENT *(("," / SPACE) IDENT)] ")" * ``` */ -export function parseParams(s: ITokenStream): { name: string, argType?: Cst.Node }[] { - const items: { name: string, argType?: Cst.Node }[] = []; +export function parseParams(s: ITokenStream): { name: string, argType?: Ast.Node }[] { + const items: { name: string, argType?: Ast.Node }[] = []; s.nextWith(TokenKind.OpenParen); @@ -49,14 +49,14 @@ export function parseParams(s: ITokenStream): { name: string, argType?: Cst.Node * Block = "{" *Statement "}" * ``` */ -export function parseBlock(s: ITokenStream): Cst.Node[] { +export function parseBlock(s: ITokenStream): Ast.Node[] { s.nextWith(TokenKind.OpenBrace); while (s.kind === TokenKind.NewLine) { s.next(); } - const steps: Cst.Node[] = []; + const steps: Ast.Node[] = []; while (s.kind !== TokenKind.CloseBrace) { steps.push(parseStatement(s)); @@ -75,7 +75,7 @@ export function parseBlock(s: ITokenStream): Cst.Node[] { //#region Type -export function parseType(s: ITokenStream): Cst.Node { +export function parseType(s: ITokenStream): Ast.Node { if (s.kind === TokenKind.At) { return parseFnType(s); } else { @@ -89,13 +89,13 @@ export function parseType(s: ITokenStream): Cst.Node { * ParamTypes = [Type *(("," / SPACE) Type)] * ``` */ -function parseFnType(s: ITokenStream): Cst.Node { +function parseFnType(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.At); s.nextWith(TokenKind.OpenParen); - const params: Cst.Node[] = []; + const params: Ast.Node[] = []; while (s.kind !== TokenKind.CloseParen) { if (params.length > 0) { if (s.kind === TokenKind.Comma) { @@ -121,7 +121,7 @@ function parseFnType(s: ITokenStream): Cst.Node { * NamedType = IDENT ["<" Type ">"] * ``` */ -function parseNamedType(s: ITokenStream): Cst.Node { +function parseNamedType(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.expect(TokenKind.Identifier); diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 7e0614ef..d83a276f 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -1,14 +1,14 @@ import { AiScriptSyntaxError } from '../../error.js'; -import { CALL_NODE, NODE } from '../node.js'; +import { CALL_NODE, NODE } from '../../node.js'; import { TokenStream } from '../streams/token-stream.js'; import { TokenKind } from '../token.js'; import { parseBlock, parseParams, parseType } from './common.js'; import { parseBlockOrStatement } from './statements.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; -export function parseExpr(s: ITokenStream, isStatic: boolean): Cst.Node { +export function parseExpr(s: ITokenStream, isStatic: boolean): Ast.Node { if (isStatic) { return parseAtom(s, true); } else { @@ -51,7 +51,7 @@ const operators: OpInfo[] = [ { opKind: 'infix', kind: TokenKind.Or2, lbp: 2, rbp: 3 }, ]; -function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { +function parsePrefix(s: ITokenStream, minBp: number): Ast.Node { const loc = s.token.loc; const op = s.kind; s.next(); @@ -94,7 +94,7 @@ function parsePrefix(s: ITokenStream, minBp: number): Cst.Node { } } -function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { +function parseInfix(s: ITokenStream, left: Ast.Node, minBp: number): Ast.Node { const loc = s.token.loc; const op = s.kind; s.next(); @@ -167,7 +167,7 @@ function parseInfix(s: ITokenStream, left: Cst.Node, minBp: number): Cst.Node { } } -function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { +function parsePostfix(s: ITokenStream, expr: Ast.Node): Ast.Node { const loc = s.token.loc; const op = s.kind; @@ -191,7 +191,7 @@ function parsePostfix(s: ITokenStream, expr: Cst.Node): Cst.Node { } } -function parseAtom(s: ITokenStream, isStatic: boolean): Cst.Node { +function parseAtom(s: ITokenStream, isStatic: boolean): Ast.Node { const loc = s.token.loc; switch (s.kind) { @@ -216,7 +216,7 @@ function parseAtom(s: ITokenStream, isStatic: boolean): Cst.Node { return parseExists(s); } case TokenKind.Template: { - const values: (string | Cst.Node)[] = []; + const values: (string | Ast.Node)[] = []; if (isStatic) break; @@ -289,9 +289,9 @@ function parseAtom(s: ITokenStream, isStatic: boolean): Cst.Node { /** * Call = "(" [Expr *(("," / SPACE) Expr)] ")" */ -function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { +function parseCall(s: ITokenStream, target: Ast.Node): Ast.Node { const loc = s.token.loc; - const items: Cst.Node[] = []; + const items: Ast.Node[] = []; s.nextWith(TokenKind.OpenParen); @@ -321,7 +321,7 @@ function parseCall(s: ITokenStream, target: Cst.Node): Cst.Node { * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] * ``` */ -function parseIf(s: ITokenStream): Cst.Node { +function parseIf(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.IfKeyword); @@ -332,7 +332,7 @@ function parseIf(s: ITokenStream): Cst.Node { s.next(); } - const elseif: { cond: Cst.Node, then: Cst.Node }[] = []; + const elseif: { cond: Ast.Node, then: Ast.Node }[] = []; while (s.kind === TokenKind.ElifKeyword) { s.next(); const elifCond = parseExpr(s, false); @@ -357,7 +357,7 @@ function parseIf(s: ITokenStream): Cst.Node { * FnExpr = "@" Params [":" Type] Block * ``` */ -function parseFnExpr(s: ITokenStream): Cst.Node { +function parseFnExpr(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.At); @@ -380,7 +380,7 @@ function parseFnExpr(s: ITokenStream): Cst.Node { * Match = "match" Expr "{" *("case" Expr "=>" BlockOrStatement) ["default" "=>" BlockOrStatement] "}" * ``` */ -function parseMatch(s: ITokenStream): Cst.Node { +function parseMatch(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.MatchKeyword); @@ -389,7 +389,7 @@ function parseMatch(s: ITokenStream): Cst.Node { s.nextWith(TokenKind.OpenBrace); s.nextWith(TokenKind.NewLine); - const qs: { q: Cst.Node, a: Cst.Node }[] = []; + const qs: { q: Ast.Node, a: Ast.Node }[] = []; while (s.kind !== TokenKind.DefaultKeyword && s.kind !== TokenKind.CloseBrace) { s.nextWith(TokenKind.CaseKeyword); const q = parseExpr(s, false); @@ -417,7 +417,7 @@ function parseMatch(s: ITokenStream): Cst.Node { * Eval = "eval" Block * ``` */ -function parseEval(s: ITokenStream): Cst.Node { +function parseEval(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.EvalKeyword); @@ -430,7 +430,7 @@ function parseEval(s: ITokenStream): Cst.Node { * Exists = "exists" Reference * ``` */ -function parseExists(s: ITokenStream): Cst.Node { +function parseExists(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.ExistsKeyword); @@ -443,7 +443,7 @@ function parseExists(s: ITokenStream): Cst.Node { * Reference = IDENT *(":" IDENT) * ``` */ -function parseReference(s: ITokenStream): Cst.Node { +function parseReference(s: ITokenStream): Ast.Node { const loc = s.token.loc; const segs: string[] = []; @@ -467,7 +467,7 @@ function parseReference(s: ITokenStream): Cst.Node { * Object = "{" [IDENT ":" Expr *(("," / ";" / SPACE) IDENT ":" Expr) ["," / ";"]] "}" * ``` */ -function parseObject(s: ITokenStream, isStatic: boolean): Cst.Node { +function parseObject(s: ITokenStream, isStatic: boolean): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.OpenBrace); @@ -518,7 +518,7 @@ function parseObject(s: ITokenStream, isStatic: boolean): Cst.Node { * Array = "[" [Expr *(("," / SPACE) Expr) [","]] "]" * ``` */ -function parseArray(s: ITokenStream, isStatic: boolean): Cst.Node { +function parseArray(s: ITokenStream, isStatic: boolean): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.OpenBracket); @@ -561,11 +561,11 @@ type InfixInfo = { opKind: 'infix', kind: TokenKind, lbp: number, rbp: number }; type PostfixInfo = { opKind: 'postfix', kind: TokenKind, bp: number }; type OpInfo = PrefixInfo | InfixInfo | PostfixInfo; -function parsePratt(s: ITokenStream, minBp: number): Cst.Node { +function parsePratt(s: ITokenStream, minBp: number): Ast.Node { // pratt parsing // https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html - let left: Cst.Node; + let left: Ast.Node; const tokenKind = s.kind; const prefix = operators.find((x): x is PrefixInfo => x.opKind === 'prefix' && x.kind === tokenKind); diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 7cd88f6e..6fdc2a2e 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -1,10 +1,10 @@ import { AiScriptSyntaxError } from '../../error.js'; -import { CALL_NODE, NODE } from '../node.js'; +import { CALL_NODE, NODE } from '../../node.js'; import { TokenKind } from '../token.js'; import { parseBlock, parseParams, parseType } from './common.js'; import { parseExpr } from './expressions.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; /** @@ -13,7 +13,7 @@ import type { ITokenStream } from '../streams/token-stream.js'; * / Break / Continue / Assign / Expr * ``` */ -export function parseStatement(s: ITokenStream): Cst.Node { +export function parseStatement(s: ITokenStream): Ast.Node { const loc = s.token.loc; switch (s.kind) { @@ -62,7 +62,7 @@ export function parseStatement(s: ITokenStream): Cst.Node { return expr; } -export function parseDefStatement(s: ITokenStream): Cst.Node { +export function parseDefStatement(s: ITokenStream): Ast.Node { switch (s.kind) { case TokenKind.VarKeyword: case TokenKind.LetKeyword: { @@ -82,7 +82,7 @@ export function parseDefStatement(s: ITokenStream): Cst.Node { * BlockOrStatement = Block / Statement * ``` */ -export function parseBlockOrStatement(s: ITokenStream): Cst.Node { +export function parseBlockOrStatement(s: ITokenStream): Ast.Node { const loc = s.token.loc; if (s.kind === TokenKind.OpenBrace) { @@ -98,7 +98,7 @@ export function parseBlockOrStatement(s: ITokenStream): Cst.Node { * VarDef = ("let" / "var") IDENT [":" Type] "=" Expr * ``` */ -function parseVarDef(s: ITokenStream): Cst.Node { +function parseVarDef(s: ITokenStream): Ast.Node { const loc = s.token.loc; let mut; @@ -143,7 +143,7 @@ function parseVarDef(s: ITokenStream): Cst.Node { * FnDef = "@" IDENT Params [":" Type] Block * ``` */ -function parseFnDef(s: ITokenStream): Cst.Node { +function parseFnDef(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.At); @@ -179,7 +179,7 @@ function parseFnDef(s: ITokenStream): Cst.Node { * Out = "<:" Expr * ``` */ -function parseOut(s: ITokenStream): Cst.Node { +function parseOut(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.Out); @@ -193,7 +193,7 @@ function parseOut(s: ITokenStream): Cst.Node { * / "each" "(" "let" IDENT ("," / SPACE) Expr ")" BlockOrStatement * ``` */ -function parseEach(s: ITokenStream): Cst.Node { +function parseEach(s: ITokenStream): Ast.Node { const loc = s.token.loc; let hasParen = false; @@ -231,7 +231,7 @@ function parseEach(s: ITokenStream): Cst.Node { }, loc); } -function parseFor(s: ITokenStream): Cst.Node { +function parseFor(s: ITokenStream): Ast.Node { const loc = s.token.loc; let hasParen = false; @@ -303,7 +303,7 @@ function parseFor(s: ITokenStream): Cst.Node { * Return = "return" Expr * ``` */ -function parseReturn(s: ITokenStream): Cst.Node { +function parseReturn(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.ReturnKeyword); @@ -316,10 +316,10 @@ function parseReturn(s: ITokenStream): Cst.Node { * StatementWithAttr = *Attr Statement * ``` */ -function parseStatementWithAttr(s: ITokenStream): Cst.Node { - const attrs: Cst.Attribute[] = []; +function parseStatementWithAttr(s: ITokenStream): Ast.Node { + const attrs: Ast.Attribute[] = []; while (s.kind === TokenKind.OpenSharpBracket) { - attrs.push(parseAttr(s) as Cst.Attribute); + attrs.push(parseAttr(s) as Ast.Attribute); s.nextWith(TokenKind.NewLine); } @@ -342,7 +342,7 @@ function parseStatementWithAttr(s: ITokenStream): Cst.Node { * Attr = "#[" IDENT [StaticExpr] "]" * ``` */ -function parseAttr(s: ITokenStream): Cst.Node { +function parseAttr(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.OpenSharpBracket); @@ -368,7 +368,7 @@ function parseAttr(s: ITokenStream): Cst.Node { * Loop = "loop" Block * ``` */ -function parseLoop(s: ITokenStream): Cst.Node { +function parseLoop(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.LoopKeyword); @@ -381,7 +381,7 @@ function parseLoop(s: ITokenStream): Cst.Node { * Assign = Expr ("=" / "+=" / "-=") Expr * ``` */ -function tryParseAssign(s: ITokenStream, dest: Cst.Node): Cst.Node | undefined { +function tryParseAssign(s: ITokenStream, dest: Ast.Node): Ast.Node | undefined { const loc = s.token.loc; // Assign diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index 7b23a676..20191278 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -1,10 +1,10 @@ -import { NODE } from '../node.js'; +import { NODE } from '../../node.js'; import { TokenKind } from '../token.js'; import { AiScriptSyntaxError } from '../../error.js'; import { parseDefStatement, parseStatement } from './statements.js'; import { parseExpr } from './expressions.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; import type { ITokenStream } from '../streams/token-stream.js'; /** @@ -12,8 +12,8 @@ import type { ITokenStream } from '../streams/token-stream.js'; * TopLevel = *(Namespace / Meta / Statement) * ``` */ -export function parseTopLevel(s: ITokenStream): Cst.Node[] { - const nodes: Cst.Node[] = []; +export function parseTopLevel(s: ITokenStream): Ast.Node[] { + const nodes: Ast.Node[] = []; while (s.kind === TokenKind.NewLine) { s.next(); @@ -51,7 +51,7 @@ export function parseTopLevel(s: ITokenStream): Cst.Node[] { * Namespace = "::" IDENT "{" *(VarDef / FnDef / Namespace) "}" * ``` */ -export function parseNamespace(s: ITokenStream): Cst.Node { +export function parseNamespace(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.Colon2); @@ -60,7 +60,7 @@ export function parseNamespace(s: ITokenStream): Cst.Node { const name = s.token.value!; s.next(); - const members: Cst.Node[] = []; + const members: Ast.Node[] = []; s.nextWith(TokenKind.OpenBrace); while (s.kind === TokenKind.NewLine) { @@ -98,7 +98,7 @@ export function parseNamespace(s: ITokenStream): Cst.Node { * Meta = "###" [IDENT] StaticExpr * ``` */ -export function parseMeta(s: ITokenStream): Cst.Node { +export function parseMeta(s: ITokenStream): Ast.Node { const loc = s.token.loc; s.nextWith(TokenKind.Sharp3); diff --git a/src/parser/visit.ts b/src/parser/visit.ts index b49257bb..29b5cbdf 100644 --- a/src/parser/visit.ts +++ b/src/parser/visit.ts @@ -1,127 +1,127 @@ -import type * as Cst from './node.js'; +import type * as Ast from '../node.js'; -export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst.Node { +export function visitNode(node: Ast.Node, fn: (node: Ast.Node) => Ast.Node): Ast.Node { const result = fn(node); // nested nodes switch (result.type) { case 'def': { - result.expr = visitNode(result.expr, fn) as Cst.Definition['expr']; + result.expr = visitNode(result.expr, fn) as Ast.Definition['expr']; break; } case 'return': { - result.expr = visitNode(result.expr, fn) as Cst.Return['expr']; + result.expr = visitNode(result.expr, fn) as Ast.Return['expr']; break; } case 'each': { - result.items = visitNode(result.items, fn) as Cst.Each['items']; - result.for = visitNode(result.for, fn) as Cst.Each['for']; + result.items = visitNode(result.items, fn) as Ast.Each['items']; + result.for = visitNode(result.for, fn) as Ast.Each['for']; break; } case 'for': { if (result.from != null) { - result.from = visitNode(result.from, fn) as Cst.For['from']; + result.from = visitNode(result.from, fn) as Ast.For['from']; } if (result.to != null) { - result.to = visitNode(result.to, fn) as Cst.For['to']; + result.to = visitNode(result.to, fn) as Ast.For['to']; } if (result.times != null) { - result.times = visitNode(result.times, fn) as Cst.For['times']; + result.times = visitNode(result.times, fn) as Ast.For['times']; } - result.for = visitNode(result.for, fn) as Cst.For['for']; + result.for = visitNode(result.for, fn) as Ast.For['for']; break; } case 'loop': { for (let i = 0; i < result.statements.length; i++) { - result.statements[i] = visitNode(result.statements[i]!, fn) as Cst.Loop['statements'][number]; + result.statements[i] = visitNode(result.statements[i]!, fn) as Ast.Loop['statements'][number]; } break; } case 'addAssign': case 'subAssign': case 'assign': { - result.expr = visitNode(result.expr, fn) as Cst.Assign['expr']; - result.dest = visitNode(result.dest, fn) as Cst.Assign['dest']; + result.expr = visitNode(result.expr, fn) as Ast.Assign['expr']; + result.dest = visitNode(result.dest, fn) as Ast.Assign['dest']; break; } case 'not': { - result.expr = visitNode(result.expr, fn) as Cst.Return['expr']; + result.expr = visitNode(result.expr, fn) as Ast.Return['expr']; break; } case 'if': { - result.cond = visitNode(result.cond, fn) as Cst.If['cond']; - result.then = visitNode(result.then, fn) as Cst.If['then']; + result.cond = visitNode(result.cond, fn) as Ast.If['cond']; + result.then = visitNode(result.then, fn) as Ast.If['then']; for (const prop of result.elseif) { - prop.cond = visitNode(prop.cond, fn) as Cst.If['elseif'][number]['cond']; - prop.then = visitNode(prop.then, fn) as Cst.If['elseif'][number]['then']; + prop.cond = visitNode(prop.cond, fn) as Ast.If['elseif'][number]['cond']; + prop.then = visitNode(prop.then, fn) as Ast.If['elseif'][number]['then']; } if (result.else != null) { - result.else = visitNode(result.else, fn) as Cst.If['else']; + result.else = visitNode(result.else, fn) as Ast.If['else']; } break; } case 'fn': { for (let i = 0; i < result.children.length; i++) { - result.children[i] = visitNode(result.children[i]!, fn) as Cst.Fn['children'][number]; + result.children[i] = visitNode(result.children[i]!, fn) as Ast.Fn['children'][number]; } break; } case 'match': { - result.about = visitNode(result.about, fn) as Cst.Match['about']; + result.about = visitNode(result.about, fn) as Ast.Match['about']; for (const prop of result.qs) { - prop.q = visitNode(prop.q, fn) as Cst.Match['qs'][number]['q']; - prop.a = visitNode(prop.a, fn) as Cst.Match['qs'][number]['a']; + prop.q = visitNode(prop.q, fn) as Ast.Match['qs'][number]['q']; + prop.a = visitNode(prop.a, fn) as Ast.Match['qs'][number]['a']; } if (result.default != null) { - result.default = visitNode(result.default, fn) as Cst.Match['default']; + result.default = visitNode(result.default, fn) as Ast.Match['default']; } break; } case 'block': { for (let i = 0; i < result.statements.length; i++) { - result.statements[i] = visitNode(result.statements[i]!, fn) as Cst.Block['statements'][number]; + result.statements[i] = visitNode(result.statements[i]!, fn) as Ast.Block['statements'][number]; } break; } case 'exists': { - result.identifier = visitNode(result.identifier,fn) as Cst.Exists['identifier']; + result.identifier = visitNode(result.identifier,fn) as Ast.Exists['identifier']; break; } case 'tmpl': { for (let i = 0; i < result.tmpl.length; i++) { const item = result.tmpl[i]!; if (typeof item !== 'string') { - result.tmpl[i] = visitNode(item, fn) as Cst.Tmpl['tmpl'][number]; + result.tmpl[i] = visitNode(item, fn) as Ast.Tmpl['tmpl'][number]; } } break; } case 'obj': { for (const item of result.value) { - result.value.set(item[0], visitNode(item[1], fn) as Cst.Expression); + result.value.set(item[0], visitNode(item[1], fn) as Ast.Expression); } break; } case 'arr': { for (let i = 0; i < result.value.length; i++) { - result.value[i] = visitNode(result.value[i]!, fn) as Cst.Arr['value'][number]; + result.value[i] = visitNode(result.value[i]!, fn) as Ast.Arr['value'][number]; } break; } case 'call': { - result.target = visitNode(result.target, fn) as Cst.Call['target']; + result.target = visitNode(result.target, fn) as Ast.Call['target']; for (let i = 0; i < result.args.length; i++) { - result.args[i] = visitNode(result.args[i]!, fn) as Cst.Call['args'][number]; + result.args[i] = visitNode(result.args[i]!, fn) as Ast.Call['args'][number]; } break; } case 'index': { - result.target = visitNode(result.target, fn) as Cst.Index['target']; - result.index = visitNode(result.index, fn) as Cst.Index['index']; + result.target = visitNode(result.target, fn) as Ast.Index['target']; + result.index = visitNode(result.index, fn) as Ast.Index['index']; break; } case 'prop': { - result.target = visitNode(result.target, fn) as Cst.Prop['target']; + result.target = visitNode(result.target, fn) as Ast.Prop['target']; break; } case 'ns': { @@ -133,8 +133,8 @@ export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst case 'or': case 'and': { - result.left = visitNode(result.left, fn) as (Cst.And | Cst.Or)['left']; - result.right = visitNode(result.right, fn) as (Cst.And | Cst.Or)['right']; + result.left = visitNode(result.left, fn) as (Ast.And | Ast.Or)['left']; + result.right = visitNode(result.right, fn) as (Ast.And | Ast.Or)['right']; break; } } From 9fd5c56792c40e85d7dd5cd8fe7cd52a8516dea6 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 10:37:26 +0900 Subject: [PATCH 121/126] move node utility --- src/node.ts | 18 ------------------ src/parser/syntaxes/common.ts | 2 +- src/parser/syntaxes/expressions.ts | 2 +- src/parser/syntaxes/statements.ts | 2 +- src/parser/syntaxes/toplevel.ts | 2 +- src/parser/utils.ts | 19 +++++++++++++++++++ 6 files changed, 23 insertions(+), 22 deletions(-) create mode 100644 src/parser/utils.ts diff --git a/src/node.ts b/src/node.ts index 2d110339..caa33196 100644 --- a/src/node.ts +++ b/src/node.ts @@ -2,24 +2,6 @@ * ASTノード */ -export function NODE(type: string, params: Record, loc: { column: number, line: number }): Node { - const node: Record = { type }; - for (const key of Object.keys(params)) { - if (params[key] !== undefined) { - node[key] = params[key]; - } - } - node.loc = loc; - return node as Node; -} - -export function CALL_NODE(name: string, args: Node[], loc: { column: number, line: number }): Node { - return NODE('call', { - target: NODE('identifier', { name }, loc), - args, - }, loc); -} - export type Loc = { line: number; column: number; diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts index affd6334..311b4f85 100644 --- a/src/parser/syntaxes/common.ts +++ b/src/parser/syntaxes/common.ts @@ -1,6 +1,6 @@ import { TokenKind } from '../token.js'; import { AiScriptSyntaxError } from '../../error.js'; -import { NODE } from '../../node.js'; +import { NODE } from '../utils.js'; import { parseStatement } from './statements.js'; import type { ITokenStream } from '../streams/token-stream.js'; diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index d83a276f..0440af5c 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -1,5 +1,5 @@ import { AiScriptSyntaxError } from '../../error.js'; -import { CALL_NODE, NODE } from '../../node.js'; +import { CALL_NODE, NODE } from '../utils.js'; import { TokenStream } from '../streams/token-stream.js'; import { TokenKind } from '../token.js'; import { parseBlock, parseParams, parseType } from './common.js'; diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts index 6fdc2a2e..f36ba94d 100644 --- a/src/parser/syntaxes/statements.ts +++ b/src/parser/syntaxes/statements.ts @@ -1,5 +1,5 @@ import { AiScriptSyntaxError } from '../../error.js'; -import { CALL_NODE, NODE } from '../../node.js'; +import { CALL_NODE, NODE } from '../utils.js'; import { TokenKind } from '../token.js'; import { parseBlock, parseParams, parseType } from './common.js'; import { parseExpr } from './expressions.js'; diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts index 20191278..cd6f8ba2 100644 --- a/src/parser/syntaxes/toplevel.ts +++ b/src/parser/syntaxes/toplevel.ts @@ -1,4 +1,4 @@ -import { NODE } from '../../node.js'; +import { NODE } from '../utils.js'; import { TokenKind } from '../token.js'; import { AiScriptSyntaxError } from '../../error.js'; import { parseDefStatement, parseStatement } from './statements.js'; diff --git a/src/parser/utils.ts b/src/parser/utils.ts new file mode 100644 index 00000000..456764e5 --- /dev/null +++ b/src/parser/utils.ts @@ -0,0 +1,19 @@ +import type * as Ast from '../node.js'; + +export function NODE(type: string, params: Record, loc: { column: number, line: number }): Ast.Node { + const node: Record = { type }; + for (const key of Object.keys(params)) { + if (params[key] !== undefined) { + node[key] = params[key]; + } + } + node.loc = loc; + return node as Ast.Node; +} + +export function CALL_NODE(name: string, args: Ast.Node[], loc: { column: number, line: number }): Ast.Node { + return NODE('call', { + target: NODE('identifier', { name }, loc), + args, + }, loc); +} From ab3b201e326c59cadeeea57f7fc1dabe34f54e7f Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 10:37:42 +0900 Subject: [PATCH 122/126] api --- etc/aiscript.api.md | 385 +------------------------------------------- 1 file changed, 6 insertions(+), 379 deletions(-) diff --git a/etc/aiscript.api.md b/etc/aiscript.api.md index c430eb08..82532fc2 100644 --- a/etc/aiscript.api.md +++ b/etc/aiscript.api.md @@ -13,15 +13,6 @@ type AddAssign = NodeBase & { expr: Expression; }; -// Warning: (ae-forgotten-export) The symbol "NodeBase_2" needs to be exported by the entry point index.d.ts -// -// @public (undocumented) -type AddAssign_2 = NodeBase_2 & { - type: 'addAssign'; - dest: Expression_2; - expr: Expression_2; -}; - // @public (undocumented) abstract class AiScriptError extends Error { constructor(message: string, info?: any); @@ -64,13 +55,6 @@ type And = NodeBase & { right: Expression; }; -// @public (undocumented) -type And_2 = NodeBase_2 & { - type: 'and'; - left: Expression_2; - right: Expression_2; -}; - // @public (undocumented) const ARR: (arr: VArr['value']) => VArr; @@ -80,14 +64,6 @@ type Arr = NodeBase & { value: Expression[]; }; -// Warning: (ae-forgotten-export) The symbol "ChainProp" needs to be exported by the entry point index.d.ts -// -// @public (undocumented) -type Arr_2 = NodeBase_2 & ChainProp & { - type: 'arr'; - value: Expression_2[]; -}; - // @public (undocumented) function assertArray(val: Value | null | undefined): asserts val is VArr; @@ -113,23 +89,15 @@ type Assign = NodeBase & { expr: Expression; }; -// @public (undocumented) -type Assign_2 = NodeBase_2 & { - type: 'assign'; - dest: Expression_2; - expr: Expression_2; -}; - declare namespace Ast { export { isStatement, isExpression, Loc, Node_2 as Node, - Statement, - Expression, Namespace, Meta, + Statement, Definition, Attribute, Return, @@ -141,6 +109,7 @@ declare namespace Ast { AddAssign, SubAssign, Assign, + Expression, Not, And, Or, @@ -182,25 +151,12 @@ type Attribute = NodeBase & { value: Expression; }; -// @public (undocumented) -type Attribute_2 = NodeBase_2 & { - type: 'attr'; - name: string; - value: Expression_2; -}; - // @public (undocumented) type Block = NodeBase & { type: 'block'; statements: (Statement | Expression)[]; }; -// @public (undocumented) -type Block_2 = NodeBase_2 & ChainProp & { - type: 'block'; - statements: (Statement_2 | Expression_2)[]; -}; - // @public (undocumented) const BOOL: (bool: VBool['value']) => VBool; @@ -210,12 +166,6 @@ type Bool = NodeBase & { value: boolean; }; -// @public (undocumented) -type Bool_2 = NodeBase_2 & ChainProp & { - type: 'bool'; - value: boolean; -}; - // @public (undocumented) const BREAK: () => Value; @@ -224,17 +174,6 @@ type Break = NodeBase & { type: 'break'; }; -// @public (undocumented) -type Break_2 = NodeBase_2 & { - type: 'break'; -}; - -// @public (undocumented) -function CALL(target: Call_2['target'], args: Call_2['args'], loc?: { - start: number; - end: number; -}): Call_2; - // @public (undocumented) type Call = NodeBase & { type: 'call'; @@ -242,22 +181,6 @@ type Call = NodeBase & { args: Expression[]; }; -// @public (undocumented) -type Call_2 = NodeBase_2 & { - type: 'call'; - target: Expression_2; - args: Expression_2[]; -}; - -// @public (undocumented) -type CallChain = NodeBase_2 & { - type: 'callChain'; - args: Expression_2[]; -}; - -// @public (undocumented) -type ChainMember = CallChain | IndexChain | PropChain; - // @public (undocumented) const CONTINUE: () => Value; @@ -266,67 +189,6 @@ type Continue = NodeBase & { type: 'continue'; }; -// @public (undocumented) -type Continue_2 = NodeBase_2 & { - type: 'continue'; -}; - -declare namespace Cst { - export { - isStatement_2 as isStatement, - isExpression_2 as isExpression, - hasChainProp, - CALL, - INDEX, - PROP, - Node_3 as Node, - Statement_2 as Statement, - Expression_2 as Expression, - Namespace_2 as Namespace, - Meta_2 as Meta, - Definition_2 as Definition, - Attribute_2 as Attribute, - Return_2 as Return, - Each_2 as Each, - For_2 as For, - Loop_2 as Loop, - Break_2 as Break, - Continue_2 as Continue, - AddAssign_2 as AddAssign, - SubAssign_2 as SubAssign, - Assign_2 as Assign, - InfixOperator, - Infix, - Not_2 as Not, - And_2 as And, - Or_2 as Or, - If_2 as If, - Fn_2 as Fn, - Match_2 as Match, - Block_2 as Block, - Exists_2 as Exists, - Tmpl_2 as Tmpl, - Str_2 as Str, - Num_2 as Num, - Bool_2 as Bool, - Null_2 as Null, - Obj_2 as Obj, - Arr_2 as Arr, - Identifier_2 as Identifier, - ChainMember, - CallChain, - IndexChain, - PropChain, - Call_2 as Call, - Index_2 as Index, - Prop_2 as Prop, - TypeSource_2 as TypeSource, - NamedTypeSource_2 as NamedTypeSource, - FnTypeSource_2 as FnTypeSource - } -} -export { Cst } - // @public (undocumented) type Definition = NodeBase & { type: 'def'; @@ -337,16 +199,6 @@ type Definition = NodeBase & { attr: Attribute[]; }; -// @public (undocumented) -type Definition_2 = NodeBase_2 & { - type: 'def'; - name: string; - varType?: TypeSource_2; - expr: Expression_2; - mut: boolean; - attr?: Attribute_2[]; -}; - // @public (undocumented) type Each = NodeBase & { type: 'each'; @@ -355,14 +207,6 @@ type Each = NodeBase & { for: Statement | Expression; }; -// @public (undocumented) -type Each_2 = NodeBase_2 & { - type: 'each'; - var: string; - items: Expression_2; - for: Statement_2 | Expression_2; -}; - // @public (undocumented) function eq(a: Value, b: Value): boolean; @@ -387,23 +231,12 @@ type Exists = NodeBase & { identifier: Identifier; }; -// @public (undocumented) -type Exists_2 = NodeBase_2 & ChainProp & { - type: 'exists'; - identifier: Identifier_2; -}; - // @public (undocumented) function expectAny(val: Value | null | undefined): asserts val is Value; // @public (undocumented) type Expression = If | Fn | Match | Block | Exists | Tmpl | Str | Num | Bool | Null | Obj | Arr | Not | And | Or | Identifier | Call | Index | Prop; -// @public (undocumented) -type Expression_2 = Infix | Not_2 | And_2 | Or_2 | If_2 | Fn_2 | Match_2 | Block_2 | Exists_2 | Tmpl_2 | Str_2 | Num_2 | Bool_2 | Null_2 | Obj_2 | Arr_2 | Identifier_2 | Call_2 | // IR -Index_2 | // IR -Prop_2; - // @public (undocumented) const FALSE: { type: "bool"; @@ -424,17 +257,6 @@ type Fn = NodeBase & { children: (Statement | Expression)[]; }; -// @public (undocumented) -type Fn_2 = NodeBase_2 & ChainProp & { - type: 'fn'; - args: { - name: string; - argType?: TypeSource_2; - }[]; - retType?: TypeSource_2; - children: (Statement_2 | Expression_2)[]; -}; - // @public (undocumented) const FN_NATIVE: (fn: VFn['native']) => VFn; @@ -445,13 +267,6 @@ type FnTypeSource = NodeBase & { result: TypeSource; }; -// @public (undocumented) -type FnTypeSource_2 = NodeBase_2 & { - type: 'fnTypeSource'; - args: TypeSource_2[]; - result: TypeSource_2; -}; - // @public (undocumented) type For = NodeBase & { type: 'for'; @@ -462,34 +277,15 @@ type For = NodeBase & { for: Statement | Expression; }; -// @public (undocumented) -type For_2 = NodeBase_2 & { - type: 'for'; - var?: string; - from?: Expression_2; - to?: Expression_2; - times?: Expression_2; - for: Statement_2 | Expression_2; -}; - // @public (undocumented) function getLangVersion(input: string): string | null; -// @public (undocumented) -function hasChainProp(x: T): x is T & ChainProp; - // @public (undocumented) type Identifier = NodeBase & { type: 'identifier'; name: string; }; -// @public (undocumented) -type Identifier_2 = NodeBase_2 & ChainProp & { - type: 'identifier'; - name: string; -}; - // @public (undocumented) type If = NodeBase & { type: 'if'; @@ -502,24 +298,6 @@ type If = NodeBase & { else?: Statement | Expression; }; -// @public (undocumented) -type If_2 = NodeBase_2 & { - type: 'if'; - cond: Expression_2; - then: Statement_2 | Expression_2; - elseif: { - cond: Expression_2; - then: Statement_2 | Expression_2; - }[]; - else?: Statement_2 | Expression_2; -}; - -// @public (undocumented) -function INDEX(target: Index_2['target'], index: Index_2['index'], loc?: { - start: number; - end: number; -}): Index_2; - // @public (undocumented) type Index = NodeBase & { type: 'index'; @@ -527,29 +305,6 @@ type Index = NodeBase & { index: Expression; }; -// @public (undocumented) -type Index_2 = NodeBase_2 & { - type: 'index'; - target: Expression_2; - index: Expression_2; -}; - -// @public (undocumented) -type IndexChain = NodeBase_2 & { - type: 'indexChain'; - index: Expression_2; -}; - -// @public (undocumented) -type Infix = NodeBase_2 & { - type: 'infix'; - operands: Expression_2[]; - operators: InfixOperator[]; -}; - -// @public (undocumented) -type InfixOperator = '||' | '&&' | '==' | '!=' | '<=' | '>=' | '<' | '>' | '+' | '-' | '*' | '^' | '/' | '%'; - // @public (undocumented) export class Interpreter { constructor(consts: Record, opts?: { @@ -586,9 +341,6 @@ function isBoolean(val: Value): val is VBool; // @public (undocumented) function isExpression(x: Node_2): x is Expression; -// @public (undocumented) -function isExpression_2(x: Node_3): x is Expression_2; - // @public (undocumented) function isFunction(val: Value): val is VFn; @@ -601,9 +353,6 @@ function isObject(val: Value): val is VObj; // @public (undocumented) function isStatement(x: Node_2): x is Statement; -// @public (undocumented) -function isStatement_2(x: Node_3): x is Statement_2; - // @public (undocumented) function isString(val: Value): val is VStr; @@ -612,8 +361,8 @@ function jsToVal(val: any): Value; // @public type Loc = { - start: number; - end: number; + line: number; + column: number; }; // @public (undocumented) @@ -622,12 +371,6 @@ type Loop = NodeBase & { statements: (Statement | Expression)[]; }; -// @public (undocumented) -type Loop_2 = NodeBase_2 & { - type: 'loop'; - statements: (Statement_2 | Expression_2)[]; -}; - // @public (undocumented) type Match = NodeBase & { type: 'match'; @@ -639,17 +382,6 @@ type Match = NodeBase & { default?: Statement | Expression; }; -// @public (undocumented) -type Match_2 = NodeBase_2 & ChainProp & { - type: 'match'; - about: Expression_2; - qs: { - q: Expression_2; - a: Statement_2 | Expression_2; - }[]; - default?: Statement_2 | Expression_2; -}; - // @public (undocumented) type Meta = NodeBase & { type: 'meta'; @@ -657,13 +389,6 @@ type Meta = NodeBase & { value: Expression; }; -// @public (undocumented) -type Meta_2 = NodeBase_2 & { - type: 'meta'; - name: string | null; - value: Expression_2; -}; - // @public (undocumented) type NamedTypeSource = NodeBase & { type: 'namedTypeSource'; @@ -671,13 +396,6 @@ type NamedTypeSource = NodeBase & { inner?: TypeSource; }; -// @public (undocumented) -type NamedTypeSource_2 = NodeBase_2 & { - type: 'namedTypeSource'; - name: string; - inner?: TypeSource_2; -}; - // @public (undocumented) type Namespace = NodeBase & { type: 'ns'; @@ -686,17 +404,7 @@ type Namespace = NodeBase & { }; // @public (undocumented) -type Namespace_2 = NodeBase_2 & { - type: 'ns'; - name: string; - members: (Definition_2 | Namespace_2)[]; -}; - -// @public (undocumented) -type Node_2 = Namespace | Meta | Statement | Expression | TypeSource; - -// @public -type Node_3 = Namespace_2 | Meta_2 | Statement_2 | Expression_2 | ChainMember | TypeSource_2; +type Node_2 = Namespace | Meta | Statement | Expression | TypeSource | Attribute; // @public class NonAiScriptError extends AiScriptError { @@ -711,12 +419,6 @@ type Not = NodeBase & { expr: Expression; }; -// @public (undocumented) -type Not_2 = NodeBase_2 & { - type: 'not'; - expr: Expression_2; -}; - // @public (undocumented) const NULL: { type: "null"; @@ -727,11 +429,6 @@ type Null = NodeBase & { type: 'null'; }; -// @public (undocumented) -type Null_2 = NodeBase_2 & ChainProp & { - type: 'null'; -}; - // @public (undocumented) const NUM: (num: VNum['value']) => VNum; @@ -741,12 +438,6 @@ type Num = NodeBase & { value: number; }; -// @public (undocumented) -type Num_2 = NodeBase_2 & ChainProp & { - type: 'num'; - value: number; -}; - // @public (undocumented) const OBJ: (obj: VObj['value']) => VObj; @@ -756,12 +447,6 @@ type Obj = NodeBase & { value: Map; }; -// @public (undocumented) -type Obj_2 = NodeBase_2 & ChainProp & { - type: 'obj'; - value: Map; -}; - // @public (undocumented) type Or = NodeBase & { type: 'or'; @@ -769,13 +454,6 @@ type Or = NodeBase & { right: Expression; }; -// @public (undocumented) -type Or_2 = NodeBase_2 & { - type: 'or'; - left: Expression_2; - right: Expression_2; -}; - // @public (undocumented) export class Parser { constructor(); @@ -788,17 +466,11 @@ export class Parser { } // @public (undocumented) -export type ParserPlugin = (nodes: Cst.Node[]) => Cst.Node[]; +export type ParserPlugin = (nodes: Ast.Node[]) => Ast.Node[]; // @public (undocumented) export type PluginType = 'validate' | 'transform'; -// @public (undocumented) -function PROP(target: Prop_2['target'], name: Prop_2['name'], loc?: { - start: number; - end: number; -}): Prop_2; - // @public (undocumented) type Prop = NodeBase & { type: 'prop'; @@ -806,19 +478,6 @@ type Prop = NodeBase & { name: string; }; -// @public (undocumented) -type Prop_2 = NodeBase_2 & { - type: 'prop'; - target: Expression_2; - name: string; -}; - -// @public (undocumented) -type PropChain = NodeBase_2 & { - type: 'propChain'; - name: string; -}; - // @public (undocumented) function reprValue(value: Value, literalLike?: boolean, processedObjects?: Set): string; @@ -831,12 +490,6 @@ type Return = NodeBase & { expr: Expression; }; -// @public (undocumented) -type Return_2 = NodeBase_2 & { - type: 'return'; - expr: Expression_2; -}; - // @public (undocumented) export class Scope { constructor(layerdStates?: Scope['layerdStates'], parent?: Scope, name?: Scope['name']); @@ -861,10 +514,6 @@ export class Scope { // @public (undocumented) type Statement = Definition | Return | Each | For | Loop | Break | Continue | Assign | AddAssign | SubAssign; -// @public (undocumented) -type Statement_2 = Definition_2 | Return_2 | Attribute_2 | // AST -Each_2 | For_2 | Loop_2 | Break_2 | Continue_2 | Assign_2 | AddAssign_2 | SubAssign_2; - // @public (undocumented) const STR: (str: VStr['value']) => VStr; @@ -874,12 +523,6 @@ type Str = NodeBase & { value: string; }; -// @public (undocumented) -type Str_2 = NodeBase_2 & ChainProp & { - type: 'str'; - value: string; -}; - // @public (undocumented) type SubAssign = NodeBase & { type: 'subAssign'; @@ -887,25 +530,12 @@ type SubAssign = NodeBase & { expr: Expression; }; -// @public (undocumented) -type SubAssign_2 = NodeBase_2 & { - type: 'subAssign'; - dest: Expression_2; - expr: Expression_2; -}; - // @public (undocumented) type Tmpl = NodeBase & { type: 'tmpl'; tmpl: (string | Expression)[]; }; -// @public (undocumented) -type Tmpl_2 = NodeBase_2 & ChainProp & { - type: 'tmpl'; - tmpl: (string | Expression_2)[]; -}; - // @public (undocumented) const TRUE: { type: "bool"; @@ -915,9 +545,6 @@ const TRUE: { // @public (undocumented) type TypeSource = NamedTypeSource | FnTypeSource; -// @public (undocumented) -type TypeSource_2 = NamedTypeSource_2 | FnTypeSource_2; - // @public (undocumented) const unWrapRet: (v: Value) => Value; From 36a37f43d9f12e6ea49d05f37c194c5291043220 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 17:40:34 +0900 Subject: [PATCH 123/126] update CHANGELOG --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a12faf8f..72299005 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,13 @@ >- 関数`Str:from_codepoint` `Str#codepoint_at`を追加 # 0.17.0 -- 演算子の前後で改行できるように - `package.json`を修正 +- 新しいAiScriptパーサーが実装されました。 +- スペースの厳密さが緩和されました。 +- 文字列リテラルやテンプレートで、`\`とそれに続く1文字は全てエスケープシーケンスとして扱われるようになりました。 ## Breaking changes - `match`の構文が変更されました。各行の先頭に`case`または`default`が必要になります。 +- 改行トークンを導入。改行の扱いが今までより厳密になりました。改行することができると決められた部分以外では文法エラーになります。 # 0.16.0 - **ネームスペースのトップレベルに`var`は定義できなくなりました。(`let`は可能)** From 3c07dd4be654bf64bab9102b13d877628a665243 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sun, 8 Oct 2023 18:50:58 +0900 Subject: [PATCH 124/126] add jsdoc --- src/parser/scanner.ts | 20 +++++++++++++ src/parser/streams/char-stream.ts | 15 ++++++++++ src/parser/streams/token-stream.ts | 45 ++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index c2b86d11..67a5d93e 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -28,14 +28,23 @@ export class Scanner implements ITokenStream { this._tokens.push(this.readToken()); } + /** + * カーソル位置にあるトークンを取得します。 + */ public get token(): Token { return this._tokens[0]!; } + /** + * カーソル位置にあるトークンの種類を取得します。 + */ public get kind(): TokenKind { return this.token.kind; } + /** + * カーソル位置を次のトークンへ進めます。 + */ public next(): void { // 現在のトークンがEOFだったら次のトークンに進まない if (this._tokens[0]!.kind === TokenKind.EOF) { @@ -49,6 +58,9 @@ export class Scanner implements ITokenStream { } } + /** + * トークンの先読みを行います。カーソル位置は移動されません。 + */ public lookahead(offset: number): Token { while (this._tokens.length <= offset) { this._tokens.push(this.readToken()); @@ -57,12 +69,20 @@ export class Scanner implements ITokenStream { return this._tokens[offset]!; } + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致するかを確認します。 + * 一致しなかった場合には文法エラーを発生させます。 + */ public expect(kind: TokenKind): void { if (this.kind !== kind) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.kind]}`); } } + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致することを確認し、 + * カーソル位置を次のトークンへ進めます。 + */ public nextWith(kind: TokenKind): void { this.expect(kind); this.next(); diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts index 80d47f62..58b36793 100644 --- a/src/parser/streams/char-stream.ts +++ b/src/parser/streams/char-stream.ts @@ -25,10 +25,16 @@ export class CharStream { this.moveNext(); } + /** + * ストリームの終わりに達しているかどうかを取得します。 + */ public get eof(): boolean { return this.endOfPage && this.isLastPage; } + /** + * カーソル位置にある文字を取得します。 + */ public get char(): string { if (this.eof) { throw new Error('end of stream'); @@ -36,6 +42,9 @@ export class CharStream { return this._char!; } + /** + * カーソル位置に対応するソースコード上の行番号と列番号を取得します。 + */ public getPos(): { line: number, column: number } { return { line: (this.line + 1), @@ -43,6 +52,9 @@ export class CharStream { }; } + /** + * カーソル位置を次の文字へ進めます。 + */ public next(): void { if (!this.eof && this._char === '\n') { this.line++; @@ -54,6 +66,9 @@ export class CharStream { this.moveNext(); } + /** + * カーソル位置を前の文字へ戻します。 + */ public prev(): void { this.decAddr(); this.movePrev(); diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts index 872e2e68..3dae2a2d 100644 --- a/src/parser/streams/token-stream.ts +++ b/src/parser/streams/token-stream.ts @@ -6,11 +6,36 @@ import type { Token } from '../token.js'; * トークンの読み取りに関するインターフェース */ export interface ITokenStream { + /** + * カーソル位置にあるトークンを取得します。 + */ get token(): Token; + + /** + * カーソル位置にあるトークンの種類を取得します。 + */ get kind(): TokenKind; + + /** + * カーソル位置を次のトークンへ進めます。 + */ next(): void; + + /** + * トークンの先読みを行います。カーソル位置は移動されません。 + */ lookahead(offset: number): Token; + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致するかを確認します。 + * 一致しなかった場合には文法エラーを発生させます。 + */ expect(kind: TokenKind): void; + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致することを確認し、 + * カーソル位置を次のトークンへ進めます。 + */ nextWith(kind: TokenKind): void; } @@ -32,6 +57,9 @@ export class TokenStream implements ITokenStream { return (this.index >= this.source.length); } + /** + * カーソル位置にあるトークンを取得します。 + */ public get token(): Token { if (this.eof) { return TOKEN(TokenKind.EOF, { line: -1, column: -1 }); @@ -39,10 +67,16 @@ export class TokenStream implements ITokenStream { return this._token; } + /** + * カーソル位置にあるトークンの種類を取得します。 + */ public get kind(): TokenKind { return this.token.kind; } + /** + * カーソル位置を次のトークンへ進めます。 + */ public next(): void { if (!this.eof) { this.index++; @@ -50,6 +84,9 @@ export class TokenStream implements ITokenStream { this.load(); } + /** + * トークンの先読みを行います。カーソル位置は移動されません。 + */ public lookahead(offset: number): Token { if (this.index + offset < this.source.length) { return this.source[this.index + offset]!; @@ -58,12 +95,20 @@ export class TokenStream implements ITokenStream { } } + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致するかを確認します。 + * 一致しなかった場合には文法エラーを発生させます。 + */ public expect(kind: TokenKind): void { if (this.kind !== kind) { throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.kind]}`); } } + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致することを確認し、 + * カーソル位置を次のトークンへ進めます。 + */ public nextWith(kind: TokenKind): void { this.expect(kind); this.next(); From 12f0af795804a9ab6f0894437ac9784e61fa8dd5 Mon Sep 17 00:00:00 2001 From: marihachi Date: Mon, 9 Oct 2023 21:22:42 +0900 Subject: [PATCH 125/126] disallow spaces in a reference --- src/parser/syntaxes/expressions.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts index 0440af5c..7aafacf7 100644 --- a/src/parser/syntaxes/expressions.ts +++ b/src/parser/syntaxes/expressions.ts @@ -450,7 +450,13 @@ function parseReference(s: ITokenStream): Ast.Node { while (true) { if (segs.length > 0) { if (s.kind === TokenKind.Colon) { + if (s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('Cannot use spaces in a reference.'); + } s.next(); + if (s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('Cannot use spaces in a reference.'); + } } else { break; } From 62a0a27f7093c92249886eca0c36f272fe9d6e83 Mon Sep 17 00:00:00 2001 From: marihachi Date: Sat, 14 Oct 2023 18:50:35 +0900 Subject: [PATCH 126/126] test: match syntax --- test/index.ts | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/test/index.ts b/test/index.ts index 1daf0779..48cb8a6b 100644 --- a/test/index.ts +++ b/test/index.ts @@ -303,8 +303,8 @@ describe('Infix expression', () => { test.concurrent('syntax symbols vs infix operators', async () => { const res = await exe(` <: match true { - 1 == 1 => "true" - 1 < 1 => "false" + case 1 == 1 => "true" + case 1 < 1 => "false" } `); eq(res, STR('true')); @@ -317,8 +317,8 @@ describe('Infix expression', () => { test.concurrent('number + match expression', async () => { const res = await exe(` <: 1 + match 2 == 2 { - true => 3 - false => 4 + case true => 3 + case false => 4 } `); eq(res, NUM(4)); @@ -1512,9 +1512,9 @@ describe('match', () => { test.concurrent('Basic', async () => { const res = await exe(` <: match 2 { - 1 => "a" - 2 => "b" - 3 => "c" + case 1 => "a" + case 2 => "b" + case 3 => "c" } `); eq(res, STR('b')); @@ -1523,9 +1523,9 @@ describe('match', () => { test.concurrent('When default not provided, returns null', async () => { const res = await exe(` <: match 42 { - 1 => "a" - 2 => "b" - 3 => "c" + case 1 => "a" + case 2 => "b" + case 3 => "c" } `); eq(res, NULL); @@ -1534,10 +1534,10 @@ describe('match', () => { test.concurrent('With default', async () => { const res = await exe(` <: match 42 { - 1 => "a" - 2 => "b" - 3 => "c" - * => "d" + case 1 => "a" + case 2 => "b" + case 3 => "c" + default => "d" } `); eq(res, STR('d')); @@ -1546,13 +1546,13 @@ describe('match', () => { test.concurrent('With block', async () => { const res = await exe(` <: match 2 { - 1 => 1 - 2 => { + case 1 => 1 + case 2 => { let a = 1 let b = 2 (a + b) } - 3 => 3 + case 3 => 3 } `); eq(res, NUM(3)); @@ -1562,7 +1562,7 @@ describe('match', () => { const res = await exe(` @f(x) { match x { - 1 => { + case 1 => { return "ai" } }