diff --git a/CHANGELOG.md b/CHANGELOG.md index a280b091..61ec5f6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ バージョン0.16.0に記録漏れがありました。 >- 関数`Str:from_codepoint` `Str#codepoint_at`を追加 +# Next +- 新しいAiScriptパーサーが実装されました。 +- スペースの厳密さが緩和されました。 +- 文字列リテラルやテンプレートで、`\`とそれに続く1文字は全てエスケープシーケンスとして扱われるようになりました。 +## Breaking changes +- 改行トークンを導入。改行の扱いが今までより厳密になりました。改行することができると決められた部分以外では文法エラーになります。 + # 0.17.0 - `package.json`を修正 diff --git a/docs/parser/scanner.md b/docs/parser/scanner.md new file mode 100644 index 00000000..6e78da1b --- /dev/null +++ b/docs/parser/scanner.md @@ -0,0 +1,9 @@ +# Scanner 設計資料 +作成者: marihachi + +## 現在のトークンと先読みされたトークン +_tokensの0番には現在のトークンが保持される。また、トークンが先読みされた場合は1番以降にそれらのトークンが保持されていくことになる。 +例えば、次のトークンを1つ先読みした場合は0番に現在のトークンが入り1番に先読みされたトークンが入る。 + +nextメソッドで現在位置が移動すると、それまで0番にあったトークン(現在のトークン)は配列から削除され、1番にあった要素は現在のトークンとなる。 +配列から全てのトークンが無くなった場合はトークンの読み取りが実行される。 diff --git a/etc/aiscript.api.md b/etc/aiscript.api.md index c430eb08..82532fc2 100644 --- a/etc/aiscript.api.md +++ b/etc/aiscript.api.md @@ -13,15 +13,6 @@ type AddAssign = NodeBase & { expr: Expression; }; -// Warning: (ae-forgotten-export) The symbol "NodeBase_2" needs to be exported by the entry point index.d.ts -// -// @public (undocumented) -type AddAssign_2 = NodeBase_2 & { - type: 'addAssign'; - dest: Expression_2; - expr: Expression_2; -}; - // @public (undocumented) abstract class AiScriptError extends Error { constructor(message: string, info?: any); @@ -64,13 +55,6 @@ type And = NodeBase & { right: Expression; }; -// @public (undocumented) -type And_2 = NodeBase_2 & { - type: 'and'; - left: Expression_2; - right: Expression_2; -}; - // @public (undocumented) const ARR: (arr: VArr['value']) => VArr; @@ -80,14 +64,6 @@ type Arr = NodeBase & { value: Expression[]; }; -// Warning: (ae-forgotten-export) The symbol "ChainProp" needs to be exported by the entry point index.d.ts -// -// @public (undocumented) -type Arr_2 = NodeBase_2 & ChainProp & { - type: 'arr'; - value: 
Expression_2[]; -}; - // @public (undocumented) function assertArray(val: Value | null | undefined): asserts val is VArr; @@ -113,23 +89,15 @@ type Assign = NodeBase & { expr: Expression; }; -// @public (undocumented) -type Assign_2 = NodeBase_2 & { - type: 'assign'; - dest: Expression_2; - expr: Expression_2; -}; - declare namespace Ast { export { isStatement, isExpression, Loc, Node_2 as Node, - Statement, - Expression, Namespace, Meta, + Statement, Definition, Attribute, Return, @@ -141,6 +109,7 @@ declare namespace Ast { AddAssign, SubAssign, Assign, + Expression, Not, And, Or, @@ -182,25 +151,12 @@ type Attribute = NodeBase & { value: Expression; }; -// @public (undocumented) -type Attribute_2 = NodeBase_2 & { - type: 'attr'; - name: string; - value: Expression_2; -}; - // @public (undocumented) type Block = NodeBase & { type: 'block'; statements: (Statement | Expression)[]; }; -// @public (undocumented) -type Block_2 = NodeBase_2 & ChainProp & { - type: 'block'; - statements: (Statement_2 | Expression_2)[]; -}; - // @public (undocumented) const BOOL: (bool: VBool['value']) => VBool; @@ -210,12 +166,6 @@ type Bool = NodeBase & { value: boolean; }; -// @public (undocumented) -type Bool_2 = NodeBase_2 & ChainProp & { - type: 'bool'; - value: boolean; -}; - // @public (undocumented) const BREAK: () => Value; @@ -224,17 +174,6 @@ type Break = NodeBase & { type: 'break'; }; -// @public (undocumented) -type Break_2 = NodeBase_2 & { - type: 'break'; -}; - -// @public (undocumented) -function CALL(target: Call_2['target'], args: Call_2['args'], loc?: { - start: number; - end: number; -}): Call_2; - // @public (undocumented) type Call = NodeBase & { type: 'call'; @@ -242,22 +181,6 @@ type Call = NodeBase & { args: Expression[]; }; -// @public (undocumented) -type Call_2 = NodeBase_2 & { - type: 'call'; - target: Expression_2; - args: Expression_2[]; -}; - -// @public (undocumented) -type CallChain = NodeBase_2 & { - type: 'callChain'; - args: Expression_2[]; -}; - -// 
@public (undocumented) -type ChainMember = CallChain | IndexChain | PropChain; - // @public (undocumented) const CONTINUE: () => Value; @@ -266,67 +189,6 @@ type Continue = NodeBase & { type: 'continue'; }; -// @public (undocumented) -type Continue_2 = NodeBase_2 & { - type: 'continue'; -}; - -declare namespace Cst { - export { - isStatement_2 as isStatement, - isExpression_2 as isExpression, - hasChainProp, - CALL, - INDEX, - PROP, - Node_3 as Node, - Statement_2 as Statement, - Expression_2 as Expression, - Namespace_2 as Namespace, - Meta_2 as Meta, - Definition_2 as Definition, - Attribute_2 as Attribute, - Return_2 as Return, - Each_2 as Each, - For_2 as For, - Loop_2 as Loop, - Break_2 as Break, - Continue_2 as Continue, - AddAssign_2 as AddAssign, - SubAssign_2 as SubAssign, - Assign_2 as Assign, - InfixOperator, - Infix, - Not_2 as Not, - And_2 as And, - Or_2 as Or, - If_2 as If, - Fn_2 as Fn, - Match_2 as Match, - Block_2 as Block, - Exists_2 as Exists, - Tmpl_2 as Tmpl, - Str_2 as Str, - Num_2 as Num, - Bool_2 as Bool, - Null_2 as Null, - Obj_2 as Obj, - Arr_2 as Arr, - Identifier_2 as Identifier, - ChainMember, - CallChain, - IndexChain, - PropChain, - Call_2 as Call, - Index_2 as Index, - Prop_2 as Prop, - TypeSource_2 as TypeSource, - NamedTypeSource_2 as NamedTypeSource, - FnTypeSource_2 as FnTypeSource - } -} -export { Cst } - // @public (undocumented) type Definition = NodeBase & { type: 'def'; @@ -337,16 +199,6 @@ type Definition = NodeBase & { attr: Attribute[]; }; -// @public (undocumented) -type Definition_2 = NodeBase_2 & { - type: 'def'; - name: string; - varType?: TypeSource_2; - expr: Expression_2; - mut: boolean; - attr?: Attribute_2[]; -}; - // @public (undocumented) type Each = NodeBase & { type: 'each'; @@ -355,14 +207,6 @@ type Each = NodeBase & { for: Statement | Expression; }; -// @public (undocumented) -type Each_2 = NodeBase_2 & { - type: 'each'; - var: string; - items: Expression_2; - for: Statement_2 | Expression_2; -}; - // 
@public (undocumented) function eq(a: Value, b: Value): boolean; @@ -387,23 +231,12 @@ type Exists = NodeBase & { identifier: Identifier; }; -// @public (undocumented) -type Exists_2 = NodeBase_2 & ChainProp & { - type: 'exists'; - identifier: Identifier_2; -}; - // @public (undocumented) function expectAny(val: Value | null | undefined): asserts val is Value; // @public (undocumented) type Expression = If | Fn | Match | Block | Exists | Tmpl | Str | Num | Bool | Null | Obj | Arr | Not | And | Or | Identifier | Call | Index | Prop; -// @public (undocumented) -type Expression_2 = Infix | Not_2 | And_2 | Or_2 | If_2 | Fn_2 | Match_2 | Block_2 | Exists_2 | Tmpl_2 | Str_2 | Num_2 | Bool_2 | Null_2 | Obj_2 | Arr_2 | Identifier_2 | Call_2 | // IR -Index_2 | // IR -Prop_2; - // @public (undocumented) const FALSE: { type: "bool"; @@ -424,17 +257,6 @@ type Fn = NodeBase & { children: (Statement | Expression)[]; }; -// @public (undocumented) -type Fn_2 = NodeBase_2 & ChainProp & { - type: 'fn'; - args: { - name: string; - argType?: TypeSource_2; - }[]; - retType?: TypeSource_2; - children: (Statement_2 | Expression_2)[]; -}; - // @public (undocumented) const FN_NATIVE: (fn: VFn['native']) => VFn; @@ -445,13 +267,6 @@ type FnTypeSource = NodeBase & { result: TypeSource; }; -// @public (undocumented) -type FnTypeSource_2 = NodeBase_2 & { - type: 'fnTypeSource'; - args: TypeSource_2[]; - result: TypeSource_2; -}; - // @public (undocumented) type For = NodeBase & { type: 'for'; @@ -462,34 +277,15 @@ type For = NodeBase & { for: Statement | Expression; }; -// @public (undocumented) -type For_2 = NodeBase_2 & { - type: 'for'; - var?: string; - from?: Expression_2; - to?: Expression_2; - times?: Expression_2; - for: Statement_2 | Expression_2; -}; - // @public (undocumented) function getLangVersion(input: string): string | null; -// @public (undocumented) -function hasChainProp(x: T): x is T & ChainProp; - // @public (undocumented) type Identifier = NodeBase & { type: 'identifier'; 
name: string; }; -// @public (undocumented) -type Identifier_2 = NodeBase_2 & ChainProp & { - type: 'identifier'; - name: string; -}; - // @public (undocumented) type If = NodeBase & { type: 'if'; @@ -502,24 +298,6 @@ type If = NodeBase & { else?: Statement | Expression; }; -// @public (undocumented) -type If_2 = NodeBase_2 & { - type: 'if'; - cond: Expression_2; - then: Statement_2 | Expression_2; - elseif: { - cond: Expression_2; - then: Statement_2 | Expression_2; - }[]; - else?: Statement_2 | Expression_2; -}; - -// @public (undocumented) -function INDEX(target: Index_2['target'], index: Index_2['index'], loc?: { - start: number; - end: number; -}): Index_2; - // @public (undocumented) type Index = NodeBase & { type: 'index'; @@ -527,29 +305,6 @@ type Index = NodeBase & { index: Expression; }; -// @public (undocumented) -type Index_2 = NodeBase_2 & { - type: 'index'; - target: Expression_2; - index: Expression_2; -}; - -// @public (undocumented) -type IndexChain = NodeBase_2 & { - type: 'indexChain'; - index: Expression_2; -}; - -// @public (undocumented) -type Infix = NodeBase_2 & { - type: 'infix'; - operands: Expression_2[]; - operators: InfixOperator[]; -}; - -// @public (undocumented) -type InfixOperator = '||' | '&&' | '==' | '!=' | '<=' | '>=' | '<' | '>' | '+' | '-' | '*' | '^' | '/' | '%'; - // @public (undocumented) export class Interpreter { constructor(consts: Record, opts?: { @@ -586,9 +341,6 @@ function isBoolean(val: Value): val is VBool; // @public (undocumented) function isExpression(x: Node_2): x is Expression; -// @public (undocumented) -function isExpression_2(x: Node_3): x is Expression_2; - // @public (undocumented) function isFunction(val: Value): val is VFn; @@ -601,9 +353,6 @@ function isObject(val: Value): val is VObj; // @public (undocumented) function isStatement(x: Node_2): x is Statement; -// @public (undocumented) -function isStatement_2(x: Node_3): x is Statement_2; - // @public (undocumented) function isString(val: Value): val 
is VStr; @@ -612,8 +361,8 @@ function jsToVal(val: any): Value; // @public type Loc = { - start: number; - end: number; + line: number; + column: number; }; // @public (undocumented) @@ -622,12 +371,6 @@ type Loop = NodeBase & { statements: (Statement | Expression)[]; }; -// @public (undocumented) -type Loop_2 = NodeBase_2 & { - type: 'loop'; - statements: (Statement_2 | Expression_2)[]; -}; - // @public (undocumented) type Match = NodeBase & { type: 'match'; @@ -639,17 +382,6 @@ type Match = NodeBase & { default?: Statement | Expression; }; -// @public (undocumented) -type Match_2 = NodeBase_2 & ChainProp & { - type: 'match'; - about: Expression_2; - qs: { - q: Expression_2; - a: Statement_2 | Expression_2; - }[]; - default?: Statement_2 | Expression_2; -}; - // @public (undocumented) type Meta = NodeBase & { type: 'meta'; @@ -657,13 +389,6 @@ type Meta = NodeBase & { value: Expression; }; -// @public (undocumented) -type Meta_2 = NodeBase_2 & { - type: 'meta'; - name: string | null; - value: Expression_2; -}; - // @public (undocumented) type NamedTypeSource = NodeBase & { type: 'namedTypeSource'; @@ -671,13 +396,6 @@ type NamedTypeSource = NodeBase & { inner?: TypeSource; }; -// @public (undocumented) -type NamedTypeSource_2 = NodeBase_2 & { - type: 'namedTypeSource'; - name: string; - inner?: TypeSource_2; -}; - // @public (undocumented) type Namespace = NodeBase & { type: 'ns'; @@ -686,17 +404,7 @@ type Namespace = NodeBase & { }; // @public (undocumented) -type Namespace_2 = NodeBase_2 & { - type: 'ns'; - name: string; - members: (Definition_2 | Namespace_2)[]; -}; - -// @public (undocumented) -type Node_2 = Namespace | Meta | Statement | Expression | TypeSource; - -// @public -type Node_3 = Namespace_2 | Meta_2 | Statement_2 | Expression_2 | ChainMember | TypeSource_2; +type Node_2 = Namespace | Meta | Statement | Expression | TypeSource | Attribute; // @public class NonAiScriptError extends AiScriptError { @@ -711,12 +419,6 @@ type Not = NodeBase & { expr: 
Expression; }; -// @public (undocumented) -type Not_2 = NodeBase_2 & { - type: 'not'; - expr: Expression_2; -}; - // @public (undocumented) const NULL: { type: "null"; @@ -727,11 +429,6 @@ type Null = NodeBase & { type: 'null'; }; -// @public (undocumented) -type Null_2 = NodeBase_2 & ChainProp & { - type: 'null'; -}; - // @public (undocumented) const NUM: (num: VNum['value']) => VNum; @@ -741,12 +438,6 @@ type Num = NodeBase & { value: number; }; -// @public (undocumented) -type Num_2 = NodeBase_2 & ChainProp & { - type: 'num'; - value: number; -}; - // @public (undocumented) const OBJ: (obj: VObj['value']) => VObj; @@ -756,12 +447,6 @@ type Obj = NodeBase & { value: Map; }; -// @public (undocumented) -type Obj_2 = NodeBase_2 & ChainProp & { - type: 'obj'; - value: Map; -}; - // @public (undocumented) type Or = NodeBase & { type: 'or'; @@ -769,13 +454,6 @@ type Or = NodeBase & { right: Expression; }; -// @public (undocumented) -type Or_2 = NodeBase_2 & { - type: 'or'; - left: Expression_2; - right: Expression_2; -}; - // @public (undocumented) export class Parser { constructor(); @@ -788,17 +466,11 @@ export class Parser { } // @public (undocumented) -export type ParserPlugin = (nodes: Cst.Node[]) => Cst.Node[]; +export type ParserPlugin = (nodes: Ast.Node[]) => Ast.Node[]; // @public (undocumented) export type PluginType = 'validate' | 'transform'; -// @public (undocumented) -function PROP(target: Prop_2['target'], name: Prop_2['name'], loc?: { - start: number; - end: number; -}): Prop_2; - // @public (undocumented) type Prop = NodeBase & { type: 'prop'; @@ -806,19 +478,6 @@ type Prop = NodeBase & { name: string; }; -// @public (undocumented) -type Prop_2 = NodeBase_2 & { - type: 'prop'; - target: Expression_2; - name: string; -}; - -// @public (undocumented) -type PropChain = NodeBase_2 & { - type: 'propChain'; - name: string; -}; - // @public (undocumented) function reprValue(value: Value, literalLike?: boolean, processedObjects?: Set): string; @@ -831,12 
+490,6 @@ type Return = NodeBase & { expr: Expression; }; -// @public (undocumented) -type Return_2 = NodeBase_2 & { - type: 'return'; - expr: Expression_2; -}; - // @public (undocumented) export class Scope { constructor(layerdStates?: Scope['layerdStates'], parent?: Scope, name?: Scope['name']); @@ -861,10 +514,6 @@ export class Scope { // @public (undocumented) type Statement = Definition | Return | Each | For | Loop | Break | Continue | Assign | AddAssign | SubAssign; -// @public (undocumented) -type Statement_2 = Definition_2 | Return_2 | Attribute_2 | // AST -Each_2 | For_2 | Loop_2 | Break_2 | Continue_2 | Assign_2 | AddAssign_2 | SubAssign_2; - // @public (undocumented) const STR: (str: VStr['value']) => VStr; @@ -874,12 +523,6 @@ type Str = NodeBase & { value: string; }; -// @public (undocumented) -type Str_2 = NodeBase_2 & ChainProp & { - type: 'str'; - value: string; -}; - // @public (undocumented) type SubAssign = NodeBase & { type: 'subAssign'; @@ -887,25 +530,12 @@ type SubAssign = NodeBase & { expr: Expression; }; -// @public (undocumented) -type SubAssign_2 = NodeBase_2 & { - type: 'subAssign'; - dest: Expression_2; - expr: Expression_2; -}; - // @public (undocumented) type Tmpl = NodeBase & { type: 'tmpl'; tmpl: (string | Expression)[]; }; -// @public (undocumented) -type Tmpl_2 = NodeBase_2 & ChainProp & { - type: 'tmpl'; - tmpl: (string | Expression_2)[]; -}; - // @public (undocumented) const TRUE: { type: "bool"; @@ -915,9 +545,6 @@ const TRUE: { // @public (undocumented) type TypeSource = NamedTypeSource | FnTypeSource; -// @public (undocumented) -type TypeSource_2 = NamedTypeSource_2 | FnTypeSource_2; - // @public (undocumented) const unWrapRet: (v: Value) => Value; diff --git a/package.json b/package.json index 4e635669..b63d2a89 100644 --- a/package.json +++ b/package.json @@ -20,14 +20,10 @@ "scripts": { "start": "node ./run", "parse": "node ./parse", - "peg": "peggy --format es --cache -o src/parser/parser.js --allowed-start-rules 
Preprocess,Main src/parser/parser.peggy && npm run peg-copy", - "peg-debug": "peggy --trace --format es --cache -o src/parser/parser.js --allowed-start-rules Preprocess,Main src/parser/parser.peggy && npm run peg-copy", - "peg-copy": "copyfiles -f src/parser/parser.js built/parser/", "ts": "npm run ts-esm && npm run ts-dts", "ts-esm": "tsc --outDir built/esm", "ts-dts": "tsc --outDir built/dts --declaration true --emitDeclarationOnly true --declarationMap true", - "build": "npm run peg && npm run ts", - "build-debug": "npm run peg-debug && tsc", + "build": "npm run ts", "api": "npx api-extractor run --local --verbose", "api-prod": "npx api-extractor run --verbose", "lint": "eslint . --ext .js,.jsx,.ts,.tsx", @@ -44,11 +40,9 @@ "@typescript-eslint/eslint-plugin": "6.7.5", "@typescript-eslint/parser": "6.7.5", "chalk": "5.3.0", - "copyfiles": "2.4.1", "eslint": "8.51.0", "eslint-plugin-import": "2.28.1", "jest": "29.7.0", - "peggy": "3.0.2", "ts-jest": "29.1.1", "ts-jest-resolver": "2.0.1", "ts-node": "10.9.1", diff --git a/parse.js b/parse.js index 74a859cd..ade1f9cb 100644 --- a/parse.js +++ b/parse.js @@ -1,6 +1,7 @@ import fs from 'fs'; import { Parser } from '@syuilo/aiscript'; +import { inspect } from 'util'; const script = fs.readFileSync('./test.is', 'utf8'); const ast = Parser.parse(script); -console.log(JSON.stringify(ast, null, 2)); +console.log(inspect(ast, { depth: 10 })); diff --git a/src/@types/parser.d.ts b/src/@types/parser.d.ts deleted file mode 100644 index 6f2ff82c..00000000 --- a/src/@types/parser.d.ts +++ /dev/null @@ -1,6 +0,0 @@ -import type { Cst } from '../index.js'; - -declare module '*/parser.js' { - // FIXME: 型指定が効いていない - export const parse: (input: string, options: object) => Cst.Node[]; -} diff --git a/src/index.ts b/src/index.ts index a0811739..b524c404 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,7 +7,6 @@ import { Scope } from './interpreter/scope.js'; import * as utils from './interpreter/util.js'; import * as values from 
'./interpreter/value.js'; import { Parser, ParserPlugin, PluginType } from './parser/index.js'; -import * as Cst from './parser/node.js'; import * as errors from './error.js'; import * as Ast from './node.js'; export { Interpreter }; @@ -17,6 +16,5 @@ export { values }; export { Parser }; export { ParserPlugin }; export { PluginType }; -export { Cst }; export { errors }; export { Ast }; diff --git a/src/node.ts b/src/node.ts index 79153c10..caa33196 100644 --- a/src/node.ts +++ b/src/node.ts @@ -1,15 +1,31 @@ /** * ASTノード - * - * ASTノードはCSTノードをインタプリタ等から操作しやすい構造に変形したものです。 */ export type Loc = { - start: number; - end: number; + line: number; + column: number; }; -export type Node = Namespace | Meta | Statement | Expression | TypeSource; +export type Node = Namespace | Meta | Statement | Expression | TypeSource | Attribute; + +type NodeBase = { + loc: Loc; // コード位置 +}; + +export type Namespace = NodeBase & { + type: 'ns'; // 名前空間 + name: string; // 空間名 + members: (Definition | Namespace)[]; // メンバー +}; + +export type Meta = NodeBase & { + type: 'meta'; // メタデータ定義 + name: string | null; // 名 + value: Expression; // 値 +}; + +// statement export type Statement = Definition | @@ -30,53 +46,6 @@ export function isStatement(x: Node): x is Statement { return statementTypes.includes(x.type); } -export type Expression = - If | - Fn | - Match | - Block | - Exists | - Tmpl | - Str | - Num | - Bool | - Null | - Obj | - Arr | - Not | - And | - Or | - Identifier | - Call | - Index | - Prop; - -const expressionTypes = [ - 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'identifier', 'call', 'index', 'prop', -]; -export function isExpression(x: Node): x is Expression { - return expressionTypes.includes(x.type); -} - -type NodeBase = { - loc?: { // コード位置 - start: number; - end: number; - }; -}; - -export type Namespace = NodeBase & { - type: 'ns'; // 名前空間 - name: string; // 空間名 - members: (Definition | Namespace)[]; // メンバー -}; - -export 
type Meta = NodeBase & { - type: 'meta'; // メタデータ定義 - name: string | null; // 名 - value: Expression; // 値 -}; - export type Definition = NodeBase & { type: 'def'; // 変数宣言文 name: string; // 変数名 @@ -144,6 +113,36 @@ export type Assign = NodeBase & { expr: Expression; // 式 }; +// expressions + +export type Expression = + If | + Fn | + Match | + Block | + Exists | + Tmpl | + Str | + Num | + Bool | + Null | + Obj | + Arr | + Not | + And | + Or | + Identifier | + Call | + Index | + Prop; + +const expressionTypes = [ + 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'not', 'and', 'or', 'identifier', 'call', 'index', 'prop', +]; +export function isExpression(x: Node): x is Expression { + return expressionTypes.includes(x.type); +} + export type Not = NodeBase & { type: 'not'; // 否定 expr: Expression; // 式 @@ -241,14 +240,6 @@ export type Identifier = NodeBase & { name: string; // 変数名 }; -// chain node example: -// call > fn -// call > var(fn) -// index > arr -// index > var(arr) -// prop > prop(obj) > var(obj) -// call > prop(fn) > obj - export type Call = NodeBase & { type: 'call'; // 関数呼び出し target: Expression; // 対象 diff --git a/src/parser/index.ts b/src/parser/index.ts index 0c34f647..07092dbb 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,15 +1,12 @@ -import { AiScriptSyntaxError } from '../error.js'; -import * as parser from './parser.js'; +import { Scanner } from './scanner.js'; +import { parseTopLevel } from './syntaxes/toplevel.js'; import { validateKeyword } from './plugins/validate-keyword.js'; import { validateType } from './plugins/validate-type.js'; -import { setAttribute } from './plugins/set-attribute.js'; -import { transformChain } from './plugins/transform-chain.js'; -import { infixToFnCall } from './plugins/infix-to-fncall.js'; -import type * as Cst from './node.js'; + import type * as Ast from '../node.js'; -export type ParserPlugin = (nodes: Cst.Node[]) => Cst.Node[]; +export type ParserPlugin 
= (nodes: Ast.Node[]) => Ast.Node[]; export type PluginType = 'validate' | 'transform'; export class Parser { @@ -26,9 +23,6 @@ export class Parser { validateType, ], transform: [ - setAttribute, - transformChain, - infixToFnCall, ], }; } @@ -54,24 +48,10 @@ export class Parser { } public parse(input: string): Ast.Node[] { - let nodes: Cst.Node[]; + let nodes: Ast.Node[]; - // generate a node tree - try { - // apply preprocessor - const code = parser.parse(input, { startRule: 'Preprocess' }); - // apply main parser - nodes = parser.parse(code, { startRule: 'Main' }); - } catch (e) { - if (e.location) { - if (e.expected) { - throw new AiScriptSyntaxError(`Parsing error. (Line ${e.location.start.line}:${e.location.start.column})`, e); - } else { - throw new AiScriptSyntaxError(`${e.message} (Line ${e.location.start.line}:${e.location.start.column})`, e); - } - } - throw e; - } + const scanner = new Scanner(input); + nodes = parseTopLevel(scanner); // validate the node tree for (const plugin of this.plugins.validate) { @@ -83,6 +63,6 @@ export class Parser { nodes = plugin(nodes); } - return nodes as Ast.Node[]; + return nodes; } } diff --git a/src/parser/node.ts b/src/parser/node.ts deleted file mode 100644 index b9a2dd77..00000000 --- a/src/parser/node.ts +++ /dev/null @@ -1,329 +0,0 @@ -/** - * CSTノード - * - * パーサーが生成する直接的な処理結果です。 - * パーサーが生成しやすい形式になっているため、インタプリタ等では操作しにくい構造になっていることがあります。 - * この処理結果がプラグインによって処理されるとASTノードとなります。 -*/ - -export type Node = Namespace | Meta | Statement | Expression | ChainMember | TypeSource; - -export type Statement = - Definition | - Return | - Attribute | // AST - Each | - For | - Loop | - Break | - Continue | - Assign | - AddAssign | - SubAssign; - -const statementTypes = [ - 'def', 'return', 'attr', 'each', 'for', 'loop', 'break', 'continue', 'assign', 'addAssign', 'subAssign', -]; -export function isStatement(x: Node): x is Statement { - return statementTypes.includes(x.type); -} - -export type Expression = - Infix | - Not | - And | 
- Or | - If | - Fn | - Match | - Block | - Exists | - Tmpl | - Str | - Num | - Bool | - Null | - Obj | - Arr | - Identifier | - Call | // IR - Index | // IR - Prop; // IR - -const expressionTypes = [ - 'infix', 'if', 'fn', 'match', 'block', 'exists', 'tmpl', 'str', 'num', 'bool', 'null', 'obj', 'arr', 'identifier', 'call', 'index', 'prop', -]; -export function isExpression(x: Node): x is Expression { - return expressionTypes.includes(x.type); -} - -type NodeBase = { - __AST_NODE: never; // phantom type - loc?: { - start: number; - end: number; - }; -}; - -export type Namespace = NodeBase & { - type: 'ns'; - name: string; - members: (Definition | Namespace)[]; -}; - -export type Meta = NodeBase & { - type: 'meta'; - name: string | null; - value: Expression; -}; - -export type Definition = NodeBase & { - type: 'def'; - name: string; - varType?: TypeSource; - expr: Expression; - mut: boolean; - attr?: Attribute[]; // IR -}; - -export type Attribute = NodeBase & { - type: 'attr'; - name: string; - value: Expression; -}; - -export type Return = NodeBase & { - type: 'return'; - expr: Expression; -}; - -export type Each = NodeBase & { - type: 'each'; - var: string; - items: Expression; - for: Statement | Expression; -}; - -export type For = NodeBase & { - type: 'for'; - var?: string; - from?: Expression; - to?: Expression; - times?: Expression; - for: Statement | Expression; -}; - -export type Loop = NodeBase & { - type: 'loop'; - statements: (Statement | Expression)[]; -}; - -export type Break = NodeBase & { - type: 'break'; -}; - -export type Continue = NodeBase & { - type: 'continue'; -}; - -export type AddAssign = NodeBase & { - type: 'addAssign'; - dest: Expression; - expr: Expression; -}; - -export type SubAssign = NodeBase & { - type: 'subAssign'; - dest: Expression; - expr: Expression; -}; - -export type Assign = NodeBase & { - type: 'assign'; - dest: Expression; - expr: Expression; -}; - -export type InfixOperator = '||' | '&&' | '==' | '!=' | '<=' | '>=' | '<' | 
'>' | '+' | '-' | '*' | '^' | '/' | '%'; - -export type Infix = NodeBase & { - type: 'infix'; - operands: Expression[]; - operators: InfixOperator[]; -}; - -export type Not = NodeBase & { - type: 'not'; - expr: Expression; -}; - -export type And = NodeBase & { - type: 'and'; - left: Expression; - right: Expression; -} - -export type Or = NodeBase & { - type: 'or'; - left: Expression; - right: Expression; -} - -export type If = NodeBase & { - type: 'if'; - cond: Expression; - then: Statement | Expression; - elseif: { - cond: Expression; - then: Statement | Expression; - }[]; - else?: Statement | Expression; -}; - -export type Fn = NodeBase & ChainProp & { - type: 'fn'; - args: { - name: string; - argType?: TypeSource; - }[]; - retType?: TypeSource; - children: (Statement | Expression)[]; -}; - -export type Match = NodeBase & ChainProp & { - type: 'match'; - about: Expression; - qs: { - q: Expression; - a: Statement | Expression; - }[]; - default?: Statement | Expression; -}; - -export type Block = NodeBase & ChainProp & { - type: 'block'; - statements: (Statement | Expression)[]; -}; - -export type Exists = NodeBase & ChainProp & { - type: 'exists'; - identifier: Identifier; -}; - -export type Tmpl = NodeBase & ChainProp & { - type: 'tmpl'; - tmpl: (string | Expression)[]; -}; - -export type Str = NodeBase & ChainProp & { - type: 'str'; - value: string; -}; - -export type Num = NodeBase & ChainProp & { - type: 'num'; - value: number; -}; - -export type Bool = NodeBase & ChainProp & { - type: 'bool'; - value: boolean; -}; - -export type Null = NodeBase & ChainProp & { - type: 'null'; -}; - -export type Obj = NodeBase & ChainProp & { - type: 'obj'; - value: Map; -}; - -export type Arr = NodeBase & ChainProp & { - type: 'arr'; - value: Expression[]; -}; - -export type Identifier = NodeBase & ChainProp & { - type: 'identifier'; - name: string; -}; - -// AST -type ChainProp = { - chain?: ChainMember[]; -}; - -// AST -export function hasChainProp(x: T): x is T & ChainProp 
{ - return 'chain' in x && x.chain !== null; -} - -// AST -export type ChainMember = CallChain | IndexChain | PropChain; - -// AST -export type CallChain = NodeBase & { - type: 'callChain'; - args: Expression[]; -}; - -// AST -export type IndexChain = NodeBase & { - type: 'indexChain'; - index: Expression; -}; - -// AST -export type PropChain = NodeBase & { - type: 'propChain'; - name: string; -}; - -// IR -export type Call = NodeBase & { - type: 'call'; - target: Expression; - args: Expression[]; -}; -export function CALL(target: Call['target'], args: Call['args'], loc?: { start: number, end: number }): Call { - return { type: 'call', target, args, loc } as Call; -} - -// IR -export type Index = NodeBase & { - type: 'index'; - target: Expression; - index: Expression; -}; - -export function INDEX(target: Index['target'], index: Index['index'], loc?: { start: number, end: number }): Index { - return { type: 'index', target, index, loc } as Index; -} - -// IR -export type Prop = NodeBase & { - type: 'prop'; - target: Expression; - name: string; -}; - -export function PROP(target: Prop['target'], name: Prop['name'], loc?: { start: number, end: number }): Prop { - return { type: 'prop', target, name, loc } as Prop; -} - -// Type source - -export type TypeSource = NamedTypeSource | FnTypeSource; - -export type NamedTypeSource = NodeBase & { - type: 'namedTypeSource'; - name: string; - inner?: TypeSource; -}; - -export type FnTypeSource = NodeBase & { - type: 'fnTypeSource'; - args: TypeSource[]; - result: TypeSource; -}; diff --git a/src/parser/parser.peggy b/src/parser/parser.peggy deleted file mode 100644 index 2c55e520..00000000 --- a/src/parser/parser.peggy +++ /dev/null @@ -1,596 +0,0 @@ -{ - function createNode(type, params, children) { - const node = { type }; - params.children = children; - for (const key of Object.keys(params)) { - if (params[key] !== undefined) { - node[key] = params[key]; - } - } - const loc = location(); - node.loc = { start: 
loc.start.offset, end: loc.end.offset - 1 }; - return node; - } -} - -// -// preprocessor -// - -Preprocess - = s:PreprocessPart* -{ return s.join(''); } - -PreprocessPart - = Tmpl { return text(); } - / Str { return text(); } - / Comment - / . - -Comment - = "//" (!EOL .)* { return ''; } - / "/*" (!"*/" .)* "*/" { return ''; } - - -// -// main parser -// - -Main - = _* content:GlobalStatements? _* -{ return content ?? []; } - -GlobalStatements - = head:GlobalStatement tails:(__* LF _* s:GlobalStatement { return s; })* -{ return [head, ...tails]; } - -NamespaceStatements - = head:NamespaceStatement tails:(__* LF _* s:NamespaceStatement { return s; })* -{ return [head, ...tails]; } - -Statements - = head:Statement tails:(__* LF _* s:Statement { return s; })* -{ return [head, ...tails]; } - -// list of global statements - -GlobalStatement - = Namespace // "::" - / Meta // "###" - / Statement - -// list of namespace statement - -NamespaceStatement - = VarDef - / FnDef - / Namespace - -// list of statement - -Statement - = VarDef // "let" NAME | "var" NAME - / FnDef // "@" - / Out // "<:" - / Return // "return" - / Attr // "+" - / Each // "each" - / For // "for" - / Loop // "loop" - / Break // "break" - / Continue // "continue" - / Assign // Expr "=" | Expr "+=" | Expr "-=" - / Expr - -// list of expression - -Expr - = Infix - / Expr2 - -Expr2 - = If // "if" - / Fn // "@(" - / Chain // Expr3 "(" | Expr3 "[" | Expr3 "." - / Expr3 - -Expr3 - = Match // "match" - / Eval // "eval" - / Exists // "exists" - / Tmpl // "`" - / Str // "\"" - / Num // "+" | "-" | "1"~"9" - / Bool // "true" | "false" - / Null // "null" - / Obj // "{" - / Arr // "[" - / Not // "!" 
- / Identifier // NAME_WITH_NAMESPACE - / "(" _* e:Expr _* ")" { return e; } - -// list of static literal - -StaticLiteral - = Num // "+" "1"~"9" | "-" "1"~"9" | "1"~"9" - / Str // "\"" - / Bool // "true" | "false" - / StaticArr // "[" - / StaticObj // "{" - / Null // "null" - - - -// -// global statements --------------------------------------------------------------------- -// - -// namespace statement - -Namespace - = "::" _+ name:NAME _+ "{" _* members:NamespaceStatements? _* "}" -{ return createNode('ns', { name, members }); } - -// meta statement - -Meta - = "###" __* name:NAME _* value:StaticLiteral -{ return createNode('meta', { name, value }); } - / "###" __* value:StaticLiteral -{ return createNode('meta', { name: null, value }); } - - - -// -// statements ---------------------------------------------------------------------------- -// - -// define statement - -VarDef - = "let" _+ name:NAME type:(_* ":" _* @Type)? _* "=" _* expr:Expr -{ return createNode('def', { name, varType: type, expr, mut: false, attr: [] }); } - / "var" _+ name:NAME type:(_* ":" _* @Type)? _* "=" _* expr:Expr -{ return createNode('def', { name, varType: type, expr, mut: true, attr: [] }); } - -// output statement - -// NOTE: out is syntax sugar for print(expr) -Out - = "<:" _* expr:Expr -{ - return createNode('identifier', { - name: 'print', - chain: [createNode('callChain', { args: [expr] })], - }); -} - -// attribute statement - -// Note: Attribute will be combined with def node when parsing is complete. -Attr - = "#[" _* name:NAME value:(_* @StaticLiteral)? _* "]" -{ - return createNode('attr', { - name: name, - value: value ?? createNode('bool', { value: true }) - }); -} - -// each statement - -Each - = "each" _* "(" "let" _+ varn:NAME _* ","? _* items:Expr ")" _* x:BlockOrStatement -{ - return createNode('each', { - var: varn, - items: items, - for: x, - }); -} - / "each" _+ "let" _+ varn:NAME _* ","? 
_* items:Expr _+ x:BlockOrStatement -{ - return createNode('each', { - var: varn, - items: items, - for: x, - }); -} - -// for statement - -For - = "for" _* "(" "let" _+ varn:NAME _* from_:("=" _* v:Expr { return v; })? ","? _* to:Expr ")" _* x:BlockOrStatement -{ - return createNode('for', { - var: varn, - from: from_ ?? createNode('num', { value: 0 }), - to: to, - for: x, - }); -} - / "for" _+ "let" _+ varn:NAME _* from_:("=" _* v:Expr { return v; })? ","? _* to:Expr _+ x:BlockOrStatement -{ - return createNode('for', { - var: varn, - from: from_ ?? createNode('num', { value: 0 }), - to: to, - for: x, - }); -} - / "for" _* "(" times:Expr ")" _* x:BlockOrStatement -{ - return createNode('for', { - times: times, - for: x, - }); -} - / "for" _+ times:Expr _+ x:BlockOrStatement -{ - return createNode('for', { - times: times, - for: x, - }); -} - -// return statement - -Return - = "return" ![A-Z0-9_:]i _* expr:Expr -{ return createNode('return', { expr }); } - -// loop statement - -Loop - = "loop" _* "{" _* s:Statements _* "}" -{ return createNode('loop', { statements: s }); } - -// break statement - -Break - = "break" ![A-Z0-9_:]i -{ return createNode('break', {}); } - -// continue statement - -Continue - = "continue" ![A-Z0-9_:]i -{ return createNode('continue', {}); } - -// assign statement - -Assign - = dest:Expr _* op:("+=" / "-=" / "=") _* expr:Expr -{ - if (op === '+=') - return createNode('addAssign', { dest, expr }); - else if (op === '-=') - return createNode('subAssign', { dest, expr }); - else - return createNode('assign', { dest, expr }); -} - - - -// -// expressions -------------------------------------------------------------------- -// - -// infix expression - -Infix - = head:Expr2 tail:(InfixSp* op:Op InfixSp* term:Expr2 { return {op, term}; })+ -{ - return createNode('infix', { - operands: [head, ...tail.map(i => i.term)], - operators: tail.map(i => i.op) - }); -} - -InfixSp - = "\\" LF - / __ - -Op - = ("||" / "&&" / "==" / "!=" / "<=" / ">=" / "<" 
/ ">" / "+" / "-" / "*" / "^" / "/" / "%") -{ return text(); } - -Not - = "!" expr:Expr -{ - return createNode('not', { - expr: expr, - }); -} - - -// chain - -Chain - = e:Expr3 chain:(CallChain / IndexChain / PropChain)+ -{ return { ...e, chain }; } - -CallChain - = "(" _* args:CallArgs? _* ")" -{ return createNode('callChain', { args: args ?? [] }); } - -CallArgs - = head:Expr tails:(SEP expr:Expr { return expr; })* -{ return [head, ...tails]; } - -IndexChain - = "[" _* index:Expr _* "]" -{ return createNode('indexChain', { index }); } - -PropChain - = "." name:NAME -{ return createNode('propChain', { name }); } - -// if statement - -If - = "if" _+ cond:Expr _+ then:BlockOrStatement elseif:(_+ @ElseifBlocks)? elseBlock:(_+ @ElseBlock)? -{ - return createNode('if', { - cond: cond, - then: then, - elseif: elseif ?? [], - else: elseBlock - }); -} - -ElseifBlocks - = head:ElseifBlock tails:(_* @ElseifBlock)* -{ return [head, ...tails]; } - -ElseifBlock - = "elif" ![A-Z0-9_:]i _* cond:Expr _* then:BlockOrStatement -{ return { cond, then }; } - -ElseBlock - = "else" ![A-Z0-9_:]i _* then:BlockOrStatement -{ return then; } - -// match expression - -Match - = "match" ![A-Z0-9_:]i _* about:Expr _* "{" _* qs:(q:Expr _* "=>" _* a:BlockOrStatement _* { return { q, a }; })+ x:("*" _* "=>" _* @BlockOrStatement _*)? _* "}" -{ - return createNode('match', { - about: about, - qs: qs ?? 
[], - default: x - }); -} - -// eval expression - -Eval - = "eval" _* "{" _* s:Statements _* "}" -{ return createNode('block', { statements: s }); } - -// exists expression - -Exists - = "exists" _+ i:Identifier -{ return createNode('exists', { identifier: i }); } - -// variable reference expression - -Identifier - = name:NAME_WITH_NAMESPACE -{ return createNode('identifier', { name }); } - - - -// -// literals ------------------------------------------------------------------------------ -// - -// template literal - -Tmpl - = "`" items:(!"`" @TmplEmbed)* "`" -{ return createNode('tmpl', { tmpl: items }); } - -TmplEmbed - = "{" __* @expr:Expr __* "}" - / str:TmplAtom+ {return str.join("")} - -TmplAtom - = TmplEsc - / [^`{] - -TmplEsc - = "\\" @[{}`] - -// string literal - -Str - = "\"" value:(!"\"" c:(StrDoubleQuoteEsc / .) { return c; })* "\"" -{ return createNode('str', { value: value.join('') }); } - / "'" value:(!"'" c:(StrSingleQuoteEsc / .) { return c; })* "'" -{ return createNode('str', { value: value.join('') }); } - -StrDoubleQuoteEsc - = "\\\"" -{ return '"'; } - -StrSingleQuoteEsc - = "\\\'" -{ return '\''; } - -// number literal -Num - = Float - / Int - -Float - = [+-]? [1-9] [0-9]+ "." [0-9]+ - { return createNode('num', { value: parseFloat(text())}); } - / [+-]? [0-9] "." [0-9]+ - { return createNode('num', { value: parseFloat(text())}); } - -Int - = [+-]? [1-9] [0-9]+ -{ return createNode('num', { value: parseInt(text(), 10) }); } - / [+-]? [0-9] -{ return createNode('num', { value: parseInt(text(), 10) }); } - -// boolean literal - -Bool - = True - / False - -True - = "true" ![A-Z0-9_:]i -{ return createNode('bool', { value: true }); } - -False - = "false" ![A-Z0-9_:]i -{ return createNode('bool', { value: false }); } - -// null literal - -Null - = "null" ![A-Z0-9_:]i -{ return createNode('null', {}); } - -// object literal - -Obj - = "{" _* kvs:(k:NAME _* ":" _+ v:Expr _* ("," / ";")? 
_* { return { k, v }; })* "}" -{ - const obj = new Map(); - for (const kv of kvs) { - obj.set(kv.k, kv.v); - } - return createNode('obj', { value: obj }); -} - -// array literal - -Arr - = "[" _* items:(item:Expr _* ","? _* { return item; })* _* "]" -{ return createNode('arr', { value: items }); } - - - -// -// function ------------------------------------------------------------------------------ -// - -Arg - = name:NAME type:(_* ":" _* @Type)? -{ return { name, argType: type }; } - -Args - = head:Arg tails:(SEP @Arg)* -{ return [head, ...tails]; } - -// define function statement - -FnDef - = "@" s1:__* name:NAME s2:__* "(" _* args:Args? _* ")" ret:(_* ":" _* @Type)? _* "{" _* content:Statements? _* "}" -{ - if (s1.length > 0 || s2.length > 0) { - error('Cannot use spaces before or after the function name.'); - } - return createNode('def', { - name: name, - expr: createNode('fn', { args: args ?? [], retType: ret }, content ?? []), - mut: false, - attr: [] - }); -} - -// function expression - -Fn = "@(" _* args:Args? _* ")" ret:(_* ":" _* @Type)? _* "{" _* content:Statements? _* "}" -{ return createNode('fn', { args: args ?? [], retType: ret }, content ?? []); } - - - -// -// static literal ------------------------------------------------------------------------ -// - -// array literal (static) - -StaticArr - = "[" _* items:(item:StaticLiteral _* ","? _* { return item; })* _* "]" -{ return createNode('arr', { value: items }); } - -// object literal (static) - -StaticObj - = "{" _* kvs:(k:NAME _* ":" _+ v:StaticLiteral _* ("," / ";")? _* { return { k, v }; })* "}" -{ - const obj = new Map(); - for (const kv of kvs) { - obj.set(kv.k, kv.v); - } - return createNode('obj', { value: obj }); -} - - - -// -// type ---------------------------------------------------------------------------------- -// - -Type - = FnType - / NamedType - -FnType - = "@(" _* args:ArgTypes? _* ")" _* "=>" _* result:Type -{ return createNode('fnTypeSource', { args: args ?? 
[], result }); } - -ArgTypes - = head:Type tails:(SEP @Type)* -{ return [head, ...tails]; } - -NamedType - = name:NAME __* "<" __* inner:Type __* ">" -{ return createNode('namedTypeSource', { name, inner }); } - / name:NAME -{ return createNode('namedTypeSource', { name, inner: null }); } - - - -// -// general ------------------------------------------------------------------------------- -// - -NAME - = [A-Z_]i [A-Z0-9_]i* -{ return text(); } - -NAME_WITH_NAMESPACE - = NAME (":" NAME)* -{ return text(); } - -SEP - = _* "," _* - / _+ - -BlockOrStatement - = "{" _* s:Statements? _* "}" -{ return createNode('block', { statements: (s ?? []) }); } - / Statement - -LF - = "\r\n" / [\r\n] - -EOL - = !. / LF - -// spacing -_ - = [ \t\r\n] - -// spacing (no linebreaks) -__ - = [ \t] diff --git a/src/parser/plugins/infix-to-fncall.ts b/src/parser/plugins/infix-to-fncall.ts deleted file mode 100644 index 7d551320..00000000 --- a/src/parser/plugins/infix-to-fncall.ts +++ /dev/null @@ -1,146 +0,0 @@ -import { visitNode } from '../visit.js'; -import { AiScriptSyntaxError } from '../../error.js'; -import type * as Cst from '../node.js'; - -/** - * 中置演算子式を表す木 - * 1 + 3 ならば次のようなイメージ - * ``` - * (+) - * (1) (3) - * ``` - */ -type InfixTree = { - type: 'infixTree'; - left: InfixTree | Cst.Node; - right: InfixTree | Cst.Node; - info: { - priority: number; // 優先度(高いほど優先して計算される値) - } & ({ - func: string; // 対応する関数名 - mapFn?: undefined; - } | { - func?: undefined; - mapFn: ((infix: InfixTree) => Cst.Node); //Nodeへ変換する関数 - }) -}; - -function INFIX_TREE(left: InfixTree | Cst.Node, right: InfixTree | Cst.Node, info: InfixTree['info']): InfixTree { - return { type: 'infixTree', left, right, info }; -} - -/** - * 現在の中置演算子式を表す木に新たな演算子と項を追加した木を構築する - * - * - 新しい演算子の優先度が現在見ている木の演算子の優先度 **以下** である場合は、現在見ている木は新しい演算子の左側の子になる。 - * 1 + 3 - 4 = (1 + 3) - 4 ならば - * ``` - * (-) - * (+) (4) - * (1) (3) - * ``` - * - * - 新しい演算子の優先度が現在見ている木の演算子の優先度 **より大きい** 場合は、右側の子と結合する。 - * 1 + 3 * 4 = 1 + (3 * 4) ならば 
- * ``` - * (+) - * (1) (*) - * (3) (4) - * ``` - * - * - TODO: 左結合性の場合しか考えていない(結合性によって優先度が同じ場合の振る舞いが変わりそう) - * - NOTE: 右結合性の演算子としては代入演算子などが挙げられる - * - NOTE: 比較の演算子などは非結合性とされる - */ -function insertTree(currTree: InfixTree | Cst.Node, nextTree: InfixTree | Cst.Node, nextOpInfo: InfixTree['info']): InfixTree { - if (currTree.type !== 'infixTree') { - return INFIX_TREE(currTree, nextTree, nextOpInfo); - } - - if (nextOpInfo.priority <= currTree.info.priority) { - return INFIX_TREE(currTree, nextTree, nextOpInfo); - } else { - const { left, right, info: currInfo } = currTree; - return INFIX_TREE(left, insertTree(right, nextTree, nextOpInfo), currInfo); - } -} - -/** - * 中置演算子式を表す木を対応する関数呼び出しの構造体に変換する - */ -function treeToNode(tree: InfixTree | Cst.Node): Cst.Node { - if (tree.type !== 'infixTree') { - return tree; - } - - if (tree.info.mapFn) { - return tree.info.mapFn(tree); - } else { - return { - type: 'call', - target: { type: 'identifier', name: tree.info.func }, - args: [treeToNode(tree.left), treeToNode(tree.right)], - } as Cst.Call; - } -} - -const infoTable: Record = { - '*': { func: 'Core:mul', priority: 7 }, - '^': { func: 'Core:pow', priority: 7 }, - '/': { func: 'Core:div', priority: 7 }, - '%': { func: 'Core:mod', priority: 7 }, - '+': { func: 'Core:add', priority: 6 }, - '-': { func: 'Core:sub', priority: 6 }, - '==': { func: 'Core:eq', priority: 4 }, - '!=': { func: 'Core:neq', priority: 4 }, - '<': { func: 'Core:lt', priority: 4 }, - '>': { func: 'Core:gt', priority: 4 }, - '<=': { func: 'Core:lteq', priority: 4 }, - '>=': { func: 'Core:gteq', priority: 4 }, - '&&': { - mapFn: infix => ({ - type: 'and', - left: treeToNode(infix.left), - right: treeToNode(infix.right), - }) as Cst.And, - priority: 3, - }, - '||': { - mapFn: infix => ({ - type: 'or', - left: treeToNode(infix.left), - right: treeToNode(infix.right), - }) as Cst.Or, - priority: 3, - }, -}; - -/** - * NInfix を関数呼び出し形式に変換する - */ -function transform(node: Cst.Infix): Cst.Node { - const infos 
= node.operators.map(op => { - const info = infoTable[op]; - if (info == null) { - throw new AiScriptSyntaxError(`No such operator: ${op}.`); - } - return info; - }); - let currTree = INFIX_TREE(node.operands[0]!, node.operands[1]!, infos[0]!); - for (let i = 0; i < infos.length - 1; i++) { - currTree = insertTree(currTree, node.operands[2 + i]!, infos[1 + i]!); - } - return treeToNode(currTree); -} - -export function infixToFnCall(nodes: Cst.Node[]): Cst.Node[] { - for (let i = 0; i < nodes.length; i++) { - nodes[i] = visitNode(nodes[i]!, (node) => { - if (node.type === 'infix') { - return transform(node); - } - return node; - }); - } - return nodes; -} diff --git a/src/parser/plugins/set-attribute.ts b/src/parser/plugins/set-attribute.ts deleted file mode 100644 index a19e754e..00000000 --- a/src/parser/plugins/set-attribute.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { AiScriptSyntaxError } from '../../error.js'; -import type * as Cst from '../node.js'; - -export function setAttribute(node: Cst.Expression[]): Cst.Expression[] -export function setAttribute(node: Cst.Statement[]): Cst.Statement[] -export function setAttribute(node: (Cst.Statement | Cst.Expression)[]): (Cst.Statement | Cst.Expression)[] -export function setAttribute(node: Cst.Node[]): Cst.Node[] -export function setAttribute(nodes: Cst.Node[]): Cst.Node[] { - const result: Cst.Node[] = []; - const stockedAttrs: Cst.Attribute[] = []; - - for (const node of nodes) { - if (node.type === 'attr') { - stockedAttrs.push(node); - } else if (node.type === 'def') { - if (node.attr == null) { - node.attr = []; - } - node.attr.push(...stockedAttrs); - // clear all - stockedAttrs.splice(0, stockedAttrs.length); - if (node.expr.type === 'fn') { - node.expr.children = setAttribute(node.expr.children); - } - result.push(node); - } else { - if (stockedAttrs.length > 0) { - throw new AiScriptSyntaxError('invalid attribute.'); - } - switch (node.type) { - case 'fn': { - node.children = setAttribute(node.children); - 
break; - } - case 'block': { - node.statements = setAttribute(node.statements); - break; - } - } - result.push(node); - } - } - if (stockedAttrs.length > 0) { - throw new AiScriptSyntaxError('invalid attribute.'); - } - - return result; -} diff --git a/src/parser/plugins/transform-chain.ts b/src/parser/plugins/transform-chain.ts deleted file mode 100644 index 528b9258..00000000 --- a/src/parser/plugins/transform-chain.ts +++ /dev/null @@ -1,39 +0,0 @@ -import * as Cst from '../node.js'; -import { visitNode } from '../visit.js'; - -function transformNode(node: Cst.Node): Cst.Node { - // chain - if (Cst.isExpression(node) && Cst.hasChainProp(node) && node.chain != null) { - const { chain, ...hostNode } = node; - let parent: Cst.Expression = hostNode; - for (const item of chain) { - switch (item.type) { - case 'callChain': { - parent = Cst.CALL(parent, item.args, item.loc); - break; - } - case 'indexChain': { - parent = Cst.INDEX(parent, item.index, item.loc); - break; - } - case 'propChain': { - parent = Cst.PROP(parent, item.name, item.loc); - break; - } - default: { - break; - } - } - } - return parent; - } - - return node; -} - -export function transformChain(nodes: Cst.Node[]): Cst.Node[] { - for (let i = 0; i < nodes.length; i++) { - nodes[i] = visitNode(nodes[i]!, transformNode); - } - return nodes; -} diff --git a/src/parser/plugins/validate-keyword.ts b/src/parser/plugins/validate-keyword.ts index 3e9af586..1d7d5b10 100644 --- a/src/parser/plugins/validate-keyword.ts +++ b/src/parser/plugins/validate-keyword.ts @@ -1,6 +1,6 @@ import { AiScriptSyntaxError } from '../../error.js'; import { visitNode } from '../visit.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; const reservedWord = [ 'null', @@ -46,13 +46,12 @@ function throwReservedWordError(name: string): void { throw new AiScriptSyntaxError(`Reserved word "${name}" cannot be used as variable name.`); } -function validateNode(node: Cst.Node): Cst.Node { +function 
validateNode(node: Ast.Node): Ast.Node { switch (node.type) { case 'def': case 'attr': case 'ns': - case 'identifier': - case 'propChain': { + case 'identifier': { if (reservedWord.includes(node.name)) { throwReservedWordError(node.name); } @@ -77,7 +76,7 @@ function validateNode(node: Cst.Node): Cst.Node { return node; } -export function validateKeyword(nodes: Cst.Node[]): Cst.Node[] { +export function validateKeyword(nodes: Ast.Node[]): Ast.Node[] { for (const inner of nodes) { visitNode(inner, validateNode); } diff --git a/src/parser/plugins/validate-type.ts b/src/parser/plugins/validate-type.ts index 08d5addf..3dbc0cf3 100644 --- a/src/parser/plugins/validate-type.ts +++ b/src/parser/plugins/validate-type.ts @@ -1,8 +1,8 @@ import { getTypeBySource } from '../../type.js'; import { visitNode } from '../visit.js'; -import type * as Cst from '../node.js'; +import type * as Ast from '../../node.js'; -function validateNode(node: Cst.Node): Cst.Node { +function validateNode(node: Ast.Node): Ast.Node { switch (node.type) { case 'def': { if (node.varType != null) { @@ -26,7 +26,7 @@ function validateNode(node: Cst.Node): Cst.Node { return node; } -export function validateType(nodes: Cst.Node[]): Cst.Node[] { +export function validateType(nodes: Ast.Node[]): Ast.Node[] { for (const node of nodes) { visitNode(node, validateNode); } diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts new file mode 100644 index 00000000..67a5d93e --- /dev/null +++ b/src/parser/scanner.ts @@ -0,0 +1,608 @@ +import { AiScriptSyntaxError } from '../error.js'; +import { CharStream } from './streams/char-stream.js'; +import { TOKEN, TokenKind } from './token.js'; + +import type { ITokenStream } from './streams/token-stream.js'; +import type { Token } from './token.js'; + +const spaceChars = [' ', '\t']; +const lineBreakChars = ['\r', '\n']; +const digit = /^[0-9]$/; +const wordChar = /^[A-Za-z0-9_]$/; + +/** + * 入力文字列からトークンを読み取るクラス +*/ +export class Scanner implements ITokenStream { + 
private stream: CharStream; + private _tokens: Token[] = []; + + constructor(source: string) + constructor(stream: CharStream) + constructor(x: string | CharStream) { + if (typeof x === 'string') { + this.stream = new CharStream(x); + } else { + this.stream = x; + } + this._tokens.push(this.readToken()); + } + + /** + * カーソル位置にあるトークンを取得します。 + */ + public get token(): Token { + return this._tokens[0]!; + } + + /** + * カーソル位置にあるトークンの種類を取得します。 + */ + public get kind(): TokenKind { + return this.token.kind; + } + + /** + * カーソル位置を次のトークンへ進めます。 + */ + public next(): void { + // 現在のトークンがEOFだったら次のトークンに進まない + if (this._tokens[0]!.kind === TokenKind.EOF) { + return; + } + + this._tokens.shift(); + + if (this._tokens.length === 0) { + this._tokens.push(this.readToken()); + } + } + + /** + * トークンの先読みを行います。カーソル位置は移動されません。 + */ + public lookahead(offset: number): Token { + while (this._tokens.length <= offset) { + this._tokens.push(this.readToken()); + } + + return this._tokens[offset]!; + } + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致するかを確認します。 + * 一致しなかった場合には文法エラーを発生させます。 + */ + public expect(kind: TokenKind): void { + if (this.kind !== kind) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.kind]}`); + } + } + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致することを確認し、 + * カーソル位置を次のトークンへ進めます。 + */ + public nextWith(kind: TokenKind): void { + this.expect(kind); + this.next(); + } + + private readToken(): Token { + let token; + let hasLeftSpacing = false; + + while (true) { + if (this.stream.eof) { + token = TOKEN(TokenKind.EOF, this.stream.getPos(), { hasLeftSpacing }); + break; + } + // skip spasing + if (spaceChars.includes(this.stream.char)) { + this.stream.next(); + hasLeftSpacing = true; + continue; + } + + // トークン位置を記憶 + const loc = this.stream.getPos(); + + if (lineBreakChars.includes(this.stream.char)) { + this.stream.next(); + token = TOKEN(TokenKind.NewLine, loc, { hasLeftSpacing }); + return token; + } + switch (this.stream.char) { + case '!': { + 
this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); + token = TOKEN(TokenKind.NotEq, loc, { hasLeftSpacing }); + } else { + token = TOKEN(TokenKind.Not, loc, { hasLeftSpacing }); + } + break; + } + case '"': + case '\'': { + token = this.readStringLiteral(hasLeftSpacing); + break; + } + case '#': { + this.stream.next(); + if ((this.stream.char as string) === '#') { + this.stream.next(); + if ((this.stream.char as string) === '#') { + this.stream.next(); + token = TOKEN(TokenKind.Sharp3, loc, { hasLeftSpacing }); + } + } else if ((this.stream.char as string) === '[') { + this.stream.next(); + token = TOKEN(TokenKind.OpenSharpBracket, loc, { hasLeftSpacing }); + } else { + throw new AiScriptSyntaxError('invalid character: "#"'); + } + break; + } + case '%': { + this.stream.next(); + token = TOKEN(TokenKind.Percent, loc, { hasLeftSpacing }); + break; + } + case '&': { + this.stream.next(); + if ((this.stream.char as string) === '&') { + this.stream.next(); + token = TOKEN(TokenKind.And2, loc, { hasLeftSpacing }); + } + break; + } + case '(': { + this.stream.next(); + token = TOKEN(TokenKind.OpenParen, loc, { hasLeftSpacing }); + break; + } + case ')': { + this.stream.next(); + token = TOKEN(TokenKind.CloseParen, loc, { hasLeftSpacing }); + break; + } + case '*': { + this.stream.next(); + token = TOKEN(TokenKind.Asterisk, loc, { hasLeftSpacing }); + break; + } + case '+': { + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); + token = TOKEN(TokenKind.PlusEq, loc, { hasLeftSpacing }); + } else { + token = TOKEN(TokenKind.Plus, loc, { hasLeftSpacing }); + } + break; + } + case ',': { + this.stream.next(); + token = TOKEN(TokenKind.Comma, loc, { hasLeftSpacing }); + break; + } + case '-': { + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); + token = TOKEN(TokenKind.MinusEq, loc, { hasLeftSpacing }); + } else { + token = TOKEN(TokenKind.Minus, loc, { 
hasLeftSpacing }); + } + break; + } + case '.': { + this.stream.next(); + token = TOKEN(TokenKind.Dot, loc, { hasLeftSpacing }); + break; + } + case '/': { + this.stream.next(); + if ((this.stream.char as string) === '*') { + this.stream.next(); + this.skipCommentRange(); + continue; + } else if ((this.stream.char as string) === '/') { + this.stream.next(); + this.skipCommentLine(); + continue; + } else { + token = TOKEN(TokenKind.Slash, loc, { hasLeftSpacing }); + } + break; + } + case ':': { + this.stream.next(); + if ((this.stream.char as string) === ':') { + this.stream.next(); + token = TOKEN(TokenKind.Colon2, loc, { hasLeftSpacing }); + } else { + token = TOKEN(TokenKind.Colon, loc, { hasLeftSpacing }); + } + break; + } + case ';': { + this.stream.next(); + token = TOKEN(TokenKind.SemiColon, loc, { hasLeftSpacing }); + break; + } + case '<': { + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); + token = TOKEN(TokenKind.LtEq, loc, { hasLeftSpacing }); + } else if ((this.stream.char as string) === ':') { + this.stream.next(); + token = TOKEN(TokenKind.Out, loc, { hasLeftSpacing }); + } else { + token = TOKEN(TokenKind.Lt, loc, { hasLeftSpacing }); + } + break; + } + case '=': { + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); + token = TOKEN(TokenKind.Eq2, loc, { hasLeftSpacing }); + } else if ((this.stream.char as string) === '>') { + this.stream.next(); + token = TOKEN(TokenKind.Arrow, loc, { hasLeftSpacing }); + } else { + token = TOKEN(TokenKind.Eq, loc, { hasLeftSpacing }); + } + break; + } + case '>': { + this.stream.next(); + if ((this.stream.char as string) === '=') { + this.stream.next(); + token = TOKEN(TokenKind.GtEq, loc, { hasLeftSpacing }); + } else { + token = TOKEN(TokenKind.Gt, loc, { hasLeftSpacing }); + } + break; + } + case '@': { + this.stream.next(); + token = TOKEN(TokenKind.At, loc, { hasLeftSpacing }); + break; + } + case '[': { + this.stream.next(); + token 
= TOKEN(TokenKind.OpenBracket, loc, { hasLeftSpacing }); + break; + } + case '\\': { + this.stream.next(); + token = TOKEN(TokenKind.BackSlash, loc, { hasLeftSpacing }); + break; + } + case ']': { + this.stream.next(); + token = TOKEN(TokenKind.CloseBracket, loc, { hasLeftSpacing }); + break; + } + case '^': { + this.stream.next(); + token = TOKEN(TokenKind.Hat, loc, { hasLeftSpacing }); + break; + } + case '`': { + token = this.readTemplate(hasLeftSpacing); + break; + } + case '{': { + this.stream.next(); + token = TOKEN(TokenKind.OpenBrace, loc, { hasLeftSpacing }); + break; + } + case '|': { + this.stream.next(); + if ((this.stream.char as string) === '|') { + this.stream.next(); + token = TOKEN(TokenKind.Or2, loc, { hasLeftSpacing }); + } + break; + } + case '}': { + this.stream.next(); + token = TOKEN(TokenKind.CloseBrace, loc, { hasLeftSpacing }); + break; + } + } + if (token == null) { + const digitToken = this.tryReadDigits(hasLeftSpacing); + if (digitToken) { + token = digitToken; + break; + } + const wordToken = this.tryReadWord(hasLeftSpacing); + if (wordToken) { + token = wordToken; + break; + } + throw new AiScriptSyntaxError(`invalid character: "${this.stream.char}"`); + } + break; + } + return token; + } + + private tryReadWord(hasLeftSpacing: boolean): Token | undefined { + // read a word + let value = ''; + + const loc = this.stream.getPos(); + + while (!this.stream.eof && wordChar.test(this.stream.char)) { + value += this.stream.char; + this.stream.next(); + } + if (value.length === 0) { + return; + } + // check word kind + switch (value) { + case 'null': { + return TOKEN(TokenKind.NullKeyword, loc, { hasLeftSpacing }); + } + case 'true': { + return TOKEN(TokenKind.TrueKeyword, loc, { hasLeftSpacing }); + } + case 'false': { + return TOKEN(TokenKind.FalseKeyword, loc, { hasLeftSpacing }); + } + case 'each': { + return TOKEN(TokenKind.EachKeyword, loc, { hasLeftSpacing }); + } + case 'for': { + return TOKEN(TokenKind.ForKeyword, loc, { 
hasLeftSpacing }); + } + case 'loop': { + return TOKEN(TokenKind.LoopKeyword, loc, { hasLeftSpacing }); + } + case 'break': { + return TOKEN(TokenKind.BreakKeyword, loc, { hasLeftSpacing }); + } + case 'continue': { + return TOKEN(TokenKind.ContinueKeyword, loc, { hasLeftSpacing }); + } + case 'match': { + return TOKEN(TokenKind.MatchKeyword, loc, { hasLeftSpacing }); + } + case 'case': { + return TOKEN(TokenKind.CaseKeyword, loc, { hasLeftSpacing }); + } + case 'default': { + return TOKEN(TokenKind.DefaultKeyword, loc, { hasLeftSpacing }); + } + case 'if': { + return TOKEN(TokenKind.IfKeyword, loc, { hasLeftSpacing }); + } + case 'elif': { + return TOKEN(TokenKind.ElifKeyword, loc, { hasLeftSpacing }); + } + case 'else': { + return TOKEN(TokenKind.ElseKeyword, loc, { hasLeftSpacing }); + } + case 'return': { + return TOKEN(TokenKind.ReturnKeyword, loc, { hasLeftSpacing }); + } + case 'eval': { + return TOKEN(TokenKind.EvalKeyword, loc, { hasLeftSpacing }); + } + case 'var': { + return TOKEN(TokenKind.VarKeyword, loc, { hasLeftSpacing }); + } + case 'let': { + return TOKEN(TokenKind.LetKeyword, loc, { hasLeftSpacing }); + } + case 'exists': { + return TOKEN(TokenKind.ExistsKeyword, loc, { hasLeftSpacing }); + } + default: { + return TOKEN(TokenKind.Identifier, loc, { hasLeftSpacing, value }); + } + } + } + + private tryReadDigits(hasLeftSpacing: boolean): Token | undefined { + let wholeNumber = ''; + let fractional = ''; + + const loc = this.stream.getPos(); + + while (!this.stream.eof && digit.test(this.stream.char)) { + wholeNumber += this.stream.char; + this.stream.next(); + } + if (wholeNumber.length === 0) { + return; + } + if (!this.stream.eof && this.stream.char === '.') { + this.stream.next(); + while (!this.stream.eof as boolean && digit.test(this.stream.char as string)) { + fractional += this.stream.char; + this.stream.next(); + } + if (fractional.length === 0) { + throw new AiScriptSyntaxError('digit expected'); + } + } + let value; + if 
(fractional.length > 0) { + value = wholeNumber + '.' + fractional; + } else { + value = wholeNumber; + } + return TOKEN(TokenKind.NumberLiteral, loc, { hasLeftSpacing, value }); + } + + private readStringLiteral(hasLeftSpacing: boolean): Token { + let value = ''; + const literalMark = this.stream.char; + let state: 'string' | 'escape' | 'finish' = 'string'; + + const loc = this.stream.getPos(); + this.stream.next(); + + while (state !== 'finish') { + switch (state) { + case 'string': { + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + if (this.stream.char === '\\') { + this.stream.next(); + state = 'escape'; + break; + } + if (this.stream.char === literalMark) { + this.stream.next(); + state = 'finish'; + break; + } + value += this.stream.char; + this.stream.next(); + break; + } + case 'escape': { + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + value += this.stream.char; + this.stream.next(); + state = 'string'; + break; + } + } + } + return TOKEN(TokenKind.StringLiteral, loc, { hasLeftSpacing, value }); + } + + private readTemplate(hasLeftSpacing: boolean): Token { + const elements: Token[] = []; + let buf = ''; + let tokenBuf: Token[] = []; + let state: 'string' | 'escape' | 'expr' | 'finish' = 'string'; + + const loc = this.stream.getPos(); + let elementLoc = loc; + this.stream.next(); + + while (state !== 'finish') { + switch (state) { + case 'string': { + // テンプレートの終了が無いままEOFに達した + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + // エスケープ + if (this.stream.char === '\\') { + this.stream.next(); + state = 'escape'; + break; + } + // テンプレートの終了 + if (this.stream.char === '`') { + this.stream.next(); + if (buf.length > 0) { + elements.push(TOKEN(TokenKind.TemplateStringElement, elementLoc, { hasLeftSpacing, value: buf })); + } + state = 'finish'; + break; + } + // 埋め込み式の開始 + if (this.stream.char === '{') { + this.stream.next(); + if (buf.length > 0) { + 
elements.push(TOKEN(TokenKind.TemplateStringElement, elementLoc, { hasLeftSpacing, value: buf })); + buf = ''; + } + // ここから式エレメントになるので位置を更新 + elementLoc = this.stream.getPos(); + state = 'expr'; + break; + } + buf += this.stream.char; + this.stream.next(); + break; + } + case 'escape': { + // エスケープ対象の文字が無いままEOFに達した + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + // 普通の文字として取り込み + buf += this.stream.char; + this.stream.next(); + // 通常の文字列に戻る + state = 'string'; + break; + } + case 'expr': { + // 埋め込み式の終端記号が無いままEOFに達した + if (this.stream.eof) { + throw new AiScriptSyntaxError('unexpected EOF'); + } + // skip spasing + if (spaceChars.includes(this.stream.char)) { + this.stream.next(); + continue; + } + // 埋め込み式の終了 + if ((this.stream.char as string) === '}') { + this.stream.next(); + elements.push(TOKEN(TokenKind.TemplateExprElement, elementLoc, { hasLeftSpacing, children: tokenBuf })); + tokenBuf = []; + // ここから文字列エレメントになるので位置を更新 + elementLoc = this.stream.getPos(); + state = 'string'; + break; + } + const token = this.readToken(); + tokenBuf.push(token); + break; + } + } + } + + return TOKEN(TokenKind.Template, loc, { hasLeftSpacing, children: elements }); + } + + private skipCommentLine(): void { + while (true) { + if (this.stream.eof) { + break; + } + if (this.stream.char === '\n') { + this.stream.next(); + break; + } + this.stream.next(); + } + } + + private skipCommentRange(): void { + while (true) { + if (this.stream.eof) { + break; + } + if (this.stream.char === '*') { + this.stream.next(); + if ((this.stream.char as string) === '/') { + this.stream.next(); + break; + } + continue; + } + this.stream.next(); + } + } +} diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts new file mode 100644 index 00000000..58b36793 --- /dev/null +++ b/src/parser/streams/char-stream.ts @@ -0,0 +1,139 @@ +/** + * 入力文字列から文字を読み取るクラス +*/ +export class CharStream { + private pages: Map; + private firstPageIndex: number; 
+ private lastPageIndex: number; + private pageIndex: number; + private address: number; + private _char?: string; + /** zero-based number */ + private line: number; + /** zero-based number */ + private column: number; + + constructor(source: string, opts?: { line?: number, column?: number }) { + this.pages = new Map(); + this.pages.set(0, source); + this.firstPageIndex = 0; + this.lastPageIndex = 0; + this.pageIndex = 0; + this.address = 0; + this.line = opts?.line ?? 0; + this.column = opts?.column ?? 0; + this.moveNext(); + } + + /** + * ストリームの終わりに達しているかどうかを取得します。 + */ + public get eof(): boolean { + return this.endOfPage && this.isLastPage; + } + + /** + * カーソル位置にある文字を取得します。 + */ + public get char(): string { + if (this.eof) { + throw new Error('end of stream'); + } + return this._char!; + } + + /** + * カーソル位置に対応するソースコード上の行番号と列番号を取得します。 + */ + public getPos(): { line: number, column: number } { + return { + line: (this.line + 1), + column: (this.column + 1), + }; + } + + /** + * カーソル位置を次の文字へ進めます。 + */ + public next(): void { + if (!this.eof && this._char === '\n') { + this.line++; + this.column = 0; + } else { + this.column++; + } + this.incAddr(); + this.moveNext(); + } + + /** + * カーソル位置を前の文字へ戻します。 + */ + public prev(): void { + this.decAddr(); + this.movePrev(); + } + + private get isFirstPage(): boolean { + return (this.pageIndex <= this.firstPageIndex); + } + + private get isLastPage(): boolean { + return (this.pageIndex >= this.lastPageIndex); + } + + private get endOfPage(): boolean { + const page = this.pages.get(this.pageIndex)!; + return (this.address >= page.length); + } + + private moveNext(): void { + this.loadChar(); + while (true) { + if (!this.eof && this._char === '\r') { + this.incAddr(); + this.loadChar(); + continue; + } + break; + } + } + + private incAddr(): void { + if (!this.endOfPage) { + this.address++; + } else if (!this.isLastPage) { + this.pageIndex++; + this.address = 0; + } + } + + private movePrev(): void { + this.loadChar(); + 
while (true) { + if (!this.eof && this._char === '\r') { + this.decAddr(); + this.loadChar(); + continue; + } + break; + } + } + + private decAddr(): void { + if (this.address > 0) { + this.address--; + } else if (!this.isFirstPage) { + this.pageIndex--; + this.address = this.pages.get(this.pageIndex)!.length - 1; + } + } + + private loadChar(): void { + if (this.eof) { + this._char = undefined; + } else { + this._char = this.pages.get(this.pageIndex)![this.address]!; + } + } +} diff --git a/src/parser/streams/token-stream.ts b/src/parser/streams/token-stream.ts new file mode 100644 index 00000000..3dae2a2d --- /dev/null +++ b/src/parser/streams/token-stream.ts @@ -0,0 +1,124 @@ +import { AiScriptSyntaxError } from '../../error.js'; +import { TOKEN, TokenKind } from '../token.js'; +import type { Token } from '../token.js'; + +/** + * トークンの読み取りに関するインターフェース +*/ +export interface ITokenStream { + /** + * カーソル位置にあるトークンを取得します。 + */ + get token(): Token; + + /** + * カーソル位置にあるトークンの種類を取得します。 + */ + get kind(): TokenKind; + + /** + * カーソル位置を次のトークンへ進めます。 + */ + next(): void; + + /** + * トークンの先読みを行います。カーソル位置は移動されません。 + */ + lookahead(offset: number): Token; + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致するかを確認します。 + * 一致しなかった場合には文法エラーを発生させます。 + */ + expect(kind: TokenKind): void; + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致することを確認し、 + * カーソル位置を次のトークンへ進めます。 + */ + nextWith(kind: TokenKind): void; +} + +/** + * トークン列からトークンを読み取るクラス +*/ +export class TokenStream implements ITokenStream { + private source: Token[]; + private index: number; + private _token: Token; + + constructor(source: TokenStream['source']) { + this.source = source; + this.index = 0; + this.load(); + } + + private get eof(): boolean { + return (this.index >= this.source.length); + } + + /** + * カーソル位置にあるトークンを取得します。 + */ + public get token(): Token { + if (this.eof) { + return TOKEN(TokenKind.EOF, { line: -1, column: -1 }); + } + return this._token; + } + + /** + * カーソル位置にあるトークンの種類を取得します。 + */ + public get kind(): 
TokenKind { + return this.token.kind; + } + + /** + * カーソル位置を次のトークンへ進めます。 + */ + public next(): void { + if (!this.eof) { + this.index++; + } + this.load(); + } + + /** + * トークンの先読みを行います。カーソル位置は移動されません。 + */ + public lookahead(offset: number): Token { + if (this.index + offset < this.source.length) { + return this.source[this.index + offset]!; + } else { + return TOKEN(TokenKind.EOF, { line: -1, column: -1 }); + } + } + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致するかを確認します。 + * 一致しなかった場合には文法エラーを発生させます。 + */ + public expect(kind: TokenKind): void { + if (this.kind !== kind) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[this.kind]}`); + } + } + + /** + * カーソル位置にあるトークンが指定したトークンの種類と一致することを確認し、 + * カーソル位置を次のトークンへ進めます。 + */ + public nextWith(kind: TokenKind): void { + this.expect(kind); + this.next(); + } + + private load(): void { + if (this.eof) { + this._token = TOKEN(TokenKind.EOF, { line: -1, column: -1 }); + } else { + this._token = this.source[this.index]!; + } + } +} diff --git a/src/parser/syntaxes/common.ts b/src/parser/syntaxes/common.ts new file mode 100644 index 00000000..311b4f85 --- /dev/null +++ b/src/parser/syntaxes/common.ts @@ -0,0 +1,142 @@ +import { TokenKind } from '../token.js'; +import { AiScriptSyntaxError } from '../../error.js'; +import { NODE } from '../utils.js'; +import { parseStatement } from './statements.js'; + +import type { ITokenStream } from '../streams/token-stream.js'; +import type * as Ast from '../../node.js'; + +/** + * ```abnf + * Params = "(" [IDENT *(("," / SPACE) IDENT)] ")" + * ``` +*/ +export function parseParams(s: ITokenStream): { name: string, argType?: Ast.Node }[] { + const items: { name: string, argType?: Ast.Node }[] = []; + + s.nextWith(TokenKind.OpenParen); + + while (s.kind !== TokenKind.CloseParen) { + // separator + if (items.length > 0) { + if (s.kind === TokenKind.Comma) { + s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + } + + 
s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + let type; + if ((s.kind as TokenKind) === TokenKind.Colon) { + s.next(); + type = parseType(s); + } + + items.push({ name, argType: type }); + } + + s.nextWith(TokenKind.CloseParen); + + return items; +} + +/** + * ```abnf + * Block = "{" *Statement "}" + * ``` +*/ +export function parseBlock(s: ITokenStream): Ast.Node[] { + s.nextWith(TokenKind.OpenBrace); + + while (s.kind === TokenKind.NewLine) { + s.next(); + } + + const steps: Ast.Node[] = []; + while (s.kind !== TokenKind.CloseBrace) { + steps.push(parseStatement(s)); + + if ((s.kind as TokenKind) !== TokenKind.NewLine && (s.kind as TokenKind) !== TokenKind.CloseBrace) { + throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.'); + } + while ((s.kind as TokenKind) === TokenKind.NewLine) { + s.next(); + } + } + + s.nextWith(TokenKind.CloseBrace); + + return steps; +} + +//#region Type + +export function parseType(s: ITokenStream): Ast.Node { + if (s.kind === TokenKind.At) { + return parseFnType(s); + } else { + return parseNamedType(s); + } +} + +/** + * ```abnf + * FnType = "@" "(" ParamTypes ")" "=>" Type + * ParamTypes = [Type *(("," / SPACE) Type)] + * ``` +*/ +function parseFnType(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.At); + s.nextWith(TokenKind.OpenParen); + + const params: Ast.Node[] = []; + while (s.kind !== TokenKind.CloseParen) { + if (params.length > 0) { + if (s.kind === TokenKind.Comma) { + s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + } + const type = parseType(s); + params.push(type); + } + + s.nextWith(TokenKind.CloseParen); + s.nextWith(TokenKind.Arrow); + + const resultType = parseType(s); + + return NODE('fnTypeSource', { args: params, result: resultType }, loc); +} + +/** + * ```abnf + * NamedType = IDENT ["<" Type ">"] + * ``` +*/ +function parseNamedType(s: ITokenStream): 
Ast.Node { + const loc = s.token.loc; + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + // inner type + let inner = null; + if (s.kind === TokenKind.Lt) { + s.next(); + inner = parseType(s); + s.nextWith(TokenKind.Gt); + } + + return NODE('namedTypeSource', { name, inner }, loc); +} + +//#endregion Type diff --git a/src/parser/syntaxes/expressions.ts b/src/parser/syntaxes/expressions.ts new file mode 100644 index 00000000..7aafacf7 --- /dev/null +++ b/src/parser/syntaxes/expressions.ts @@ -0,0 +1,623 @@ +import { AiScriptSyntaxError } from '../../error.js'; +import { CALL_NODE, NODE } from '../utils.js'; +import { TokenStream } from '../streams/token-stream.js'; +import { TokenKind } from '../token.js'; +import { parseBlock, parseParams, parseType } from './common.js'; +import { parseBlockOrStatement } from './statements.js'; + +import type * as Ast from '../../node.js'; +import type { ITokenStream } from '../streams/token-stream.js'; + +export function parseExpr(s: ITokenStream, isStatic: boolean): Ast.Node { + if (isStatic) { + return parseAtom(s, true); + } else { + return parsePratt(s, 0); + } +} + +// NOTE: infix(中置演算子)ではlbpを大きくすると右結合、rbpを大きくすると左結合の演算子になります。 +// この値は演算子が左と右に対してどのくらい結合力があるかを表わしています。詳細はpratt parsingの説明ページを参照してください。 + +const operators: OpInfo[] = [ + { opKind: 'postfix', kind: TokenKind.OpenParen, bp: 20 }, + { opKind: 'postfix', kind: TokenKind.OpenBracket, bp: 20 }, + + { opKind: 'infix', kind: TokenKind.Dot, lbp: 18, rbp: 19 }, + + { opKind: 'infix', kind: TokenKind.Hat, lbp: 17, rbp: 16 }, + + { opKind: 'prefix', kind: TokenKind.Plus, bp: 14 }, + { opKind: 'prefix', kind: TokenKind.Minus, bp: 14 }, + { opKind: 'prefix', kind: TokenKind.Not, bp: 14 }, + + { opKind: 'infix', kind: TokenKind.Asterisk, lbp: 12, rbp: 13 }, + { opKind: 'infix', kind: TokenKind.Slash, lbp: 12, rbp: 13 }, + { opKind: 'infix', kind: TokenKind.Percent, lbp: 12, rbp: 13 }, + + { opKind: 'infix', kind: TokenKind.Plus, lbp: 10, rbp: 11 }, 
+ { opKind: 'infix', kind: TokenKind.Minus, lbp: 10, rbp: 11 }, + + { opKind: 'infix', kind: TokenKind.Lt, lbp: 8, rbp: 9 }, + { opKind: 'infix', kind: TokenKind.LtEq, lbp: 8, rbp: 9 }, + { opKind: 'infix', kind: TokenKind.Gt, lbp: 8, rbp: 9 }, + { opKind: 'infix', kind: TokenKind.GtEq, lbp: 8, rbp: 9 }, + + { opKind: 'infix', kind: TokenKind.Eq2, lbp: 6, rbp: 7 }, + { opKind: 'infix', kind: TokenKind.NotEq, lbp: 6, rbp: 7 }, + + { opKind: 'infix', kind: TokenKind.And2, lbp: 4, rbp: 5 }, + + { opKind: 'infix', kind: TokenKind.Or2, lbp: 2, rbp: 3 }, +]; + +function parsePrefix(s: ITokenStream, minBp: number): Ast.Node { + const loc = s.token.loc; + const op = s.kind; + s.next(); + + // 改行のエスケープ + if (s.kind === TokenKind.BackSlash) { + s.next(); + s.nextWith(TokenKind.NewLine); + } + + const expr = parsePratt(s, minBp); + + switch (op) { + case TokenKind.Plus: { + // 数値リテラル以外は非サポート + if (expr.type === 'num') { + return NODE('num', { value: expr.value }, loc); + } else { + throw new AiScriptSyntaxError('currently, sign is only supported for number literal.'); + } + // TODO: 将来的にサポートされる式を拡張 + // return NODE('plus', { expr }, loc); + } + case TokenKind.Minus: { + // 数値リテラル以外は非サポート + if (expr.type === 'num') { + return NODE('num', { value: -1 * expr.value }, loc); + } else { + throw new AiScriptSyntaxError('currently, sign is only supported for number literal.'); + } + // TODO: 将来的にサポートされる式を拡張 + // return NODE('minus', { expr }, loc); + } + case TokenKind.Not: { + return NODE('not', { expr }, loc); + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); + } + } +} + +function parseInfix(s: ITokenStream, left: Ast.Node, minBp: number): Ast.Node { + const loc = s.token.loc; + const op = s.kind; + s.next(); + + // 改行のエスケープ + if (s.kind === TokenKind.BackSlash) { + s.next(); + s.nextWith(TokenKind.NewLine); + } + + if (op === TokenKind.Dot) { + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + return NODE('prop', 
{ + target: left, + name, + }, loc); + } else { + const right = parsePratt(s, minBp); + + switch (op) { + case TokenKind.Hat: { + return CALL_NODE('Core:pow', [left, right], loc); + } + case TokenKind.Asterisk: { + return CALL_NODE('Core:mul', [left, right], loc); + } + case TokenKind.Slash: { + return CALL_NODE('Core:div', [left, right], loc); + } + case TokenKind.Percent: { + return CALL_NODE('Core:mod', [left, right], loc); + } + case TokenKind.Plus: { + return CALL_NODE('Core:add', [left, right], loc); + } + case TokenKind.Minus: { + return CALL_NODE('Core:sub', [left, right], loc); + } + case TokenKind.Lt: { + return CALL_NODE('Core:lt', [left, right], loc); + } + case TokenKind.LtEq: { + return CALL_NODE('Core:lteq', [left, right], loc); + } + case TokenKind.Gt: { + return CALL_NODE('Core:gt', [left, right], loc); + } + case TokenKind.GtEq: { + return CALL_NODE('Core:gteq', [left, right], loc); + } + case TokenKind.Eq2: { + return CALL_NODE('Core:eq', [left, right], loc); + } + case TokenKind.NotEq: { + return CALL_NODE('Core:neq', [left, right], loc); + } + case TokenKind.And2: { + return NODE('and', { left, right }, loc); + } + case TokenKind.Or2: { + return NODE('or', { left, right }, loc); + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); + } + } + } +} + +function parsePostfix(s: ITokenStream, expr: Ast.Node): Ast.Node { + const loc = s.token.loc; + const op = s.kind; + + switch (op) { + case TokenKind.OpenParen: { + return parseCall(s, expr); + } + case TokenKind.OpenBracket: { + s.next(); + const index = parseExpr(s, false); + s.nextWith(TokenKind.CloseBracket); + + return NODE('index', { + target: expr, + index, + }, loc); + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[op]}`); + } + } +} + +function parseAtom(s: ITokenStream, isStatic: boolean): Ast.Node { + const loc = s.token.loc; + + switch (s.kind) { + case TokenKind.IfKeyword: { + if (isStatic) break; + return parseIf(s); + } 
+ case TokenKind.At: { + if (isStatic) break; + return parseFnExpr(s); + } + case TokenKind.MatchKeyword: { + if (isStatic) break; + return parseMatch(s); + } + case TokenKind.EvalKeyword: { + if (isStatic) break; + return parseEval(s); + } + case TokenKind.ExistsKeyword: { + if (isStatic) break; + return parseExists(s); + } + case TokenKind.Template: { + const values: (string | Ast.Node)[] = []; + + if (isStatic) break; + + for (const element of s.token.children!) { + switch (element.kind) { + case TokenKind.TemplateStringElement: { + values.push(NODE('str', { value: element.value! }, element.loc)); + break; + } + case TokenKind.TemplateExprElement: { + // スキャナで埋め込み式として事前に読み取っておいたトークン列をパースする + const exprStream = new TokenStream(element.children!); + const expr = parseExpr(exprStream, false); + if (exprStream.kind !== TokenKind.EOF) { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[exprStream.token.kind]}`); + } + values.push(expr); + break; + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[element.kind]}`); + } + } + } + + s.next(); + return NODE('tmpl', { tmpl: values }, loc); + } + case TokenKind.StringLiteral: { + const value = s.token.value!; + s.next(); + return NODE('str', { value }, loc); + } + case TokenKind.NumberLiteral: { + // TODO: validate number value + const value = Number(s.token.value!); + s.next(); + return NODE('num', { value }, loc); + } + case TokenKind.TrueKeyword: + case TokenKind.FalseKeyword: { + const value = (s.kind === TokenKind.TrueKeyword); + s.next(); + return NODE('bool', { value }, loc); + } + case TokenKind.NullKeyword: { + s.next(); + return NODE('null', { }, loc); + } + case TokenKind.OpenBrace: { + return parseObject(s, isStatic); + } + case TokenKind.OpenBracket: { + return parseArray(s, isStatic); + } + case TokenKind.Identifier: { + if (isStatic) break; + return parseReference(s); + } + case TokenKind.OpenParen: { + s.next(); + const expr = parseExpr(s, isStatic); + 
s.nextWith(TokenKind.CloseParen); + return expr; + } + } + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); +} + +/** + * Call = "(" [Expr *(("," / SPACE) Expr)] ")" +*/ +function parseCall(s: ITokenStream, target: Ast.Node): Ast.Node { + const loc = s.token.loc; + const items: Ast.Node[] = []; + + s.nextWith(TokenKind.OpenParen); + + while (s.kind !== TokenKind.CloseParen) { + // separator + if (items.length > 0) { + if (s.kind === TokenKind.Comma) { + s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + } + + items.push(parseExpr(s, false)); + } + + s.nextWith(TokenKind.CloseParen); + + return NODE('call', { + target, + args: items, + }, loc); +} + +/** + * ```abnf + * If = "if" Expr BlockOrStatement *("elif" Expr BlockOrStatement) ["else" BlockOrStatement] + * ``` +*/ +function parseIf(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.IfKeyword); + const cond = parseExpr(s, false); + const then = parseBlockOrStatement(s); + + if (s.kind === TokenKind.NewLine && [TokenKind.ElifKeyword, TokenKind.ElseKeyword].includes(s.lookahead(1).kind)) { + s.next(); + } + + const elseif: { cond: Ast.Node, then: Ast.Node }[] = []; + while (s.kind === TokenKind.ElifKeyword) { + s.next(); + const elifCond = parseExpr(s, false); + const elifThen = parseBlockOrStatement(s); + if ((s.kind as TokenKind) === TokenKind.NewLine && [TokenKind.ElifKeyword, TokenKind.ElseKeyword].includes(s.lookahead(1).kind)) { + s.next(); + } + elseif.push({ cond: elifCond, then: elifThen }); + } + + let _else = undefined; + if (s.kind === TokenKind.ElseKeyword) { + s.next(); + _else = parseBlockOrStatement(s); + } + + return NODE('if', { cond, then, elseif, else: _else }, loc); +} + +/** + * ```abnf + * FnExpr = "@" Params [":" Type] Block + * ``` +*/ +function parseFnExpr(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.At); + + const params = parseParams(s); 
+ + let type; + if ((s.kind as TokenKind) === TokenKind.Colon) { + s.next(); + type = parseType(s); + } + + const body = parseBlock(s); + + return NODE('fn', { args: params, retType: type, children: body }, loc); +} + +/** + * ```abnf + * Match = "match" Expr "{" *("case" Expr "=>" BlockOrStatement) ["default" "=>" BlockOrStatement] "}" + * ``` +*/ +function parseMatch(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.MatchKeyword); + const about = parseExpr(s, false); + + s.nextWith(TokenKind.OpenBrace); + s.nextWith(TokenKind.NewLine); + + const qs: { q: Ast.Node, a: Ast.Node }[] = []; + while (s.kind !== TokenKind.DefaultKeyword && s.kind !== TokenKind.CloseBrace) { + s.nextWith(TokenKind.CaseKeyword); + const q = parseExpr(s, false); + s.nextWith(TokenKind.Arrow); + const a = parseBlockOrStatement(s); + s.nextWith(TokenKind.NewLine); + qs.push({ q, a }); + } + + let x; + if (s.kind === TokenKind.DefaultKeyword) { + s.next(); + s.nextWith(TokenKind.Arrow); + x = parseBlockOrStatement(s); + s.nextWith(TokenKind.NewLine); + } + + s.nextWith(TokenKind.CloseBrace); + + return NODE('match', { about, qs, default: x }, loc); +} + +/** + * ```abnf + * Eval = "eval" Block + * ``` +*/ +function parseEval(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.EvalKeyword); + const statements = parseBlock(s); + return NODE('block', { statements }, loc); +} + +/** + * ```abnf + * Exists = "exists" Reference + * ``` +*/ +function parseExists(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.ExistsKeyword); + const identifier = parseReference(s); + return NODE('exists', { identifier }, loc); +} + +/** + * ```abnf + * Reference = IDENT *(":" IDENT) + * ``` +*/ +function parseReference(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + const segs: string[] = []; + while (true) { + if (segs.length > 0) { + if (s.kind === TokenKind.Colon) { + if (s.token.hasLeftSpacing) { + throw new 
AiScriptSyntaxError('Cannot use spaces in a reference.'); + } + s.next(); + if (s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('Cannot use spaces in a reference.'); + } + } else { + break; + } + } + s.expect(TokenKind.Identifier); + segs.push(s.token.value!); + s.next(); + } + return NODE('identifier', { name: segs.join(':') }, loc); +} + +/** + * ```abnf + * Object = "{" [IDENT ":" Expr *(("," / ";" / SPACE) IDENT ":" Expr) ["," / ";"]] "}" + * ``` +*/ +function parseObject(s: ITokenStream, isStatic: boolean): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.OpenBrace); + + if (s.kind === TokenKind.NewLine) { + s.next(); + } + + const map = new Map(); + while (s.kind !== TokenKind.CloseBrace) { + s.expect(TokenKind.Identifier); + const k = s.token.value!; + s.next(); + + s.nextWith(TokenKind.Colon); + + const v = parseExpr(s, isStatic); + + map.set(k, v); + + // separator + if ((s.kind as TokenKind) === TokenKind.CloseBrace) { + break; + } else if (s.kind === TokenKind.Comma) { + s.next(); + } else if (s.kind === TokenKind.SemiColon) { + s.next(); + } else if (s.kind === TokenKind.NewLine) { + // noop + } else { + if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + } + + if (s.kind === TokenKind.NewLine) { + s.next(); + } + } + + s.nextWith(TokenKind.CloseBrace); + + return NODE('obj', { value: map }, loc); +} + +/** + * ```abnf + * Array = "[" [Expr *(("," / SPACE) Expr) [","]] "]" + * ``` +*/ +function parseArray(s: ITokenStream, isStatic: boolean): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.OpenBracket); + + if (s.kind === TokenKind.NewLine) { + s.next(); + } + + const value = []; + while (s.kind !== TokenKind.CloseBracket) { + value.push(parseExpr(s, isStatic)); + + // separator + if ((s.kind as TokenKind) === TokenKind.CloseBracket) { + break; + } else if (s.kind === TokenKind.Comma) { + s.next(); + } else if (s.kind === TokenKind.NewLine) { + // noop + } else { + if 
(!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + } + + if (s.kind === TokenKind.NewLine) { + s.next(); + } + } + + s.nextWith(TokenKind.CloseBracket); + + return NODE('arr', { value }, loc); +} + +//#region Pratt parsing + +type PrefixInfo = { opKind: 'prefix', kind: TokenKind, bp: number }; +type InfixInfo = { opKind: 'infix', kind: TokenKind, lbp: number, rbp: number }; +type PostfixInfo = { opKind: 'postfix', kind: TokenKind, bp: number }; +type OpInfo = PrefixInfo | InfixInfo | PostfixInfo; + +function parsePratt(s: ITokenStream, minBp: number): Ast.Node { + // pratt parsing + // https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html + + let left: Ast.Node; + + const tokenKind = s.kind; + const prefix = operators.find((x): x is PrefixInfo => x.opKind === 'prefix' && x.kind === tokenKind); + if (prefix != null) { + left = parsePrefix(s, prefix.bp); + } else { + left = parseAtom(s, false); + } + + while (true) { + // 改行のエスケープ + if (s.kind === TokenKind.BackSlash) { + s.next(); + s.nextWith(TokenKind.NewLine); + } + + const tokenKind = s.kind; + + const postfix = operators.find((x): x is PostfixInfo => x.opKind === 'postfix' && x.kind === tokenKind); + if (postfix != null) { + if (postfix.bp < minBp) { + break; + } + + if ([TokenKind.OpenBracket, TokenKind.OpenParen].includes(tokenKind) && s.token.hasLeftSpacing) { + // 前にスペースがある場合は後置演算子として処理しない + } else { + left = parsePostfix(s, left); + continue; + } + } + + const infix = operators.find((x): x is InfixInfo => x.opKind === 'infix' && x.kind === tokenKind); + if (infix != null) { + if (infix.lbp < minBp) { + break; + } + + left = parseInfix(s, left, infix.rbp); + continue; + } + + break; + } + + return left; +} + +//#endregion Pratt parsing diff --git a/src/parser/syntaxes/statements.ts b/src/parser/syntaxes/statements.ts new file mode 100644 index 00000000..f36ba94d --- /dev/null +++ b/src/parser/syntaxes/statements.ts @@ -0,0 +1,408 @@ +import { 
AiScriptSyntaxError } from '../../error.js'; +import { CALL_NODE, NODE } from '../utils.js'; +import { TokenKind } from '../token.js'; +import { parseBlock, parseParams, parseType } from './common.js'; +import { parseExpr } from './expressions.js'; + +import type * as Ast from '../../node.js'; +import type { ITokenStream } from '../streams/token-stream.js'; + +/** + * ```abnf + * Statement = VarDef / FnDef / Out / Return / Attr / Each / For / Loop + * / Break / Continue / Assign / Expr + * ``` +*/ +export function parseStatement(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + switch (s.kind) { + case TokenKind.VarKeyword: + case TokenKind.LetKeyword: { + return parseVarDef(s); + } + case TokenKind.At: { + if (s.lookahead(1).kind === TokenKind.Identifier) { + return parseFnDef(s); + } + break; + } + case TokenKind.Out: { + return parseOut(s); + } + case TokenKind.ReturnKeyword: { + return parseReturn(s); + } + case TokenKind.OpenSharpBracket: { + return parseStatementWithAttr(s); + } + case TokenKind.EachKeyword: { + return parseEach(s); + } + case TokenKind.ForKeyword: { + return parseFor(s); + } + case TokenKind.LoopKeyword: { + return parseLoop(s); + } + case TokenKind.BreakKeyword: { + s.next(); + return NODE('break', {}, loc); + } + case TokenKind.ContinueKeyword: { + s.next(); + return NODE('continue', {}, loc); + } + } + const expr = parseExpr(s, false); + const assign = tryParseAssign(s, expr); + if (assign) { + return assign; + } + return expr; +} + +export function parseDefStatement(s: ITokenStream): Ast.Node { + switch (s.kind) { + case TokenKind.VarKeyword: + case TokenKind.LetKeyword: { + return parseVarDef(s); + } + case TokenKind.At: { + return parseFnDef(s); + } + default: { + throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`); + } + } +} + +/** + * ```abnf + * BlockOrStatement = Block / Statement + * ``` +*/ +export function parseBlockOrStatement(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + if (s.kind 
=== TokenKind.OpenBrace) { + const statements = parseBlock(s); + return NODE('block', { statements }, loc); + } else { + return parseStatement(s); + } +}
+
+/**
+ * Parses a variable definition into a `def` node.
+ *
+ * ```abnf
+ * VarDef = ("let" / "var") IDENT [":" Type] "=" Expr
+ * ```
+ *
+ * "let" produces `mut: false`, "var" produces `mut: true`. One NewLine token
+ * is accepted between "=" and the initializer expression.
+ * Throws AiScriptSyntaxError when the current token is neither keyword.
+*/
+function parseVarDef(s: ITokenStream): Ast.Node {
+	const loc = s.token.loc;
+
+	let mut;
+	switch (s.kind) {
+		case TokenKind.LetKeyword: {
+			mut = false;
+			break;
+		}
+		case TokenKind.VarKeyword: {
+			mut = true;
+			break;
+		}
+		default: {
+			throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`);
+		}
+	}
+	s.next();
+
+	s.expect(TokenKind.Identifier);
+	const name = s.token.value!;
+	s.next();
+
+	// optional type annotation
+	let type;
+	if ((s.kind as TokenKind) === TokenKind.Colon) {
+		s.next();
+		type = parseType(s);
+	}
+
+	s.nextWith(TokenKind.Eq);
+
+	// the initializer may start on the line after "="
+	if ((s.kind as TokenKind) === TokenKind.NewLine) {
+		s.next();
+	}
+
+	const expr = parseExpr(s, false);
+
+	return NODE('def', { name, varType: type, expr, mut, attr: [] }, loc);
+}
+
+/**
+ * Parses a named function definition.
+ *
+ * ```abnf
+ * FnDef = "@" IDENT Params [":" Type] Block
+ * ```
+ *
+ * The result is an immutable `def` node whose `expr` is a `fn` node; both
+ * nodes carry the location of the leading "@".
+*/
+function parseFnDef(s: ITokenStream): Ast.Node {
+	const loc = s.token.loc;
+
+	s.nextWith(TokenKind.At);
+
+	// `expect` has just verified the token kind, so read the identifier with
+	// the same non-null assertion used by the other parsers in this file.
+	s.expect(TokenKind.Identifier);
+	const name = s.token.value!;
+	s.next();
+
+	const params = parseParams(s);
+
+	// optional return type annotation
+	let type;
+	if ((s.kind as TokenKind) === TokenKind.Colon) {
+		s.next();
+		type = parseType(s);
+	}
+
+	const body = parseBlock(s);
+
+	return NODE('def', {
+		name,
+		expr: NODE('fn', {
+			args: params,
+			retType: type,
+			children: body,
+		}, loc),
+		mut: false,
+		attr: [],
+	}, loc);
+}
+
+/**
+ * Parses an output statement and lowers it to a call of `print` with the
+ * parsed expression as its only argument.
+ *
+ * ```abnf
+ * Out = "<:" Expr
+ * ```
+*/
+function parseOut(s: ITokenStream): Ast.Node {
+	const loc = s.token.loc;
+
+	s.nextWith(TokenKind.Out);
+	const expr = parseExpr(s, false);
+	return CALL_NODE('print', [expr], loc);
+}
+
+/** + * ```abnf + * Each = "each" "let" IDENT ("," / SPACE) Expr BlockOrStatement + * / "each" "(" "let" IDENT ("," / SPACE) Expr ")" BlockOrStatement + * ``` +*/ +function parseEach(s: 
ITokenStream): Ast.Node { + const loc = s.token.loc; + let hasParen = false; + + s.nextWith(TokenKind.EachKeyword); + + if (s.kind === TokenKind.OpenParen) { + hasParen = true; + s.next(); + } + + s.nextWith(TokenKind.LetKeyword); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + if (s.kind === TokenKind.Comma) { + s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + + const items = parseExpr(s, false); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('each', { + var: name, + items: items, + for: body, + }, loc); +} + +function parseFor(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + let hasParen = false; + + s.nextWith(TokenKind.ForKeyword); + + if (s.kind === TokenKind.OpenParen) { + hasParen = true; + s.next(); + } + + if (s.kind === TokenKind.LetKeyword) { + // range syntax + s.next(); + + const identLoc = s.token.loc; + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + let _from; + if ((s.kind as TokenKind) === TokenKind.Eq) { + s.next(); + _from = parseExpr(s, false); + } else { + _from = NODE('num', { value: 0 }, identLoc); + } + + if ((s.kind as TokenKind) === TokenKind.Comma) { + s.next(); + } else if (!s.token.hasLeftSpacing) { + throw new AiScriptSyntaxError('separator expected'); + } + + const to = parseExpr(s, false); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('for', { + var: name, + from: _from, + to, + for: body, + }, loc); + } else { + // times syntax + + const times = parseExpr(s, false); + + if (hasParen) { + s.nextWith(TokenKind.CloseParen); + } + + const body = parseBlockOrStatement(s); + + return NODE('for', { + times, + for: body, + }, loc); + } +} + +/** + * ```abnf + * Return = "return" Expr + * ``` +*/ +function parseReturn(s: ITokenStream): Ast.Node { + const loc = 
s.token.loc; + + s.nextWith(TokenKind.ReturnKeyword); + const expr = parseExpr(s, false); + return NODE('return', { expr }, loc); +} + +/** + * ```abnf + * StatementWithAttr = *Attr Statement + * ``` +*/ +function parseStatementWithAttr(s: ITokenStream): Ast.Node { + const attrs: Ast.Attribute[] = []; + while (s.kind === TokenKind.OpenSharpBracket) { + attrs.push(parseAttr(s) as Ast.Attribute); + s.nextWith(TokenKind.NewLine); + } + + const statement = parseStatement(s); + + if (statement.type !== 'def') { + throw new AiScriptSyntaxError('invalid attribute.'); + } + if (statement.attr != null) { + statement.attr.push(...attrs); + } else { + statement.attr = attrs; + } + + return statement; +} + +/** + * ```abnf + * Attr = "#[" IDENT [StaticExpr] "]" + * ``` +*/ +function parseAttr(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.OpenSharpBracket); + + s.expect(TokenKind.Identifier); + const name = s.token.value!; + s.next(); + + let value; + if (s.kind !== TokenKind.CloseBracket) { + value = parseExpr(s, true); + } else { + value = NODE('bool', { value: true }, loc); + } + + s.nextWith(TokenKind.CloseBracket); + + return NODE('attr', { name, value }, loc); +} + +/** + * ```abnf + * Loop = "loop" Block + * ``` +*/ +function parseLoop(s: ITokenStream): Ast.Node { + const loc = s.token.loc; + + s.nextWith(TokenKind.LoopKeyword); + const statements = parseBlock(s); + return NODE('loop', { statements }, loc); +} + +/** + * ```abnf + * Assign = Expr ("=" / "+=" / "-=") Expr + * ``` +*/ +function tryParseAssign(s: ITokenStream, dest: Ast.Node): Ast.Node | undefined { + const loc = s.token.loc; + + // Assign + switch (s.kind) { + case TokenKind.Eq: { + s.next(); + const expr = parseExpr(s, false); + return NODE('assign', { dest, expr }, loc); + } + case TokenKind.PlusEq: { + s.next(); + const expr = parseExpr(s, false); + return NODE('addAssign', { dest, expr }, loc); + } + case TokenKind.MinusEq: { + s.next(); + const expr = parseExpr(s, 
false); + return NODE('subAssign', { dest, expr }, loc); + } + default: { + return; + } + } +} diff --git a/src/parser/syntaxes/toplevel.ts b/src/parser/syntaxes/toplevel.ts new file mode 100644 index 00000000..cd6f8ba2 --- /dev/null +++ b/src/parser/syntaxes/toplevel.ts @@ -0,0 +1,115 @@ +import { NODE } from '../utils.js'; +import { TokenKind } from '../token.js'; +import { AiScriptSyntaxError } from '../../error.js'; +import { parseDefStatement, parseStatement } from './statements.js'; +import { parseExpr } from './expressions.js'; + +import type * as Ast from '../../node.js'; +import type { ITokenStream } from '../streams/token-stream.js';
+
+/**
+ * Parses a whole program into a list of top-level nodes.
+ *
+ * ```abnf
+ * TopLevel = *(Namespace / Meta / Statement)
+ * ```
+ *
+ * Leading NewLine tokens are skipped. Every item must be followed by at
+ * least one NewLine token or by EOF; otherwise AiScriptSyntaxError is thrown.
+*/
+export function parseTopLevel(s: ITokenStream): Ast.Node[] {
+	const nodes: Ast.Node[] = [];
+
+	while (s.kind === TokenKind.NewLine) {
+		s.next();
+	}
+
+	while (s.kind !== TokenKind.EOF) {
+		switch (s.kind) {
+			case TokenKind.Colon2: {
+				nodes.push(parseNamespace(s));
+				break;
+			}
+			case TokenKind.Sharp3: {
+				nodes.push(parseMeta(s));
+				break;
+			}
+			default: {
+				nodes.push(parseStatement(s));
+				break;
+			}
+		}
+
+		// an item must end its line (or the input)
+		if ((s.kind as TokenKind) !== TokenKind.NewLine && (s.kind as TokenKind) !== TokenKind.EOF) {
+			throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.');
+		}
+		while ((s.kind as TokenKind) === TokenKind.NewLine) {
+			s.next();
+		}
+	}
+
+	return nodes;
+}
+
+/**
+ * Parses a namespace declaration into an `ns` node.
+ *
+ * ```abnf
+ * Namespace = "::" IDENT "{" *(VarDef / FnDef / Namespace) "}"
+ * ```
+ *
+ * Only definitions ("let" / "var" / "@") and nested namespaces ("::") are
+ * accepted as members. Any other token, including EOF before the closing
+ * brace, raises AiScriptSyntaxError naming the offending token.
+*/
+export function parseNamespace(s: ITokenStream): Ast.Node {
+	const loc = s.token.loc;
+
+	s.nextWith(TokenKind.Colon2);
+
+	s.expect(TokenKind.Identifier);
+	const name = s.token.value!;
+	s.next();
+
+	const members: Ast.Node[] = [];
+	s.nextWith(TokenKind.OpenBrace);
+
+	while (s.kind === TokenKind.NewLine) {
+		s.next();
+	}
+
+	while (s.kind !== TokenKind.CloseBrace) {
+		switch (s.kind) {
+			case TokenKind.VarKeyword:
+			case TokenKind.LetKeyword:
+			case TokenKind.At: {
+				members.push(parseDefStatement(s));
+				break;
+			}
+			case TokenKind.Colon2: {
+				members.push(parseNamespace(s));
+				break;
+			}
+			default: {
+				// Without this branch nothing is consumed and the check below
+				// reports a misleading "Multiple statements..." error.
+				throw new AiScriptSyntaxError(`unexpected token: ${TokenKind[s.kind]}`);
+			}
+		}
+
+		// a member must end its line (or the namespace body)
+		if ((s.kind as TokenKind) !== TokenKind.NewLine && (s.kind as TokenKind) !== TokenKind.CloseBrace) {
+			throw new AiScriptSyntaxError('Multiple statements cannot be placed on a single line.');
+		}
+		while ((s.kind as TokenKind) === TokenKind.NewLine) {
+			s.next();
+		}
+	}
+	s.nextWith(TokenKind.CloseBrace);
+
+	return NODE('ns', { name, members }, loc);
+}
+
+/**
+ * Parses a metadata declaration into a `meta` node.
+ *
+ * ```abnf
+ * Meta = "###" [IDENT] StaticExpr
+ * ```
+ *
+ * `name` is null when the identifier is omitted. The value is parsed with
+ * `parseExpr(s, true)`, i.e. as a static expression.
+*/
+export function parseMeta(s: ITokenStream): Ast.Node {
+	const loc = s.token.loc;
+
+	s.nextWith(TokenKind.Sharp3);
+
+	let name = null;
+	if (s.kind === TokenKind.Identifier) {
+		name = s.token.value!;
+		s.next();
+	}
+
+	const value = parseExpr(s, true);
+
+	return NODE('meta', { name, value }, loc);
+}
diff --git a/src/parser/token.ts b/src/parser/token.ts new file mode 100644 index 00000000..67aca6b6 --- /dev/null +++ b/src/parser/token.ts @@ -0,0 +1,128 @@ +export enum TokenKind { + EOF, + NewLine, + Identifier, + + // literal + NumberLiteral, + StringLiteral, + + // template string + Template, + TemplateStringElement, + TemplateExprElement, + + // keyword + NullKeyword, + TrueKeyword, + FalseKeyword, + EachKeyword, + ForKeyword, + LoopKeyword, + BreakKeyword, + ContinueKeyword, + MatchKeyword, + CaseKeyword, + DefaultKeyword, + IfKeyword, + ElifKeyword, + ElseKeyword, + ReturnKeyword, + EvalKeyword, + VarKeyword, + LetKeyword, + ExistsKeyword, + + /** "!" */ + Not, + /** "!=" */ + NotEq, + /** "#[" */ + OpenSharpBracket, + /** "###" */ + Sharp3, + /** "%" */ + Percent, + /** "&&" */ + And2, + /** "(" */ + OpenParen, + /** ")" */ + CloseParen, + /** "*" */ + Asterisk, + /** "+" */ + Plus, + /** "+=" */ + PlusEq, + /** "," */ + Comma, + /** "-" */ + Minus, + /** "-=" */ + MinusEq, + /** "." 
*/ + Dot, + /** "/" */ + Slash, + /** ":" */ + Colon, + /** "::" */ + Colon2, + /** ";" */ + SemiColon, + /** "<" */ + Lt, + /** "<=" */ + LtEq, + /** "<:" */ + Out, + /** "=" */ + Eq, + /** "==" */ + Eq2, + /** "=>" */ + Arrow, + /** ">" */ + Gt, + /** ">=" */ + GtEq, + /** "@" */ + At, + /** "[" */ + OpenBracket, + /** "\\" */ + BackSlash, + /** "]" */ + CloseBracket, + /** "^" */ + Hat, + /** "{" */ + OpenBrace, + /** "||" */ + Or2, + /** "}" */ + CloseBrace, +} + +export type TokenLocation = { column: number, line: number }; + +export class Token { + constructor( + public kind: TokenKind, + public loc: { column: number, line: number }, + public hasLeftSpacing = false, + /** for number literal, string literal */ + public value?: string, + /** for template syntax */ + public children?: Token[], + ) { } +} + +/** + * - opts.value: for number literal, string literal + * - opts.children: for template syntax +*/ +export function TOKEN(kind: TokenKind, loc: TokenLocation, opts?: { hasLeftSpacing?: boolean, value?: Token['value'], children?: Token['children'] }): Token { + return new Token(kind, loc, opts?.hasLeftSpacing, opts?.value, opts?.children); +} diff --git a/src/parser/utils.ts b/src/parser/utils.ts new file mode 100644 index 00000000..456764e5 --- /dev/null +++ b/src/parser/utils.ts @@ -0,0 +1,19 @@ +import type * as Ast from '../node.js'; + +export function NODE(type: string, params: Record, loc: { column: number, line: number }): Ast.Node { + const node: Record = { type }; + for (const key of Object.keys(params)) { + if (params[key] !== undefined) { + node[key] = params[key]; + } + } + node.loc = loc; + return node as Ast.Node; +} + +export function CALL_NODE(name: string, args: Ast.Node[], loc: { column: number, line: number }): Ast.Node { + return NODE('call', { + target: NODE('identifier', { name }, loc), + args, + }, loc); +} diff --git a/src/parser/visit.ts b/src/parser/visit.ts index db617de2..29b5cbdf 100644 --- a/src/parser/visit.ts +++ 
b/src/parser/visit.ts @@ -1,143 +1,127 @@ -import * as Cst from './node.js'; +import type * as Ast from '../node.js'; -export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst.Node { +export function visitNode(node: Ast.Node, fn: (node: Ast.Node) => Ast.Node): Ast.Node { const result = fn(node); // nested nodes switch (result.type) { case 'def': { - result.expr = visitNode(result.expr, fn) as Cst.Definition['expr']; + result.expr = visitNode(result.expr, fn) as Ast.Definition['expr']; break; } case 'return': { - result.expr = visitNode(result.expr, fn) as Cst.Return['expr']; + result.expr = visitNode(result.expr, fn) as Ast.Return['expr']; break; } case 'each': { - result.items = visitNode(result.items, fn) as Cst.Each['items']; - result.for = visitNode(result.for, fn) as Cst.Each['for']; + result.items = visitNode(result.items, fn) as Ast.Each['items']; + result.for = visitNode(result.for, fn) as Ast.Each['for']; break; } case 'for': { if (result.from != null) { - result.from = visitNode(result.from, fn) as Cst.For['from']; + result.from = visitNode(result.from, fn) as Ast.For['from']; } if (result.to != null) { - result.to = visitNode(result.to, fn) as Cst.For['to']; + result.to = visitNode(result.to, fn) as Ast.For['to']; } if (result.times != null) { - result.times = visitNode(result.times, fn) as Cst.For['times']; + result.times = visitNode(result.times, fn) as Ast.For['times']; } - result.for = visitNode(result.for, fn) as Cst.For['for']; + result.for = visitNode(result.for, fn) as Ast.For['for']; break; } case 'loop': { for (let i = 0; i < result.statements.length; i++) { - result.statements[i] = visitNode(result.statements[i]!, fn) as Cst.Loop['statements'][number]; + result.statements[i] = visitNode(result.statements[i]!, fn) as Ast.Loop['statements'][number]; } break; } case 'addAssign': case 'subAssign': case 'assign': { - result.expr = visitNode(result.expr, fn) as Cst.Assign['expr']; - result.dest = visitNode(result.dest, fn) as 
Cst.Assign['dest']; - break; - } - case 'infix': { - for (let i = 0; i < result.operands.length; i++) { - result.operands[i] = visitNode(result.operands[i]!, fn) as Cst.Infix['operands'][number]; - } + result.expr = visitNode(result.expr, fn) as Ast.Assign['expr']; + result.dest = visitNode(result.dest, fn) as Ast.Assign['dest']; break; } case 'not': { - result.expr = visitNode(result.expr, fn) as Cst.Return['expr']; + result.expr = visitNode(result.expr, fn) as Ast.Return['expr']; break; } case 'if': { - result.cond = visitNode(result.cond, fn) as Cst.If['cond']; - result.then = visitNode(result.then, fn) as Cst.If['then']; + result.cond = visitNode(result.cond, fn) as Ast.If['cond']; + result.then = visitNode(result.then, fn) as Ast.If['then']; for (const prop of result.elseif) { - prop.cond = visitNode(prop.cond, fn) as Cst.If['elseif'][number]['cond']; - prop.then = visitNode(prop.then, fn) as Cst.If['elseif'][number]['then']; + prop.cond = visitNode(prop.cond, fn) as Ast.If['elseif'][number]['cond']; + prop.then = visitNode(prop.then, fn) as Ast.If['elseif'][number]['then']; } if (result.else != null) { - result.else = visitNode(result.else, fn) as Cst.If['else']; + result.else = visitNode(result.else, fn) as Ast.If['else']; } break; } case 'fn': { for (let i = 0; i < result.children.length; i++) { - result.children[i] = visitNode(result.children[i]!, fn) as Cst.Fn['children'][number]; + result.children[i] = visitNode(result.children[i]!, fn) as Ast.Fn['children'][number]; } break; } case 'match': { - result.about = visitNode(result.about, fn) as Cst.Match['about']; + result.about = visitNode(result.about, fn) as Ast.Match['about']; for (const prop of result.qs) { - prop.q = visitNode(prop.q, fn) as Cst.Match['qs'][number]['q']; - prop.a = visitNode(prop.a, fn) as Cst.Match['qs'][number]['a']; + prop.q = visitNode(prop.q, fn) as Ast.Match['qs'][number]['q']; + prop.a = visitNode(prop.a, fn) as Ast.Match['qs'][number]['a']; } if (result.default != null) { - 
result.default = visitNode(result.default, fn) as Cst.Match['default']; + result.default = visitNode(result.default, fn) as Ast.Match['default']; } break; } case 'block': { for (let i = 0; i < result.statements.length; i++) { - result.statements[i] = visitNode(result.statements[i]!, fn) as Cst.Block['statements'][number]; + result.statements[i] = visitNode(result.statements[i]!, fn) as Ast.Block['statements'][number]; } break; } case 'exists': { - result.identifier = visitNode(result.identifier,fn) as Cst.Exists['identifier']; + result.identifier = visitNode(result.identifier,fn) as Ast.Exists['identifier']; break; } case 'tmpl': { for (let i = 0; i < result.tmpl.length; i++) { const item = result.tmpl[i]!; if (typeof item !== 'string') { - result.tmpl[i] = visitNode(item, fn) as Cst.Tmpl['tmpl'][number]; + result.tmpl[i] = visitNode(item, fn) as Ast.Tmpl['tmpl'][number]; } } break; } case 'obj': { for (const item of result.value) { - result.value.set(item[0], visitNode(item[1], fn) as Cst.Expression); + result.value.set(item[0], visitNode(item[1], fn) as Ast.Expression); } break; } case 'arr': { for (let i = 0; i < result.value.length; i++) { - result.value[i] = visitNode(result.value[i]!, fn) as Cst.Arr['value'][number]; - } - break; - } - case 'callChain': { - for (let i = 0; i < result.args.length; i++) { - result.args[i] = visitNode(result.args[i]!, fn) as Cst.Call['args'][number]; + result.value[i] = visitNode(result.value[i]!, fn) as Ast.Arr['value'][number]; } break; } - case 'indexChain': { - result.index = visitNode(result.index, fn) as Cst.Index['index']; - break; - } case 'call': { - result.target = visitNode(result.target, fn) as Cst.Call['target']; + result.target = visitNode(result.target, fn) as Ast.Call['target']; for (let i = 0; i < result.args.length; i++) { - result.args[i] = visitNode(result.args[i]!, fn) as Cst.Call['args'][number]; + result.args[i] = visitNode(result.args[i]!, fn) as Ast.Call['args'][number]; } break; } case 'index': { - 
result.target = visitNode(result.target, fn) as Cst.Index['target']; - result.index = visitNode(result.index, fn) as Cst.Index['index']; + result.target = visitNode(result.target, fn) as Ast.Index['target']; + result.index = visitNode(result.index, fn) as Ast.Index['index']; break; } case 'prop': { - result.target = visitNode(result.target, fn) as Cst.Prop['target']; + result.target = visitNode(result.target, fn) as Ast.Prop['target']; break; } case 'ns': { @@ -149,19 +133,11 @@ export function visitNode(node: Cst.Node, fn: (node: Cst.Node) => Cst.Node): Cst case 'or': case 'and': { - result.left = visitNode(result.left, fn) as (Cst.And | Cst.Or)['left']; - result.right = visitNode(result.right, fn) as (Cst.And | Cst.Or)['right']; + result.left = visitNode(result.left, fn) as (Ast.And | Ast.Or)['left']; + result.right = visitNode(result.right, fn) as (Ast.And | Ast.Or)['right']; break; } } - if (Cst.hasChainProp(result)) { - if (result.chain != null) { - for (let i = 0; i < result.chain.length; i++) { - result.chain[i] = visitNode(result.chain[i]!, fn) as Cst.ChainMember; - } - } - } - return result; } diff --git a/test/index.ts b/test/index.ts index b1b9aac9..48cb8a6b 100644 --- a/test/index.ts +++ b/test/index.ts @@ -16,9 +16,13 @@ const exe = (program: string): Promise => new Promise((ok, err) => { maxStep: 9999, }); - const parser = new Parser(); - const ast = parser.parse(program); - aiscript.exec(ast).catch(err); + try { + const parser = new Parser(); + const ast = parser.parse(program); + aiscript.exec(ast).catch(err); + } catch (e) { + err(e); + } }); const getMeta = (program: string) => { @@ -299,8 +303,8 @@ describe('Infix expression', () => { test.concurrent('syntax symbols vs infix operators', async () => { const res = await exe(` <: match true { - 1 == 1 => "true" - 1 < 1 => "false" + case 1 == 1 => "true" + case 1 < 1 => "false" } `); eq(res, STR('true')); @@ -313,8 +317,8 @@ describe('Infix expression', () => { test.concurrent('number + match 
expression', async () => { const res = await exe(` <: 1 + match 2 == 2 { - true => 3 - false => 4 + case true => 3 + case false => 4 } `); eq(res, NUM(4)); @@ -474,6 +478,20 @@ describe('Cannot put multiple statements in a line', () => { } assert.fail(); }); + + test.concurrent('var def in block', async () => { + try { + await exe(` + eval { + let a = 42 let b = 11 + } + `); + } catch (e) { + assert.ok(true); + return; + } + assert.fail(); + }); }); test.concurrent('empty function', async () => { @@ -1494,9 +1512,9 @@ describe('match', () => { test.concurrent('Basic', async () => { const res = await exe(` <: match 2 { - 1 => "a" - 2 => "b" - 3 => "c" + case 1 => "a" + case 2 => "b" + case 3 => "c" } `); eq(res, STR('b')); @@ -1505,9 +1523,9 @@ describe('match', () => { test.concurrent('When default not provided, returns null', async () => { const res = await exe(` <: match 42 { - 1 => "a" - 2 => "b" - 3 => "c" + case 1 => "a" + case 2 => "b" + case 3 => "c" } `); eq(res, NULL); @@ -1516,10 +1534,10 @@ describe('match', () => { test.concurrent('With default', async () => { const res = await exe(` <: match 42 { - 1 => "a" - 2 => "b" - 3 => "c" - * => "d" + case 1 => "a" + case 2 => "b" + case 3 => "c" + default => "d" } `); eq(res, STR('d')); @@ -1528,13 +1546,13 @@ describe('match', () => { test.concurrent('With block', async () => { const res = await exe(` <: match 2 { - 1 => 1 - 2 => { + case 1 => 1 + case 2 => { let a = 1 let b = 2 (a + b) } - 3 => 3 + case 3 => 3 } `); eq(res, NUM(3)); @@ -1544,7 +1562,7 @@ describe('match', () => { const res = await exe(` @f(x) { match x { - 1 => { + case 1 => { return "ai" } } @@ -2266,12 +2284,12 @@ describe('Location', () => { let node: Ast.Node; const parser = new Parser(); const nodes = parser.parse(` - @f(a) { a } + @f(a) { a } `); assert.equal(nodes.length, 1); node = nodes[0]; if (!node.loc) assert.fail(); - assert.deepEqual(node.loc, { start: 3, end: 13 }); + assert.deepEqual(node.loc, { line: 2, column: 4 }); }); }); 
diff --git a/test/parser.ts b/test/parser.ts
new file mode 100644
index 00000000..893fff91
--- /dev/null
+++ b/test/parser.ts
@@ -0,0 +1,144 @@
+import * as assert from 'assert';
+import { Scanner } from '../src/parser/scanner';
+import { TOKEN, TokenKind } from '../src/parser/token';
+import { CharStream } from '../src/parser/streams/char-stream';
+
+import type { TokenLocation } from '../src/parser/token';
+
+describe('CharStream', () => {
+	test.concurrent('char', async () => {
+		const source = 'abc';
+		const stream = new CharStream(source);
+		assert.strictEqual(stream.char, 'a');
+	});
+
+	test.concurrent('next', async () => {
+		const source = 'abc';
+		const stream = new CharStream(source);
+		stream.next();
+		assert.strictEqual(stream.char, 'b');
+	});
+
+	describe('prev', () => {
+		test.concurrent('move', async () => {
+			const source = 'abc';
+			const stream = new CharStream(source);
+			stream.next();
+			assert.strictEqual(stream.char, 'b');
+			stream.prev();
+			assert.strictEqual(stream.char, 'a');
+		});
+
+		// prev() at the start of the source must stay on the first character
+		test.concurrent('境界外には移動しない', async () => {
+			const source = 'abc';
+			const stream = new CharStream(source);
+			stream.prev();
+			assert.strictEqual(stream.char, 'a');
+		});
+	});
+
+	test.concurrent('eof', async () => {
+		const source = 'abc';
+		const stream = new CharStream(source);
+		assert.strictEqual(stream.eof, false);
+		stream.next();
+		assert.strictEqual(stream.eof, false);
+		stream.next();
+		assert.strictEqual(stream.eof, false);
+		stream.next();
+		assert.strictEqual(stream.eof, true);
+	});
+
+	// reading `char` at EOF must throw
+	test.concurrent('EOFでcharを参照するとエラー', async () => {
+		const source = '';
+		const stream = new CharStream(source);
+		assert.strictEqual(stream.eof, true);
+		assert.throws(() => stream.char);
+	});
+
+	// "\r" is skipped, so "\r\n" is seen as a single "\n"
+	test.concurrent('CRは読み飛ばされる', async () => {
+		const source = 'a\r\nb';
+		const stream = new CharStream(source);
+		assert.strictEqual(stream.char, 'a');
+		stream.next();
+		assert.strictEqual(stream.char, '\n');
+		stream.next();
+		assert.strictEqual(stream.char, 'b');
+		stream.next();
+		assert.strictEqual(stream.eof, true);
+	});
+});
+
+describe('Scanner', () => {
+	/** Creates a scanner over `source`. */
+	function init(source: string): Scanner {
+		return new Scanner(source);
+	}
+
+	/** Asserts that the current token equals the expected one, then advances the scanner. */
+	function next(stream: Scanner, kind: TokenKind, loc: TokenLocation, opts: { hasLeftSpacing?: boolean, value?: string }): void {
+		assert.deepStrictEqual(stream.token, TOKEN(kind, loc, opts));
+		stream.next();
+	}
+
+	test.concurrent('eof', async () => {
+		const source = '';
+		const stream = init(source);
+		next(stream, TokenKind.EOF, { line: 1, column: 1 }, { });
+		next(stream, TokenKind.EOF, { line: 1, column: 1 }, { });
+	});
+	test.concurrent('keyword', async () => {
+		const source = 'if';
+		const stream = init(source);
+		next(stream, TokenKind.IfKeyword, { line: 1, column: 1 }, { });
+		next(stream, TokenKind.EOF, { line: 1, column: 3 }, { });
+	});
+	test.concurrent('identifier', async () => {
+		const source = 'xyz';
+		const stream = init(source);
+		next(stream, TokenKind.Identifier, { line: 1, column: 1 }, { value: 'xyz' });
+		next(stream, TokenKind.EOF, { line: 1, column: 4 }, { });
+	});
+	test.concurrent('invalid token', async () => {
+		const source = '$';
+		assert.throws(() => new Scanner(source));
+	});
+	test.concurrent('words', async () => {
+		const source = 'abc xyz';
+		const stream = init(source);
+		next(stream, TokenKind.Identifier, { line: 1, column: 1 }, { value: 'abc' });
+		next(stream, TokenKind.Identifier, { line: 1, column: 5 }, { hasLeftSpacing: true, value: 'xyz' });
+		next(stream, TokenKind.EOF, { line: 1, column: 8 }, { });
+	});
+	test.concurrent('stream', async () => {
+		const source = '@abc() { }';
+		const stream = init(source);
+		next(stream, TokenKind.At, { line: 1, column: 1 }, { });
+		next(stream, TokenKind.Identifier, { line: 1, column: 2 }, { value: 'abc' });
+		next(stream, TokenKind.OpenParen, { line: 1, column: 5 }, { });
+		next(stream, TokenKind.CloseParen, { line: 1, column: 6 }, { });
+		next(stream, TokenKind.OpenBrace, { line: 1, column: 8 }, { hasLeftSpacing: true });
+		next(stream, TokenKind.CloseBrace, { line: 1, column: 10 }, { hasLeftSpacing: true });
+		next(stream, TokenKind.EOF, { line: 1, column: 11 }, { });
+	});
+	test.concurrent('multi-lines', async () => {
+		const source = 'aaa\nbbb';
+		const stream = init(source);
+		next(stream, TokenKind.Identifier, { line: 1, column: 1 }, { value: 'aaa' });
+		next(stream, TokenKind.NewLine, { line: 1, column: 4 }, { });
+		next(stream, TokenKind.Identifier, { line: 2, column: 1 }, { value: 'bbb' });
+		next(stream, TokenKind.EOF, { line: 2, column: 4 }, { });
+	});
+	test.concurrent('lookahead', async () => {
+		const source = '@abc() { }';
+		const stream = init(source);
+		// peeking ahead must not consume the current token
+		assert.deepStrictEqual(stream.lookahead(1), TOKEN(TokenKind.Identifier, { line: 1, column: 2 }, { value: 'abc' }));
+		next(stream, TokenKind.At, { line: 1, column: 1 }, { });
+		next(stream, TokenKind.Identifier, { line: 1, column: 2 }, { value: 'abc' });
+		next(stream, TokenKind.OpenParen, { line: 1, column: 5 }, { });
+	});
+});