From 4734249824c197b9ce0911a80ab1815b48cbcc3a Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Thu, 11 Sep 2025 15:58:37 +0900
Subject: [PATCH 1/8] =?UTF-8?q?CharStream=E3=81=A7=E3=82=B5=E3=83=AD?=
 =?UTF-8?q?=E3=82=B2=E3=83=BC=E3=83=88=E3=83=9A=E3=82=A2=E3=82=921?=
 =?UTF-8?q?=E6=96=87=E5=AD=97=E3=81=A8=E3=81=97=E3=81=A6=E6=89=B1=E3=81=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parser/streams/char-stream.ts | 68 ++++++++++++++++++------
 src/utils/characters.ts           | 88 +++++++++++++++++++++++++++++++
 test/parser.ts                    | 51 ++++++++++++++++++
 3 files changed, 192 insertions(+), 15 deletions(-)
 create mode 100644 src/utils/characters.ts

diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts
index 58b36793c..a79d2594f 100644
--- a/src/parser/streams/char-stream.ts
+++ b/src/parser/streams/char-stream.ts
@@ -1,3 +1,5 @@
+import { isSurrogatePair } from '../../utils/characters.js';
+
 /**
  * 入力文字列から文字を読み取るクラス
 */
@@ -6,11 +8,13 @@ export class CharStream {
 	private firstPageIndex: number;
 	private lastPageIndex: number;
 	private pageIndex: number;
+	/** offset into the current page, counted in UTF-16 code units (not code points) */
 	private address: number;
+	/** current character; a surrogate pair is held together as one two-unit string */
 	private _char?: string;
-	/** zero-based number */
+	/** zero-based number, based on Unicode code points */
 	private line: number;
-	/** zero-based number */
+	/** zero-based number, based on Unicode code points */
 	private column: number;
 
 	constructor(source: string, opts?: { line?: number, column?: number }) {
@@ -70,8 +74,18 @@ export class CharStream {
 	 * カーソル位置を前の文字へ戻します。
 	*/
 	public prev(): void {
-		this.decAddr();
 		this.movePrev();
+		this.decAddr();
+		if (!this.startOfFile && this._char === '\n') {
+			this.line--;
+			const page = this.pages.get(this.pageIndex)!;
+			const lastLineBreak = page.lastIndexOf('\n', this.address - 1);
+			const lineStart = lastLineBreak >= 0 ? lastLineBreak + 1 : 0;
+			const line = page.slice(lineStart, this.address);
+			this.column = [...line].length - 1;
+		} else {
+			this.column--;
+		}
 	}
 
 	private get isFirstPage(): boolean {
@@ -87,6 +101,10 @@ export class CharStream {
 		return (this.address >= page.length);
 	}
 
+	private get startOfFile(): boolean {
+		return this.isFirstPage && this.address === 0;
+	}
+
 	private moveNext(): void {
 		this.loadChar();
 		while (true) {
@@ -101,7 +119,7 @@ export class CharStream {
 
 	private incAddr(): void {
 		if (!this.endOfPage) {
-			this.address++;
+			this.address += this._char!.length;
 		} else if (!this.isLastPage) {
 			this.pageIndex++;
 			this.address = 0;
@@ -109,23 +127,20 @@ export class CharStream {
 	}
 
 	private movePrev(): void {
-		this.loadChar();
-		while (true) {
-			if (!this.eof && this._char === '\r') {
-				this.decAddr();
-				this.loadChar();
-				continue;
-			}
-			break;
+		this.loadPrevChar();
+		while (!this.startOfFile && this._char === '\r') {
+			this.decAddr();
+			this.loadPrevChar();
 		}
 	}
 
 	private decAddr(): void {
 		if (this.address > 0) {
-			this.address--;
+			this.address -= getLastUnicodeChar(this.pages.get(this.pageIndex)!, this.address)!.length;
 		} else if (!this.isFirstPage) {
 			this.pageIndex--;
-			this.address = this.pages.get(this.pageIndex)!.length - 1;
+			const page = this.pages.get(this.pageIndex)!;
+			this.address = page.length - getLastUnicodeChar(page)!.length;
 		}
 	}
 
@@ -133,7 +148,30 @@ export class CharStream {
 		if (this.eof) {
 			this._char = undefined;
 		} else {
-			this._char = this.pages.get(this.pageIndex)![this.address]!;
+			this._char = getUnicodeChar(this.pages.get(this.pageIndex)!, this.address);
+		}
+	}
+
+	private loadPrevChar(): void {
+		if (this.address > 0) {
+			this._char = getLastUnicodeChar(this.pages.get(this.pageIndex)!, this.address)!;
+		} else if (!this.isFirstPage) {
+			const page = this.pages.get(this.pageIndex - 1)!;
+			this._char = getLastUnicodeChar(page)!;
 		}
 	}
 }
+
+function getUnicodeChar(string: string, position = 0): string | undefined {
+	if (isSurrogatePair(string, position)) {
+		return string.slice(position, position + 2);
+	}
+	return string[position];
+}
+
+function getLastUnicodeChar(string: string, position = string.length): string | undefined {
+	if (isSurrogatePair(string, position - 2)) {
+		return string.slice(position - 2, position);
+	}
+	return string[position - 1];
+}
diff --git a/src/utils/characters.ts b/src/utils/characters.ts
new file mode 100644
index 000000000..112faa81e
--- /dev/null
+++ b/src/utils/characters.ts
@@ -0,0 +1,88 @@
+const MIN_HIGH_SURROGATE = 0xD800;
+const MAX_HIGH_SURROGATE = 0xDBFF;
+const MIN_LOW_SURROGATE = 0xDC00;
+const MAX_LOW_SURROGATE = 0xDFFF;
+const UNICODE_LETTER = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_]$/u;
+const UNICODE_COMBINING_MARK = /^[\p{Mn}\p{Mc}]$/u;
+const UNICODE_DIGIT = /^\p{Nd}$/u;
+const UNICODE_CONNECTOR_PUNCTUATION = /^\p{Pc}$/u;
+const ZERO_WIDTH_NON_JOINER = String.fromCodePoint(0x200C);
+const ZERO_WIDTH_JOINER = String.fromCharCode(0x200D);
+
+export function isHighSurrogate(string: string, index = 0): boolean {
+	if (index < 0 || index >= string.length) {
+		return false;
+	}
+	const charCode = string.charCodeAt(index);
+	return charCode >= MIN_HIGH_SURROGATE && charCode <= MAX_HIGH_SURROGATE;
+}
+
+export function isLowSurrogate(string: string, index = 0): boolean {
+	if (index < 0 || index >= string.length) {
+		return false;
+	}
+	const charCode = string.charCodeAt(index);
+	return charCode >= MIN_LOW_SURROGATE && charCode <= MAX_LOW_SURROGATE;
+}
+
+export function isSurrogatePair(string: string, start = 0): boolean {
+	return isHighSurrogate(string, start) && isLowSurrogate(string, start + 1);
+}
+
+export function isIdentifierStart(char: string): boolean {
+	return UNICODE_LETTER.test(char) || char === '$' || char === '_';
+}
+
+export function isIdentifierPart(char: string): boolean {
+	return UNICODE_LETTER.test(char)
+		|| UNICODE_COMBINING_MARK.test(char)
+		|| UNICODE_DIGIT.test(char)
+		|| UNICODE_CONNECTOR_PUNCTUATION.test(char)
+		|| char === ZERO_WIDTH_NON_JOINER
+		|| char === ZERO_WIDTH_JOINER;
+}
+
+export function decodeUnicodeEscapeSequence(string: string): string {
+	let result = '';
+	let state: 'string' | 'escape' | `digit` = 'string';
+	let digits = '';
+
+	for (let i = 0; i < string.length; i++) {
+		const char = string[i]!;
+
+		switch (state) {
+			case 'string': {
+				if (char === '\\') {
+					state = 'escape';
+				} else {
+					result += char;
+				}
+				break;
+			}
+
+			case 'escape': {
+				if (char !== 'u') {
+					throw new SyntaxError('invalid escape sequence');
+				}
+				state = 'digit';
+				break;
+			}
+
+			case 'digit': {
+				if ((char >= '0' && char <= '9') || (char >= 'a' && char <= 'f') || (char >= 'A' && char <= 'F')) {
+					digits += char;
+				} else {
+					throw new SyntaxError('invalid escape sequence');
+				}
+				if (digits.length === 4) {
+					result += String.fromCharCode(Number.parseInt(digits, 16));
+					state = 'string';
+					digits = '';
+				}
+				break;
+			}
+		}
+	}
+
+	return result;
+}
diff --git a/test/parser.ts b/test/parser.ts
index c2588b01f..47be63002 100644
--- a/test/parser.ts
+++ b/test/parser.ts
@@ -34,6 +34,49 @@ describe('CharStream', () => {
 			stream.prev();
 			assert.strictEqual('a', stream.char);
 		});
+
+		test.concurrent('line break', async () => {
+			const source = 'a\nb';
+			const stream = new CharStream(source);
+			assert.strictEqual('a', stream.char);
+			stream.next();
+			assert.strictEqual('\n', stream.char);
+			stream.next();
+			assert.strictEqual('b', stream.char);
+			stream.prev();
+			assert.strictEqual('\n', stream.char);
+			assert.deepStrictEqual(stream.getPos(), { line: 1, column: 1 });
+		});
+
+		test.concurrent('line breaks', async () => {
+			const source = '\n\nc';
+			const stream = new CharStream(source);
+			stream.next();
+			stream.next();
+			assert.strictEqual('c', stream.char);
+			stream.prev();
+			assert.strictEqual('\n', stream.char);
+			assert.deepStrictEqual(stream.getPos(), { line: 2, column: 0 });
+		});
+
+		test.concurrent('CRは読み飛ばされる', async () => {
+			const source = 'a\r\nb';
+			const stream = new CharStream(source);
+			stream.next();
+			assert.strictEqual('\n', stream.char);
+			stream.prev();
+			assert.strictEqual('a', stream.char);
+		});
+
+		test.concurrent('surrogate pair', async () => {
+			const source = '\ud83e\udd2f';
+			const stream = new CharStream(source);
+			assert.strictEqual('\ud83e\udd2f', stream.char);
+			stream.next();
+			assert.strictEqual(true, stream.eof);
+			stream.prev();
+			assert.strictEqual('\ud83e\udd2f', stream.char);
+		});
 	});
 
 	test.concurrent('eof', async () => {
@@ -71,6 +114,14 @@ describe('CharStream', () => {
 		stream.next();
 		assert.strictEqual(true, stream.eof);
 	});
+
+	test.concurrent('surrogate pair', async () => {
+		const source = '\ud83e\udd2f';
+		const stream = new CharStream(source);
+		assert.strictEqual('\ud83e\udd2f', stream.char);
+		stream.next();
+		assert.strictEqual(true, stream.eof);
+	});
 });
 
 describe('Scanner', () => {

From d89995ba9adb1483eb71b1cf1c9dccd3d9738cac Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Fri, 12 Sep 2025 15:48:43 +0900
Subject: [PATCH 2/8] =?UTF-8?q?JSON5=E3=81=AE=E8=AD=98=E5=88=A5=E5=AD=90?=
 =?UTF-8?q?=E3=82=92=E4=BD=BF=E3=81=88=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parser/plugins/validate-keyword.ts |  94 +++++---
 src/parser/scanner.ts                  |  92 +++++++-
 src/utils/characters.ts                |  26 +-
 test/characters.ts                     | 187 +++++++++++++++
 test/identifiers.ts                    | 315 +++++++++++++++++++++++++
 test/keywords.ts                       | 167 -------------
 test/parser.ts                         |   2 +-
 unreleased/json5-identifiers.md        |  22 ++
 8 files changed, 684 insertions(+), 221 deletions(-)
 create mode 100644 test/characters.ts
 create mode 100644 test/identifiers.ts
 delete mode 100644 test/keywords.ts
 create mode 100644 unreleased/json5-identifiers.md

diff --git a/src/parser/plugins/validate-keyword.ts b/src/parser/plugins/validate-keyword.ts
index 0cbff01da..9446ed53b 100644
--- a/src/parser/plugins/validate-keyword.ts
+++ b/src/parser/plugins/validate-keyword.ts
@@ -3,7 +3,7 @@ import { visitNode } from '../visit.js';
 import type * as Ast from '../../node.js';
 
 // 予約語となっている識別子があるかを確認する。
-// - キーワードは字句解析の段階でそれぞれのKeywordトークンとなるため除外
+// - キーワードは字句解析の段階でそれぞれのKeywordトークンとなるが、エスケープシーケンスを含む場合はIdentifierトークンとなるので検証を行う。
 // - 文脈キーワードは識別子に利用できるため除外
 
 const reservedWord = [
@@ -52,25 +52,63 @@ const reservedWord = [
 	'new',
 ];
 
-function throwReservedWordError(name: string, loc: Ast.Loc): void {
-	throw new AiScriptSyntaxError(`Reserved word "${name}" cannot be used as variable name.`, loc.start);
+const keywords = [
+	'null',
+	'true',
+	'false',
+	'each',
+	'for',
+	'loop',
+	'do',
+	'while',
+	'break',
+	'continue',
+	'match',
+	'case',
+	'default',
+	'if',
+	'elif',
+	'else',
+	'return',
+	'eval',
+	'var',
+	'let',
+	'exists',
+];
+
+function validateName(name: string, pos: Ast.Pos): void {
+	if (reservedWord.includes(name)) {
+		throw new AiScriptSyntaxError(`Reserved word "${name}" cannot be used as identifier.`, pos);
+	}
+	if (keywords.includes(name)) {
+		throw new AiScriptSyntaxError(`Keyword "${name}" cannot be used as identifier.`, pos);
+	}
+}
+
+function validateTypeName(name: string, pos: Ast.Pos): void {
+	if (name === 'null') {
+		return;
+	}
+	validateName(name, pos);
+}
+
+function throwReservedWordError(name: string, pos: Ast.Pos): void {
+	throw new AiScriptSyntaxError(`Reserved word "${name}" cannot be used as variable name.`, pos);
 }
 
 function validateDest(node: Ast.Node): Ast.Node {
 	return visitNode(node, node => {
 		switch (node.type) {
 			case 'null': {
-				throwReservedWordError(node.type, node.loc);
+				throwReservedWordError(node.type, node.loc.start);
 				break;
 			}
 			case 'bool': {
-				throwReservedWordError(`${node.value}`, node.loc);
+				throwReservedWordError(`${node.value}`, node.loc.start);
 				break;
 			}
 			case 'identifier': {
-				if (reservedWord.includes(node.name)) {
-					throwReservedWordError(node.name, node.loc);
-				}
+				validateName(node.name, node.loc.start);
 				break;
 			}
 		}
@@ -81,9 +119,7 @@ function validateDest(node: Ast.Node): Ast.Node {
 
 function validateTypeParams(node: Ast.Fn | Ast.FnTypeSource): void {
 	for (const typeParam of node.typeParams) {
-		if (reservedWord.includes(typeParam.name)) {
-			throwReservedWordError(typeParam.name, node.loc);
-		}
+		validateTypeName(typeParam.name, node.loc.start);
 	}
 }
 
@@ -97,48 +133,46 @@ function validateNode(node: Ast.Node): Ast.Node {
 		case 'attr':
 		case 'identifier':
 		case 'prop': {
-			if (reservedWord.includes(node.name)) {
-				throwReservedWordError(node.name, node.loc);
-			}
+			validateName(node.name, node.loc.start);
 			break;
 		}
 		case 'meta': {
-			if (node.name != null && reservedWord.includes(node.name)) {
-				throwReservedWordError(node.name, node.loc);
+			if (node.name != null) {
+				validateName(node.name, node.loc.start);
 			}
 			break;
 		}
 		case 'each': {
-			if (node.label != null && reservedWord.includes(node.label)) {
-				throwReservedWordError(node.label, node.loc);
+			if (node.label != null) {
+				validateName(node.label, node.loc.start);
 			}
 			validateDest(node.var);
 			break;
 		}
 		case 'for': {
-			if (node.label != null && reservedWord.includes(node.label)) {
-				throwReservedWordError(node.label, node.loc);
+			if (node.label != null) {
+				validateName(node.label, node.loc.start);
 			}
-			if (node.var != null && reservedWord.includes(node.var)) {
-				throwReservedWordError(node.var, node.loc);
+			if (node.var != null) {
+				validateName(node.var, node.loc.start);
 			}
 			break;
 		}
 		case 'loop': {
-			if (node.label != null && reservedWord.includes(node.label)) {
-				throwReservedWordError(node.label, node.loc);
+			if (node.label != null) {
+				validateName(node.label, node.loc.start);
 			}
 			break;
 		}
 		case 'break': {
-			if (node.label != null && reservedWord.includes(node.label)) {
-				throwReservedWordError(node.label, node.loc);
+			if (node.label != null) {
+				validateName(node.label, node.loc.start);
 			}
 			break;
 		}
 		case 'continue': {
-			if (node.label != null && reservedWord.includes(node.label)) {
-				throwReservedWordError(node.label, node.loc);
+			if (node.label != null) {
+				validateName(node.label, node.loc.start);
 			}
 			break;
 		}
@@ -150,9 +184,7 @@ function validateNode(node: Ast.Node): Ast.Node {
 			break;
 		}
 		case 'namedTypeSource': {
-			if (reservedWord.includes(node.name)) {
-				throwReservedWordError(node.name, node.loc);
-			}
+			validateTypeName(node.name, node.loc.start);
 			break;
 		}
 		case 'fnTypeSource': {
diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts
index ac55843fa..20109b35c 100644
--- a/src/parser/scanner.ts
+++ b/src/parser/scanner.ts
@@ -1,4 +1,5 @@
 import { AiScriptSyntaxError, AiScriptUnexpectedEOFError } from '../error.js';
+import { decodeUnicodeEscapeSequence, isIdentifierPart, isIdentifierStart } from '../utils/characters.js';
 import { CharStream } from './streams/char-stream.js';
 import { TOKEN, TokenKind } from './token.js';
 import { unexpectedTokenError } from './utils.js';
@@ -9,7 +10,7 @@ import type { Token, TokenPosition } from './token.js';
 const spaceChars = [' ', '\t'];
 const lineBreakChars = ['\r', '\n'];
 const digit = /^[0-9]$/;
-const wordChar = /^[A-Za-z0-9_]$/;
+const hexDigit = /^[0-9a-fA-F]$/;
 const exponentIndicatorPattern = /^[eE]$/;
 
 /**
@@ -282,6 +283,11 @@ export class Scanner implements ITokenStream {
 				}
 				case '\\': {
 					this.stream.next();
+					if (!this.stream.eof && (this.stream.char as string) === 'u') {
+						this.stream.prev();
+						const wordToken = this.tryReadWord(hasLeftSpacing);
+						if (wordToken) return wordToken;
+					}
 					return TOKEN(TokenKind.BackSlash, pos, { hasLeftSpacing });
 				}
 				case ']': {
@@ -332,17 +338,39 @@ export class Scanner implements ITokenStream {
 
 	private tryReadWord(hasLeftSpacing: boolean): Token | undefined {
 		// read a word
-		let value = '';
+		if (this.stream.eof) {
+			return;
+		}
 
 		const pos = this.stream.getPos();
 
-		while (!this.stream.eof && wordChar.test(this.stream.char)) {
-			value += this.stream.char;
-			this.stream.next();
-		}
-		if (value.length === 0) {
+		let rawValue = this.tryReadIdentifierStart();
+		if (rawValue === undefined) {
 			return;
 		}
+		while (!(this.stream.eof as boolean)) {
+			const matchedIdentifierPart = this.tryReadIdentifierPart();
+			if (matchedIdentifierPart === undefined) {
+				break;
+			}
+			rawValue += matchedIdentifierPart;
+		}
+
+		const value = decodeUnicodeEscapeSequence(rawValue);
+		const [start, ...parts] = value;
+		if (!isIdentifierStart(start!)) {
+			throw new AiScriptSyntaxError(`Invalid identifier: "${value}"`, pos);
+		}
+		for (const part of parts) {
+			if (!isIdentifierPart(part)) {
+				throw new AiScriptSyntaxError(`Invalid identifier: "${value}"`, pos);
+			}
+		}
+
+		if (value !== rawValue) {
+			return TOKEN(TokenKind.Identifier, pos, { hasLeftSpacing, value });
+		}
+
 		// check word kind
 		switch (value) {
 			case 'null': {
@@ -414,6 +442,56 @@ export class Scanner implements ITokenStream {
 		}
 	}
 
+	private tryReadIdentifierStart(): string | undefined {
+		if (this.stream.eof) {
+			return;
+		}
+		if (isIdentifierStart(this.stream.char)) {
+			const value = this.stream.char;
+			this.stream.next();
+			return value;
+		}
+		if (this.stream.char === '\\') {
+			this.stream.next();
+			return '\\' + this.readUnicodeEscapeSequence();
+		}
+		return;
+	}
+
+	private tryReadIdentifierPart(): string | undefined {
+		if (this.stream.eof) {
+			return;
+		}
+		const matchedIdentifierStart = this.tryReadIdentifierStart();
+		if (matchedIdentifierStart !== undefined) {
+			return matchedIdentifierStart;
+		}
+		if (isIdentifierPart(this.stream.char)) {
+			const value = this.stream.char;
+			this.stream.next();
+			return value;
+		}
+		return;
+	}
+
+	private readUnicodeEscapeSequence(): `u${string}` {
+		if (this.stream.eof || (this.stream.char as string) !== 'u') {
+			throw new AiScriptSyntaxError('character "u" expected', this.stream.getPos());
+		}
+		this.stream.next();
+
+		let code = '';
+		for (let i = 0; i < 4; i++) {
+			if (this.stream.eof || !hexDigit.test(this.stream.char)) {
+				throw new AiScriptSyntaxError('hexadecimal digit expected', this.stream.getPos());
+			}
+			code += this.stream.char;
+			this.stream.next();
+		}
+
+		return `u${code}`;
+	}
+
 	private tryReadDigits(hasLeftSpacing: boolean): Token | undefined {
 		let wholeNumber = '';
 		let fractional = '';
diff --git a/src/utils/characters.ts b/src/utils/characters.ts
index 112faa81e..f8a7825a7 100644
--- a/src/utils/characters.ts
+++ b/src/utils/characters.ts
@@ -2,12 +2,9 @@ const MIN_HIGH_SURROGATE = 0xD800;
 const MAX_HIGH_SURROGATE = 0xDBFF;
 const MIN_LOW_SURROGATE = 0xDC00;
 const MAX_LOW_SURROGATE = 0xDFFF;
-const UNICODE_LETTER = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_]$/u;
-const UNICODE_COMBINING_MARK = /^[\p{Mn}\p{Mc}]$/u;
-const UNICODE_DIGIT = /^\p{Nd}$/u;
-const UNICODE_CONNECTOR_PUNCTUATION = /^\p{Pc}$/u;
-const ZERO_WIDTH_NON_JOINER = String.fromCodePoint(0x200C);
-const ZERO_WIDTH_JOINER = String.fromCharCode(0x200D);
+const IDENTIFIER_START_PATTERN = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_]$/u;
+const IDENTIFIER_PART_PATTERN = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_\p{Mn}\p{Mc}\p{Nd}\p{Pc}\u200c\u200d]$/u;
+const HEX_DIGIT = /^[0-9a-fA-F]$/;
 
 export function isHighSurrogate(string: string, index = 0): boolean {
 	if (index < 0 || index >= string.length) {
@@ -30,21 +27,16 @@ export function isSurrogatePair(string: string, start = 0): boolean {
 }
 
 export function isIdentifierStart(char: string): boolean {
-	return UNICODE_LETTER.test(char) || char === '$' || char === '_';
+	return IDENTIFIER_START_PATTERN.test(char);
 }
 
 export function isIdentifierPart(char: string): boolean {
-	return UNICODE_LETTER.test(char)
-		|| UNICODE_COMBINING_MARK.test(char)
-		|| UNICODE_DIGIT.test(char)
-		|| UNICODE_CONNECTOR_PUNCTUATION.test(char)
-		|| char === ZERO_WIDTH_NON_JOINER
-		|| char === ZERO_WIDTH_JOINER;
+	return IDENTIFIER_PART_PATTERN.test(char);
 }
 
 export function decodeUnicodeEscapeSequence(string: string): string {
 	let result = '';
-	let state: 'string' | 'escape' | `digit` = 'string';
+	let state: 'string' | 'escape' | 'digit' = 'string';
 	let digits = '';
 
 	for (let i = 0; i < string.length; i++) {
@@ -69,7 +61,7 @@ export function decodeUnicodeEscapeSequence(string: string): string {
 			}
 
 			case 'digit': {
-				if ((char >= '0' && char <= '9') || (char >= 'a' && char <= 'f') || (char >= 'A' && char <= 'F')) {
+				if (HEX_DIGIT.test(char)) {
 					digits += char;
 				} else {
 					throw new SyntaxError('invalid escape sequence');
@@ -84,5 +76,9 @@ export function decodeUnicodeEscapeSequence(string: string): string {
 		}
 	}
 
+	if (state !== 'string') {
+		throw new SyntaxError('invalid escape sequence');
+	}
+
 	return result;
 }
diff --git a/test/characters.ts b/test/characters.ts
new file mode 100644
index 000000000..4d5925ad0
--- /dev/null
+++ b/test/characters.ts
@@ -0,0 +1,187 @@
+import { decodeUnicodeEscapeSequence, isHighSurrogate, isIdentifierPart, isIdentifierStart, isLowSurrogate, isSurrogatePair } from '../src/utils/characters';
+import { describe, expect, test } from 'vitest';
+
+describe('isHighSurrogate', () => {
+	const cases: [string, boolean][] = [
+		['', false],
+		['\ud7ff', false],
+		['\ud800', true],
+		['\udbff', true],
+		['\udc00', false],
+		['\udfff', false],
+		['\ue000', false],
+	];
+
+	test.concurrent.each(cases)('"%s" -> %s', (input, expected) => {
+		expect(isHighSurrogate(input)).toBe(expected);
+	});
+
+	test.concurrent('index out of range', () => {
+		expect(isHighSurrogate('\uD800', 1)).toBe(false);
+	});
+});
+
+describe('isLowSurrogate', () => {
+	const cases: [string, boolean][] = [
+		['', false],
+		['\ud7ff', false],
+		['\ud800', false],
+		['\udbff', false],
+		['\udc00', true],
+		['\udfff', true],
+		['\ue000', false],
+	];
+
+	test.concurrent.each(cases)('"%s" -> %s', (input, expected) => {
+		expect(isLowSurrogate(input)).toBe(expected);
+	});
+
+	test.concurrent('index out of range', () => {
+		expect(isLowSurrogate('\uDC00', 1)).toBe(false); // one-unit string: index 1 is past the end
+	});
+});
+
+describe('isSurrogatePair', () => {
+	const cases: [string, boolean][] = [
+		['\ud842\udfb7', true],
+		['\ud83e\udd2f', true],
+		['a', false],
+		['\u85cd', false],
+		['\ud842', false],
+		['\ud8000', false],
+		['0\udc00', false],
+		['_\ud842\udfb7', false],
+	];
+
+	test.concurrent.each(cases)('"%s" -> %s', (input, expected) => {
+		expect(isSurrogatePair(input)).toBe(expected);
+	});
+
+	test.concurrent('start given', () => { // fixed input, so registered once rather than per table row
+		expect(isSurrogatePair('_\ud842\udfb7', 1)).toBe(true);
+	});
+});
+
+describe('isIdentifierStart', () => {
+	const cases: [string, boolean][] = [
+		// UnicodeLetter
+		['\u0041', true], // U+0041 (LATIN CAPITAL LETTER A): Uppercase letter (Lu)
+		['\u0061', true], // U+0061 (LATIN SMALL LETTER A ): Lowercase letter (Ll)
+		['\u01c5', true], // U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON): Titlecase letter (Lt)
+		['\u01c8', true], // U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J): Titlecase letter (Lt)
+		['\u02b0', true], // U+02B0 (Modifier Letter Small H): Modifier letter (Lm)
+		['\u03a9', true], // U+03A9 (GREEK CAPITAL LETTER OMEGA): Uppercase letter (Lu)
+		['\u03b2', true], // U+03B2 (GREEK SMALL LETTER BETA): Lowercase letter (Ll)
+		['\u16ee', true], // U+16EE (Runic Arlaug Symbol): Letter number (Nl)
+		['\u2163', true], // U+2163 (Roman Numeral Four): Letter number (Nl)
+		['\u3005', true], // U+3005 (Ideographic Iteration Mark): Modifier letter (Lm)
+		['\u3042', true], // U+3042 (HIRAGANA LETTER A): Other letter (Lo)
+		['\u85cd', true], // U+85CD (CJK Unified Ideograph-85CD): Other letter (Lo)
+		['\ud842\udfb7', true], // U+20BB7 (CJK Unified Ideograph-20BB7): Other letter (Lo)
+
+		// $
+		['$', true],
+
+		// _
+		['_', true],
+
+		// Invalid characters
+		['\u0021', false], // U+0021 (Exclamation Mark): Other Punctuation (Po)
+		['\u0030', false], // U+0030 (Digit Zero): Decimal number (Nd)
+		['\u0301', false], // U+0301 (Combining Acute Accent): Non-spacing mark (Mn)
+		['\u093e', false], // U+093E (Devanagari Vowel Sign Aa): Combining spacing mark (Mc)
+		['\u200c', false], // U+200C (Zero Width Non-Joiner (ZWNJ)): Format (Cf)
+		['\u200d', false], // U+200D (Zero Width Joiner (ZWJ)): Format (Cf)
+		['\u203f', false], // U+203F (Undertie): Connector punctuation (Pc)
+		['\ud83e\udd2f', false], // U+1F92F (Shocked Face with Exploding Head): Other Symbol (So)
+	];
+
+	test.concurrent.each(cases)('"%s" -> %s', (input, expected) => {
+		expect(isIdentifierStart(input)).toBe(expected);
+	});
+});
+
+describe('isIdentifierPart', () => {
+	const cases: [string, boolean][] = [
+		// UnicodeLetter
+		['\u0041', true], // U+0041 (LATIN CAPITAL LETTER A): Uppercase letter (Lu)
+		['\u0061', true], // U+0061 (LATIN SMALL LETTER A ): Lowercase letter (Ll)
+		['\u01c5', true], // U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON): Titlecase letter (Lt)
+		['\u01c8', true], // U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J): Titlecase letter (Lt)
+		['\u02b0', true], // U+02B0 (Modifier Letter Small H): Modifier letter (Lm)
+		['\u03a9', true], // U+03A9 (GREEK CAPITAL LETTER OMEGA): Uppercase letter (Lu)
+		['\u03b2', true], // U+03B2 (GREEK SMALL LETTER BETA): Lowercase letter (Ll)
+		['\u16ee', true], // U+16EE (Runic Arlaug Symbol): Letter number (Nl)
+		['\u2163', true], // U+2163 (Roman Numeral Four): Letter number (Nl)
+		['\u3005', true], // U+3005 (Ideographic Iteration Mark): Modifier letter (Lm)
+		['\u3042', true], // U+3042 (HIRAGANA LETTER A): Other letter (Lo)
+		['\u85cd', true], // U+85CD (CJK Unified Ideograph-85CD): Other letter (Lo)
+		['\ud842\udfb7', true], // U+20BB7 (CJK Unified Ideograph-20BB7): Other letter (Lo)
+
+		// $
+		['$', true],
+
+		// _
+		['_', true],
+
+		// UnicodeCombiningMark
+		['\u0301', true], // U+0301 (Combining Acute Accent): Non-spacing mark (Mn)
+		['\u093e', true], // U+093E (Devanagari Vowel Sign Aa): Combining spacing mark (Mc)
+
+		// UnicodeDigit
+		//   Decimal number (Nd)
+		['\u0030', true], // U+0030 (Digit Zero): Decimal number (Nd)
+
+		// UnicodeConnectorPunctuation
+		//   Connector punctuation (Pc)
+		['\u203f', true], // U+203F (Undertie): Connector punctuation (Pc)
+
+		// ZWNJ
+		['\u200c', true], // U+200C (Zero Width Non-Joiner (ZWNJ)): Format (Cf)
+
+		// ZWJ
+		['\u200d', true], // U+200D (Zero Width Joiner (ZWJ)): Format (Cf)
+
+		// Invalid characters
+		['\u0021', false], // U+0021 (Exclamation Mark): Other Punctuation (Po)
+		['\ud83e\udd2f', false], // U+1F92F (Shocked Face with Exploding Head): Other Symbol (So)
+	];
+
+	test.concurrent.each(cases)('"%s" -> %s', (input, expected) => {
+		expect(isIdentifierPart(input)).toBe(expected);
+	});
+});
+
+describe('decodeUnicodeEscapeSequence', () => {
+	test('plain', () => {
+		expect(decodeUnicodeEscapeSequence('abc123')).toBe('abc123');
+	});
+
+	test('escape', () => {
+		expect(decodeUnicodeEscapeSequence('\\u0041')).toBe('A');
+	});
+
+	test('escape lowercase', () => {
+		expect(decodeUnicodeEscapeSequence('\\u85cd')).toBe('藍');
+	});
+
+	test('escape uppercase', () => {
+		expect(decodeUnicodeEscapeSequence('\\u85CD')).toBe('藍');
+	});
+
+	test('expects "u", unexpected end', () => {
+		expect(() => decodeUnicodeEscapeSequence('\\')).toThrow();
+	});
+
+	test('expects "u"', () => {
+		expect(() => decodeUnicodeEscapeSequence('\\0')).toThrow();
+	});
+
+	test('expects digit, unexpected end', () => {
+		expect(() => decodeUnicodeEscapeSequence('\\u00')).toThrow();
+	});
+
+	test('expects digit', () => {
+		expect(() => decodeUnicodeEscapeSequence('\\ug')).toThrow();
+	});
+});
diff --git a/test/identifiers.ts b/test/identifiers.ts
new file mode 100644
index 000000000..22155f460
--- /dev/null
+++ b/test/identifiers.ts
@@ -0,0 +1,315 @@
+import { describe, expect, test } from 'vitest';
+import { Parser } from '../src';
+import { AiScriptSyntaxError } from '../src/error';
+import { eq, exe } from './testutils';
+import { NULL, NUM, STR, Value } from '../src/interpreter/value';
+
+const reservedWords = [
+	// 使用中の語
+	'null',
+	'true',
+	'false',
+	'each',
+	'for',
+	'do',
+	'while',
+	'loop',
+	'break',
+	'continue',
+	'match',
+	'case',
+	'default',
+	'if',
+	'elif',
+	'else',
+	'return',
+	'eval',
+	'var',
+	'let',
+	'exists',
+
+	// 使用予定の語
+	// 文脈キーワードは識別子に利用できるため除外
+	'as',
+	'async',
+	'attr',
+	'attribute',
+	'await',
+	'catch',
+	'class',
+	// 'const',
+	'component',
+	'constructor',
+	// 'def',
+	'dictionary',
+	'enum',
+	'export',
+	'finally',
+	'fn',
+	// 'func',
+	// 'function',
+	'hash',
+	'in',
+	'interface',
+	'out',
+	'private',
+	'public',
+	'ref',
+	'static',
+	'struct',
+	'table',
+	'this',
+	'throw',
+	'trait',
+	'try',
+	'undefined',
+	'use',
+	'using',
+	'when',
+	'yield',
+	'import',
+	'is',
+	'meta',
+	'module',
+	'namespace',
+	'new',
+] as const;
+
+const validIdentifiers = [
+	// IdentifierStart
+	//   UnicodeLetter
+	//     Uppercase letter (Lu)
+	'A', // U+0041 (LATIN CAPITAL LETTER A)
+	'Ω', // U+03A9 (GREEK CAPITAL LETTER OMEGA)
+
+	//     Lowercase letter (Ll)
+	'a', // U+0061 (LATIN SMALL LETTER A )
+	'β', // U+03B2 (GREEK SMALL LETTER BETA)
+
+	//     Titlecase letter (Lt)
+	'ǅ', // U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON)
+	'ǈ', // U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J)
+
+	//     Modifier letter (Lm)
+	'ʰ', // U+02B0 (Modifier Letter Small H)
+	'々', // U+3005 (Ideographic Iteration Mark)
+
+	//     Other letter (Lo)
+	'あ', // U+3042 (HIRAGANA LETTER A)
+	'藍', // U+85CD (CJK Unified Ideograph-85CD)
+	'𠮷', // U+20BB7 (CJK Unified Ideograph-20BB7)
+
+	//     Letter number (Nl)
+	'ᛮ', // U+16EE (Runic Arlaug Symbol)
+	'Ⅳ', // U+2163 (Roman Numeral Four)
+
+	//   $
+	'$',
+
+	//   _
+	'_',
+
+	// IdentifierPart
+	//   IdentifierStart
+	'_A',
+	'_Ω',
+	'_a',
+	'_β',
+	'_ǅ',
+	'_ǈ',
+	'_ʰ',
+	'_々',
+	'_あ',
+	'_藍',
+	'_𠮷',
+	'_ᛮ',
+	'_Ⅳ',
+	'_$',
+	'__',
+
+	//   UnicodeCombiningMark
+	//     Non-spacing mark (Mn)
+	'á', // U+0301 (Combining Acute Accent)
+
+	//     Combining spacing mark (Mc)
+	'राम', // U+093E (Devanagari Vowel Sign Aa)
+
+	//   UnicodeDigit
+	//     Decimal number (Nd)
+	'a0', // U+0030 (Digit Zero)
+
+	//   UnicodeConnectorPunctuation
+	//     Connector punctuation (Pc)
+	'a‿b', // U+203F (Undertie)
+
+	// <ZWNJ>
+	'बि‌ना',
+
+	// <ZWJ>
+	'क‍्',
+];
+
+const validEscapeIdentifiers: [string, string][] = [
+	['\\u85cd', '藍'],
+	['\\u85CD', '藍'],
+	['\\ud842\\udfb7', '𠮷'],
+	['\\uD842\\uDFB7', '𠮷'],
+	['_\\u85cd', '_藍'],
+	['_\\u85CD', '_藍'],
+	['_\\ud842\\udfb7', '_𠮷'],
+	['_\\uD842\\uDFB7', '_𠮷'],
+];
+
+const invalidIdentifiers = [
+	'\\u',
+	'\\u000x',
+	'\\u0021', // "!": Other Punctuation (Po)
+	'\\u0069\\u0066', // "if"
+	'\\ud83e\\udd2f', '\\uD83E\\uDD2F', // U+1F92F (Shocked Face with Exploding Head): Other Symbol (So)
+	'_\\u',
+	'_\\u000x',
+	'_\\u0021',
+	'_\\ud83e\\udd2f',
+	'_\\uD83E\\uDD2F',
+];
+
+const sampleCodes = Object.entries<[(definedName: string, referredName: string) => string, Value]>({
+	variable: [(definedName, referredName) =>
+	`
+	let ${definedName} = "ai"
+	<: ${referredName}
+	`, STR("ai")],
+
+	function: [(definedName, referredName) =>
+	`
+	@${definedName}() { 'ai' }
+	<: ${referredName}()
+	`, STR("ai")],
+
+	attribute: [(definedName) =>
+	`
+	#[${definedName} 1]
+	@f() { 1 }
+	`, NULL],
+
+	namespace: [(definedName, referredName) =>
+	`
+	:: ${definedName} {
+		@f() { 1 }
+	}
+	<: ${referredName}:f()
+	`, NUM(1)],
+
+	prop: [(definedName, referredName) =>
+	`
+	let x = { ${definedName}: 1 }
+	x.${referredName}
+	`, NUM(1)],
+
+	meta: [(definedName) =>
+	`
+	### ${definedName} 1
+	`, NULL],
+
+	forBreak: [(definedName, referredName) =>
+	`
+	#${definedName}: for 1 {
+		break #${referredName}
+	}
+	`, NULL],
+
+	eachBreak: [(definedName, referredName) =>
+	`
+	#${definedName}: each let v, [0] {
+		break #${referredName}
+	}
+	`, NULL],
+
+	whileBreak: [(definedName, referredName) =>
+	`
+	#${definedName}: while false {
+		break #${referredName}
+	}
+	`, NULL],
+
+	forContinue: [(definedName, referredName) =>
+	`
+	#${definedName}: for 1 {
+		continue #${referredName}
+	}
+	`, NULL],
+
+	eachContinue: [(definedName, referredName) => // labeled `continue` targeting an `each` loop
+	`
+	#${definedName}: each let v, [0] {
+		continue #${referredName}
+	}
+	`, NULL],
+
+	whileContinue: [(definedName, referredName) =>
+	`
+	var flag = true
+	#${definedName}: while flag {
+		flag = false
+		continue #${referredName}
+	}
+	`, NULL],
+
+	typeParam: [(definedName, referredName) =>
+	`
+	@f<${definedName}>(x): ${referredName} { x }
+	`, NULL],
+});
+
+const parser = new Parser();
+
+describe.each(
+	sampleCodes
+)('identifier validation on %s', (_, [sampleCode, expected]) => {
+
+	test.concurrent.each(
+		reservedWords
+	)('%s must be rejected', (word) => {
+		expect(() => parser.parse(sampleCode(word, word))).toThrow(AiScriptSyntaxError);
+	});
+
+	test.concurrent.each(
+		reservedWords
+	)('%scat must be allowed', (word) => {
+		const wordCat = word + 'cat';
+		parser.parse(sampleCode(wordCat, wordCat));
+	});
+
+	test.concurrent.each(
+		validIdentifiers
+	)('%s must be allowed', (word) => {
+		parser.parse(sampleCode(word, word));
+	});
+
+	test.concurrent.each(
+		validEscapeIdentifiers
+	)('$0 must be allowed (referred as $1)', (encoded, decoded) => {
+		parser.parse(sampleCode(encoded, decoded));
+	});
+
+	test.concurrent.each(
+		validEscapeIdentifiers
+	)('$1 must be allowed (referred as $0)', async (encoded, decoded) => {
+		const res = await exe(sampleCode(decoded, encoded));
+		eq(res, expected);
+	});
+
+	test.concurrent.each(
+		invalidIdentifiers
+	)('%s must be rejected', (word) => {
+		expect(() => parser.parse(sampleCode(word, word))).toThrow(AiScriptSyntaxError);
+	});
+});
+
+test.concurrent('Keyword cannot contain escape characters', async () => {
+	await expect(async () => await exe(`
+	\\u0069\\u0066 true {
+		<: 1
+	}
+	`)).rejects.toThrow();
+})
diff --git a/test/keywords.ts b/test/keywords.ts
deleted file mode 100644
index 983b8ea1f..000000000
--- a/test/keywords.ts
+++ /dev/null
@@ -1,167 +0,0 @@
-import { describe, expect, test } from 'vitest';
-import { Parser } from '../src';
-import { AiScriptSyntaxError } from '../src/error';
-
-const reservedWords = [
-	// 使用中の語
-	'null',
-	'true',
-	'false',
-	'each',
-	'for',
-	'do',
-	'while',
-	'loop',
-	'break',
-	'continue',
-	'match',
-	'case',
-	'default',
-	'if',
-	'elif',
-	'else',
-	'return',
-	'eval',
-	'var',
-	'let',
-	'exists',
-
-	// 使用予定の語
-	// 文脈キーワードは識別子に利用できるため除外
-	'as',
-	'async',
-	'attr',
-	'attribute',
-	'await',
-	'catch',
-	'class',
-	// 'const',
-	'component',
-	'constructor',
-	// 'def',
-	'dictionary',
-	'enum',
-	'export',
-	'finally',
-	'fn',
-	// 'func',
-	// 'function',
-	'hash',
-	'in',
-	'interface',
-	'out',
-	'private',
-	'public',
-	'ref',
-	'static',
-	'struct',
-	'table',
-	'this',
-	'throw',
-	'trait',
-	'try',
-	'undefined',
-	'use',
-	'using',
-	'when',
-	'yield',
-	'import',
-	'is',
-	'meta',
-	'module',
-	'namespace',
-	'new',
-] as const;
-
-const sampleCodes = Object.entries<(word: string) => string>({
-	variable: word =>
-	`
-	let ${word} = "ai"
-	${word}
-	`,
-
-	function: word =>
-	`
-	@${word}() { 'ai' }
-	${word}()
-	`,
-
-	attribute: word =>
-	`
-	#[${word} 1]
-	@f() { 1 }
-	`,
-
-	namespace: word =>
-	`
-	:: ${word} {
-		@f() { 1 }
-	}
-	${word}:f()
-	`,
-
-	prop: word =>
-	`
-	let x = { ${word}: 1 }
-	x.${word}
-	`,
-
-	meta: word =>
-	`
-	### ${word} 1
-	`,
-
-	for: word =>
-	`
-	#${word}: for 1 {}
-	`,
-
-	each: word =>
-	`
-	#${word}: each let v, [0] {}
-	`,
-
-	while: word =>
-	`
-	#${word}: while false {}
-	`,
-
-	break: word =>
-	`
-	#${word}: for 1 {
-		break #${word}
-	}
-	`,
-
-	continue: word =>
-	`
-	#${word}: for 1 {
-		continue #${word}
-	}
-	`,
-
-	typeParam: word =>
-	`
-	@f<${word}>(x): ${word} { x }
-	`,
-});
-
-const parser = new Parser();
-
-describe.each(
-	sampleCodes
-)('reserved word validation on %s', (_, sampleCode) => {
-
-	test.concurrent.each(
-		reservedWords
-	)('%s must be rejected', (word) => {
-		expect(() => parser.parse(sampleCode(word))).toThrow(AiScriptSyntaxError);
-	});
-
-	test.concurrent.each(
-		reservedWords
-	)('%scat must be allowed', (word) => {
-		parser.parse(sampleCode(word+'cat'));
-	});
-
-});
diff --git a/test/parser.ts b/test/parser.ts
index 47be63002..80cc15732 100644
--- a/test/parser.ts
+++ b/test/parser.ts
@@ -153,7 +153,7 @@ describe('Scanner', () => {
 		next(stream, TokenKind.EOF, { line: 1, column: 4 }, { });
 	});
 	test.concurrent('invalid token', async () => {
-		const source = '$';
+		const source = '~';
 		try {
 			const stream = new Scanner(source);
 		} catch (e) {
diff --git a/unreleased/json5-identifiers.md b/unreleased/json5-identifiers.md
new file mode 100644
index 000000000..1f69d641f
--- /dev/null
+++ b/unreleased/json5-identifiers.md
@@ -0,0 +1,22 @@
+- 識別子に使用できる文字の種類を追加
+	- 識別子には以下の文字を使用できます。
+		- 以下のUnicodeカテゴリに含まれる全ての文字
+			- Uppercase letter (Lu)
+			- Lowercase letter (Ll)
+			- Titlecase letter (Lt)
+			- Modifier letter (Lm)
+			- Other letter (Lo)
+			- Letter number (Nl)
+		- `$`
+		- `_`
+		- `\u`とそれに続く4桁の16進法の英数字
+			- Unicodeエスケープシーケンスとして、与えられた値を持つUTF-16コード単位として解釈されます。
+	- 識別子の最初を除いた部分では、以下の文字も使用できます。
+		- 以下のUnicodeカテゴリに含まれる全ての文字
+			- Non-spacing mark (Mn)
+			- Combining spacing mark (Mc)
+			- Decimal number (Nd)
+			- Connector punctuation (Pc)
+			- ゼロ幅非接合子 (ZWNJ)
+			- ゼロ幅接合子 (ZWJ)
+	- Unicodeエスケープシーケンスを用いないと上記の制約に抵触する文字列を、Unicodeエスケープシーケンスによって識別子とすることはできません。

From c8dc63189b95cb17e71db4c4452f50a5e98c0c3f Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Fri, 12 Sep 2025 16:05:58 +0900
Subject: [PATCH 3/8] =?UTF-8?q?column=E3=82=92UTF-16=E3=82=B3=E3=83=BC?=
 =?UTF-8?q?=E3=83=89=E5=8D=98=E4=BD=8D=E3=81=AB=E5=A4=89=E6=9B=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parser/streams/char-stream.ts | 10 +++++-----
 test/parser.ts                    | 31 +++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/parser/streams/char-stream.ts b/src/parser/streams/char-stream.ts
index a79d2594f..b60a477f1 100644
--- a/src/parser/streams/char-stream.ts
+++ b/src/parser/streams/char-stream.ts
@@ -12,9 +12,9 @@ export class CharStream {
 	private address: number;
 	/** Unicode character */
 	private _char?: string;
-	/** zero-based number, based on Unicode code points */
+	/** zero-based number */
 	private line: number;
-	/** zero-based number, based on Unicode code points */
+	/** zero-based number, based on UTF-16 code unit */
 	private column: number;
 
 	constructor(source: string, opts?: { line?: number, column?: number }) {
@@ -64,7 +64,7 @@ export class CharStream {
 			this.line++;
 			this.column = 0;
 		} else {
-			this.column++;
+			this.column += this._char!.length;
 		}
 		this.incAddr();
 		this.moveNext();
@@ -82,9 +82,9 @@ export class CharStream {
 			const lastLineBreak = page.lastIndexOf('\n', this.address - 1);
 			const lineStart = lastLineBreak >= 0 ? lastLineBreak + 1 : 0;
 			const line = page.slice(lineStart, this.address);
-			this.column = [...line].length - 1;
+			this.column = line.length;
 		} else {
-			this.column--;
+			this.column -= this._char!.length;
 		}
 	}
 
diff --git a/test/parser.ts b/test/parser.ts
index 80cc15732..a73ec87c9 100644
--- a/test/parser.ts
+++ b/test/parser.ts
@@ -45,7 +45,7 @@ describe('CharStream', () => {
 			assert.strictEqual('b', stream.char);
 			stream.prev();
 			assert.strictEqual('\n', stream.char);
-			assert.deepStrictEqual(stream.getPos(), { line: 1, column: 1 });
+			assert.deepStrictEqual(stream.getPos(), { line: 1, column: 2 });
 		});
 
 		test.concurrent('line breaks', async () => {
@@ -56,7 +56,7 @@ describe('CharStream', () => {
 			assert.strictEqual('c', stream.char);
 			stream.prev();
 			assert.strictEqual('\n', stream.char);
-			assert.deepStrictEqual(stream.getPos(), { line: 2, column: 0 });
+			assert.deepStrictEqual(stream.getPos(), { line: 2, column: 1 });
 		});
 
 		test.concurrent('CRは読み飛ばされる', async () => {
@@ -77,6 +77,26 @@ describe('CharStream', () => {
 			stream.prev();
 			assert.strictEqual('\ud83e\udd2f', stream.char);
 		});
+
+		test.concurrent('column is based on UTF-16 code unit', async () => {
+			const source = '\ud83e\udd2f!'; // one surrogate pair (2 UTF-16 code units) followed by an exclamation mark
+			const stream = new CharStream(source);
+			stream.next();
+			stream.next();
+			stream.prev(); // step back onto the exclamation mark
+			assert.strictEqual(stream.char, '!');
+			assert.deepStrictEqual(stream.getPos(), { line: 1, column: 3 }); // the pair counts as 2 columns; getPos() appears to report one-based values (confirm)
+		});
+
+		test.concurrent('column is based on UTF-16 code unit, line break', async () => {
+			const source = '\ud83e\udd2f\n'; // one surrogate pair (2 UTF-16 code units) followed by a line feed
+			const stream = new CharStream(source);
+			stream.next();
+			stream.next();
+			stream.prev(); // step back onto the line feed
+			assert.strictEqual(stream.char, '\n');
+			assert.deepStrictEqual(stream.getPos(), { line: 1, column: 3 }); // still on line 1; the pair counts as 2 columns
+		});
 	});
 
 	test.concurrent('eof', async () => {
@@ -122,6 +142,13 @@ describe('CharStream', () => {
 		stream.next();
 		assert.strictEqual(true, stream.eof);
 	});
+
+	test.concurrent('column is based on UTF-16 code unit', async () => {
+		const source = '\ud83e\udd2f'; // a single surrogate pair (2 UTF-16 code units)
+		const stream = new CharStream(source);
+		stream.next(); // advance past the pair
+		assert.deepStrictEqual(stream.getPos(), { line: 1, column: 3 }); // column advanced by 2, presumably from a starting column of 1 (confirm)
+	});
 });
 
 describe('Scanner', () => {

From b47dcfeceff9af6fef27fb3661f38b6cc2f65ac2 Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Fri, 12 Sep 2025 16:30:09 +0900
Subject: [PATCH 4/8] =?UTF-8?q?=E4=BD=BF=E7=94=A8=E4=B8=AD=E3=80=81?=
 =?UTF-8?q?=E4=BD=BF=E7=94=A8=E4=BA=88=E5=AE=9A=E3=81=AE=E4=BA=88=E7=B4=84?=
 =?UTF-8?q?=E8=AA=9E=E3=81=AE=E3=82=A8=E3=83=A9=E3=83=BC=E3=82=92=E5=85=B1?=
 =?UTF-8?q?=E9=80=9A=E3=81=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parser/plugins/validate-keyword.ts | 55 ++++++++++++--------------
 1 file changed, 26 insertions(+), 29 deletions(-)

diff --git a/src/parser/plugins/validate-keyword.ts b/src/parser/plugins/validate-keyword.ts
index 9446ed53b..ca79dc590 100644
--- a/src/parser/plugins/validate-keyword.ts
+++ b/src/parser/plugins/validate-keyword.ts
@@ -7,6 +7,30 @@ import type * as Ast from '../../node.js';
 // - 文脈キーワードは識別子に利用できるため除外
 
 const reservedWord = [
+	// 使用中の語
+	'null',
+	'true',
+	'false',
+	'each',
+	'for',
+	'loop',
+	'do',
+	'while',
+	'break',
+	'continue',
+	'match',
+	'case',
+	'default',
+	'if',
+	'elif',
+	'else',
+	'return',
+	'eval',
+	'var',
+	'let',
+	'exists',
+
+	// 使用予定の語
 	'as',
 	'async',
 	'attr',
@@ -52,36 +76,9 @@ const reservedWord = [
 	'new',
 ];
 
-const keywords = [
-	'null',
-	'true',
-	'false',
-	'each',
-	'for',
-	'loop',
-	'do',
-	'while',
-	'break',
-	'continue',
-	'match',
-	'case',
-	'default',
-	'if',
-	'elif',
-	'else',
-	'return',
-	'eval',
-	'var',
-	'let',
-	'exists',
-];
-
 function validateName(name: string, pos: Ast.Pos): void {
 	if (reservedWord.includes(name)) {
-		throw new AiScriptSyntaxError(`Reserved word "${name}" cannot be used as identifier.`, pos);
-	}
-	if (keywords.includes(name)) {
-		throw new AiScriptSyntaxError(`Keyword "${name}" cannot be used as identifier.`, pos);
+		throwReservedWordError(name, pos);
 	}
 }
 
@@ -92,7 +89,7 @@ function validateTypeName(name: string, pos: Ast.Pos): void {
 	validateName(name, pos);
 }
 
-function throwReservedWordError(name: string, pos: Ast.Pos): void {
+function throwReservedWordError(name: string, pos: Ast.Pos): never {
 	throw new AiScriptSyntaxError(`Reserved word "${name}" cannot be used as variable name.`, pos);
 }
 

From ee45ee03dbf95ee20396790462ba34a4a4d694f1 Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Fri, 12 Sep 2025 17:04:14 +0900
Subject: [PATCH 5/8] =?UTF-8?q?=E3=82=A8=E3=82=B9=E3=82=B1=E3=83=BC?=
 =?UTF-8?q?=E3=83=97=E3=81=95=E3=82=8C=E3=81=9F=E4=BA=88=E7=B4=84=E8=AA=9E?=
 =?UTF-8?q?=E3=82=92=E3=82=AD=E3=83=BC=E3=81=AB=E6=8C=81=E3=81=A4=E3=82=AA?=
 =?UTF-8?q?=E3=83=96=E3=82=B8=E3=82=A7=E3=82=AF=E3=83=88=E3=83=AA=E3=83=86?=
 =?UTF-8?q?=E3=83=A9=E3=83=AB=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/literals.ts | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/literals.ts b/test/literals.ts
index 72e2fc304..9b52da5df 100644
--- a/test/literals.ts
+++ b/test/literals.ts
@@ -241,6 +241,15 @@ describe('literal', () => {
 		});
 	});
 
+	test.concurrent('obj (escaped reserved word as key)', async () => {
+		const res = await exe(`
+		<: {
+			\\u0064\\u0065\\u0066\\u0061\\u0075\\u006c\\u0074: 42,
+		}
+		`);
+		eq(res, OBJ(new Map([['default', NUM(42)]])));
+	})
+
 	test.concurrent('obj (invalid key)', async () => {
 		assert.rejects(() => exe(`
 		<: {

From fef775bce7763af427c233bedca652ed452613d6 Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Sat, 13 Sep 2025 20:31:05 +0900
Subject: [PATCH 6/8] =?UTF-8?q?isIdentifierStart,=20isIdentifierPart?=
 =?UTF-8?q?=E9=96=A2=E6=95=B0=E3=82=92=E5=89=8A=E9=99=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parser/scanner.ts   | 12 +++---
 src/utils/characters.ts | 10 -----
 test/characters.ts      | 92 +----------------------------------------
 3 files changed, 8 insertions(+), 106 deletions(-)

diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts
index 20109b35c..e3caa1bff 100644
--- a/src/parser/scanner.ts
+++ b/src/parser/scanner.ts
@@ -1,5 +1,5 @@
 import { AiScriptSyntaxError, AiScriptUnexpectedEOFError } from '../error.js';
-import { decodeUnicodeEscapeSequence, isIdentifierPart, isIdentifierStart } from '../utils/characters.js';
+import { decodeUnicodeEscapeSequence } from '../utils/characters.js';
 import { CharStream } from './streams/char-stream.js';
 import { TOKEN, TokenKind } from './token.js';
 import { unexpectedTokenError } from './utils.js';
@@ -10,6 +10,8 @@ import type { Token, TokenPosition } from './token.js';
 const spaceChars = [' ', '\t'];
 const lineBreakChars = ['\r', '\n'];
 const digit = /^[0-9]$/;
+const identifierStart = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_]$/u;
+const identifierPart = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_\p{Mn}\p{Mc}\p{Nd}\p{Pc}\u200c\u200d]$/u;
 const hexDigit = /^[0-9a-fA-F]$/;
 const exponentIndicatorPattern = /^[eE]$/;
 
@@ -358,11 +360,11 @@ export class Scanner implements ITokenStream {
 
 		const value = decodeUnicodeEscapeSequence(rawValue);
 		const [start, ...parts] = value;
-		if (!isIdentifierStart(start!)) {
+		if (!identifierStart.test(start!)) {
 			throw new AiScriptSyntaxError(`Invalid identifier: "${value}"`, pos);
 		}
 		for (const part of parts) {
-			if (!isIdentifierPart(part)) {
+			if (!identifierPart.test(part)) {
 				throw new AiScriptSyntaxError(`Invalid identifier: "${value}"`, pos);
 			}
 		}
@@ -446,7 +448,7 @@ export class Scanner implements ITokenStream {
 		if (this.stream.eof) {
 			return;
 		}
-		if (isIdentifierStart(this.stream.char)) {
+		if (identifierStart.test(this.stream.char)) {
 			const value = this.stream.char;
 			this.stream.next();
 			return value;
@@ -466,7 +468,7 @@ export class Scanner implements ITokenStream {
 		if (matchedIdentifierStart !== undefined) {
 			return matchedIdentifierStart;
 		}
-		if (isIdentifierPart(this.stream.char)) {
+		if (identifierPart.test(this.stream.char)) {
 			const value = this.stream.char;
 			this.stream.next();
 			return value;
diff --git a/src/utils/characters.ts b/src/utils/characters.ts
index f8a7825a7..2c5a170ab 100644
--- a/src/utils/characters.ts
+++ b/src/utils/characters.ts
@@ -2,8 +2,6 @@ const MIN_HIGH_SURROGATE = 0xD800;
 const MAX_HIGH_SURROGATE = 0xDBFF;
 const MIN_LOW_SURROGATE = 0xDC00;
 const MAX_LOW_SURROGATE = 0xDFFF;
-const IDENTIFIER_START_PATTERN = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_]$/u;
-const IDENTIFIER_PART_PATTERN = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_\p{Mn}\p{Mc}\p{Nd}\p{Pc}\u200c\u200d]$/u;
 const HEX_DIGIT = /^[0-9a-fA-F]$/;
 
 export function isHighSurrogate(string: string, index = 0): boolean {
@@ -26,14 +24,6 @@ export function isSurrogatePair(string: string, start = 0): boolean {
 	return isHighSurrogate(string, start) && isLowSurrogate(string, start + 1);
 }
 
-export function isIdentifierStart(char: string): boolean {
-	return IDENTIFIER_START_PATTERN.test(char);
-}
-
-export function isIdentifierPart(char: string): boolean {
-	return IDENTIFIER_PART_PATTERN.test(char);
-}
-
 export function decodeUnicodeEscapeSequence(string: string): string {
 	let result = '';
 	let state: 'string' | 'escape' | 'digit' = 'string';
diff --git a/test/characters.ts b/test/characters.ts
index 4d5925ad0..71ace67ea 100644
--- a/test/characters.ts
+++ b/test/characters.ts
@@ -1,4 +1,4 @@
-import { decodeUnicodeEscapeSequence, isHighSurrogate, isIdentifierPart, isIdentifierStart, isLowSurrogate, isSurrogatePair } from '../src/utils/characters';
+import { decodeUnicodeEscapeSequence, isHighSurrogate, isLowSurrogate, isSurrogatePair } from '../src/utils/characters';
 import { describe, expect, test } from 'vitest';
 
 describe('isHighSurrogate', () => {
@@ -62,96 +62,6 @@ describe('isSurrogatePair', () => {
 	});
 });
 
-describe('isIdentifierStart', () => {
-	const cases: [string, boolean][] = [
-		// UnicodeLetter
-		['\u0041', true], // U+0041 (LATIN CAPITAL LETTER A): Uppercase letter (Lu)
-		['\u0061', true], // U+0061 (LATIN SMALL LETTER A ): Lowercase letter (Ll)
-		['\u01c5', true], // U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON): Titlecase letter (Lt)
-		['\u01c8', true], // U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J): Titlecase letter (Lt)
-		['\u02b0', true], // U+02B0 (Modifier Letter Small H): Modifier letter (Lm)
-		['\u03a9', true], // U+03A9 (GREEK CAPITAL LETTER OMEGA): Uppercase letter (Lu)
-		['\u03b2', true], // U+03B2 (GREEK SMALL LETTER BETA): Lowercase letter (Ll)
-		['\u16ee', true], // U+16EE (Runic Arlaug Symbol): Letter number (Nl)
-		['\u2163', true], // U+2163 (Roman Numeral Four): Letter number (Nl)
-		['\u3005', true], // U+3005 (Ideographic Iteration Mark): Modifier letter (Lm)
-		['\u3042', true], // U+3042 (HIRAGANA LETTER A): Other letter (Lo)
-		['\u85cd', true], // U+85CD (CJK Unified Ideograph-85CD): Other letter (Lo)
-		['\ud842\udfb7', true], // U+20BB7 (CJK Unified Ideograph-20BB7): Other letter (Lo)
-
-		// $
-		['$', true],
-
-		// _
-		['_', true],
-
-		// Invalid characters
-		['\u0021', false], // U+0021 (Exclamation Mark): Other Punctuation (Po)
-		['\u0030', false], // U+0030 (Digit Zero): Decimal number (Nd)
-		['\u0301', false], // U+0301 (Combining Acute Accent): Non-spacing mark (Mn)
-		['\u093e', false], // U+093E (Devanagari Vowel Sign Aa): Combining spacing mark (Mc)
-		['\u200c', false], // U+200C (Zero Width Non-Joiner (ZWNJ)): Format (Cf)
-		['\u200d', false], // U+200D (Zero Width Joiner (ZWJ)): Format (Cf)
-		['\u203f', false], // U+203F (Undertie): Connector punctuation (Pc)
-		['\ud83e\udd2f', false], // U+1F92F (Shocked Face with Exploding Head): Other Symbol (So)
-	];
-
-	test.concurrent.each(cases)('"%s" -> %s', (input, expected) => {
-		expect(isIdentifierStart(input)).toBe(expected);
-	});
-});
-
-describe('isIdentifierPart', () => {
-	const cases: [string, boolean][] = [
-		// UnicodeLetter
-		['\u0041', true], // U+0041 (LATIN CAPITAL LETTER A): Uppercase letter (Lu)
-		['\u0061', true], // U+0061 (LATIN SMALL LETTER A ): Lowercase letter (Ll)
-		['\u01c5', true], // U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON): Titlecase letter (Lt)
-		['\u01c8', true], // U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J): Titlecase letter (Lt)
-		['\u02b0', true], // U+02B0 (Modifier Letter Small H): Modifier letter (Lm)
-		['\u03a9', true], // U+03A9 (GREEK CAPITAL LETTER OMEGA): Uppercase letter (Lu)
-		['\u03b2', true], // U+03B2 (GREEK SMALL LETTER BETA): Lowercase letter (Ll)
-		['\u16ee', true], // U+16EE (Runic Arlaug Symbol): Letter number (Nl)
-		['\u2163', true], // U+2163 (Roman Numeral Four): Letter number (Nl)
-		['\u3005', true], // U+3005 (Ideographic Iteration Mark): Modifier letter (Lm)
-		['\u3042', true], // U+3042 (HIRAGANA LETTER A): Other letter (Lo)
-		['\u85cd', true], // U+85CD (CJK Unified Ideograph-85CD): Other letter (Lo)
-		['\ud842\udfb7', true], // U+20BB7 (CJK Unified Ideograph-20BB7): Other letter (Lo)
-
-		// $
-		['$', true],
-
-		// _
-		['_', true],
-
-		// UnicodeCombiningMark
-		['\u0301', true], // U+0301 (Combining Acute Accent): Non-spacing mark (Mn)
-		['\u093e', true], // U+093E (Devanagari Vowel Sign Aa): Combining spacing mark (Mc)
-
-		// UnicodeDigit
-		//   Decimal number (Nd)
-		['\u0030', true], // U+0030 (Digit Zero): Decimal number (Nd)
-
-		// UnicodeConnectorPunctuation
-		//   Connector punctuation (Pc)
-		['\u203f', true], // U+203F (Undertie): Connector punctuation (Pc)
-
-		// ZWNJ
-		['\u200c', true], // U+200C (Zero Width Non-Joiner (ZWNJ)): Format (Cf)
-
-		// ZWJ
-		['\u200d', true], // U+200D (Zero Width Joiner (ZWJ)): Format (Cf)
-
-		// Invalid characters
-		['\u0021', false], // U+0021 (Exclamation Mark): Other Punctuation (Po)
-		['\ud83e\udd2f', false], // U+1F92F (Shocked Face with Exploding Head): Other Symbol (So)
-	];
-
-	test.concurrent.each(cases)('"%s" -> %s', (input, expected) => {
-		expect(isIdentifierPart(input)).toBe(expected);
-	});
-});
-
 describe('decodeUnicodeEscapeSequence', () => {
 	test('plain', () => {
 		expect(decodeUnicodeEscapeSequence('abc123')).toBe('abc123');

From a80a323b5d151c32034ca8ef5a240e91b48cdf95 Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Sat, 13 Sep 2025 22:29:18 +0900
Subject: [PATCH 7/8] =?UTF-8?q?=E6=96=87=E5=AD=97=E7=A8=AE=E3=81=AE?=
 =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=82=92=E5=8F=96=E3=82=8A=E6=B6=88=E3=81=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parser/scanner.ts |   6 +-
 test/identifiers.ts   | 135 +++++++++++++++++++++---------------------
 test/literals.ts      |   9 ++-
 3 files changed, 73 insertions(+), 77 deletions(-)

diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts
index e3caa1bff..1eec385d1 100644
--- a/src/parser/scanner.ts
+++ b/src/parser/scanner.ts
@@ -10,8 +10,8 @@ import type { Token, TokenPosition } from './token.js';
 const spaceChars = [' ', '\t'];
 const lineBreakChars = ['\r', '\n'];
 const digit = /^[0-9]$/;
-const identifierStart = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_]$/u;
-const identifierPart = /^[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_\p{Mn}\p{Mc}\p{Nd}\p{Pc}\u200c\u200d]$/u;
+const identifierStart = /^[A-Za-z_]$/u; // exactly one character: ASCII letter or underscore
+const identifierPart = /^[A-Za-z0-9_]$/u; // exactly one character: ASCII letter, ASCII digit, or underscore
 const hexDigit = /^[0-9a-fA-F]$/;
 const exponentIndicatorPattern = /^[eE]$/;
 
@@ -370,7 +370,7 @@ export class Scanner implements ITokenStream {
 		}
 
 		if (value !== rawValue) {
-			return TOKEN(TokenKind.Identifier, pos, { hasLeftSpacing, value });
+			throw new AiScriptSyntaxError(`Cannot use escape characters in identifier: "${rawValue}"`, pos);
 		}
 
 		// check word kind
diff --git a/test/identifiers.ts b/test/identifiers.ts
index 22155f460..9b676b8ec 100644
--- a/test/identifiers.ts
+++ b/test/identifiers.ts
@@ -75,104 +75,108 @@ const reservedWords = [
 	'new',
 ] as const;
 
-const validIdentifiers = [
+// ['識別子', 使用可否]
+const identifierCases: [string, boolean][] = [
+
 	// IdentifierStart
 	//   UnicodeLetter
 	//     Uppercase letter (Lu)
-	'A', // U+0041 (LATIN CAPITAL LETTER A)
-	'Ω', // U+03A9 (GREEK CAPITAL LETTER OMEGA)
+	['A', true], // U+0041 (LATIN CAPITAL LETTER A)
+	['Ω', false], // U+03A9 (GREEK CAPITAL LETTER OMEGA)
 
 	//     Lowercase letter (Ll)
-	'a', // U+0061 (LATIN SMALL LETTER A )
-	'β', // U+03B2 (GREEK SMALL LETTER BETA)
+	['a', true], // U+0061 (LATIN SMALL LETTER A )
+	['β', false], // U+03B2 (GREEK SMALL LETTER BETA)
 
 	//     Titlecase letter (Lt)
-	'ǅ', // U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON)
-	'ǈ', // U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J)
+	['ǅ', false], // U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON)
+	['ǈ', false], // U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J)
 
 	//     Modifier letter (Lm)
-	'ʰ', // U+02B0 (Modifier Letter Small H)
-	'々', // U+3005 (Ideographic Iteration Mark)
+	['ʰ', false], // U+02B0 (Modifier Letter Small H)
+	['々', false], // U+3005 (Ideographic Iteration Mark)
 
 	//     Other letter (Lo)
-	'あ', // U+3042 (HIRAGANA LETTER A)
-	'藍', // U+85CD (CJK Unified Ideograph-85CD)
-	'𠮷', // U+20BB7 (CJK Unified Ideograph-20BB7)
+	['あ', false], // U+3042 (HIRAGANA LETTER A)
+	['藍', false], // U+85CD (CJK Unified Ideograph-85CD)
+	['𠮷', false], // U+20BB7 (CJK Unified Ideograph-20BB7)
 
 	//     Letter number (Nl)
-	'ᛮ', // U+16EE (Runic Arlaug Symbol)
-	'Ⅳ', // U+2163 (Roman Numeral Four)
+	['ᛮ', false], // U+16EE (Runic Arlaug Symbol)
+	['Ⅳ', false], // U+2163 (Roman Numeral Four)
 
 	//   $
-	'$',
+	['$', false],
 
 	//   _
-	'_',
+	['_', true],
 
 	// IdentifierPart
 	//   IdentifierStart
-	'_A',
-	'_Ω',
-	'_a',
-	'_β',
-	'_ǅ',
-	'_ǈ',
-	'_ʰ',
-	'_々',
-	'_あ',
-	'_藍',
-	'_𠮷',
-	'_ᛮ',
-	'_Ⅳ',
-	'_$',
-	'__',
+	['_A', true],
+	['_Ω', false],
+	['_a', true],
+	['_β', false],
+	['_ǅ', false],
+	['_ǈ', false],
+	['_ʰ', false],
+	['_々', false],
+	['_あ', false],
+	['_藍', false],
+	['_𠮷', false],
+	['_ᛮ', false],
+	['_Ⅳ', false],
+	['_$', false],
+	['__', true],
 
 	//   UnicodeCombiningMark
 	//     Non-spacing mark (Mn)
-	'á', // U+0301 (Combining Acute Accent)
+	['á', false], // U+0301 (Combining Acute Accent)
 
 	//     Combining spacing mark (Mc)
-	'राम', // U+093E (Devanagari Vowel Sign Aa)
+	['राम', false], // U+093E (Devanagari Vowel Sign Aa)
 
 	//   UnicodeDigit
 	//     Decimal number (Nd)
-	'a0', // U+0030 (Digit Zero)
+	['a0', true], // U+0030 (Digit Zero)
+	['a๑', false], // U+0E51 (Thai Digit One)
 
 	//   UnicodeConnectorPunctuation
 	//     Connector punctuation (Pc)
-	'a‿b', // U+203F (Undertie)
+	['a‿b', false], // U+203F (Undertie)
 
 	// <ZWNJ>
-	'बि‌ना',
+	['बि‌ना', false],
 
 	// <ZWJ>
-	'क‍्',
+	['क‍्', false],
+
+	['\\u', false],
+	['\\u000x', false],
+	['\\u0021', false], // "!": Other Punctuation (Po)
+	['\\u0069\\u0066', false], // "if"
+	['\\ud83e\\udd2f', false], // U+1F92F (Shocked Face with Exploding Head): Other Symbol (So)
+	['\\uD83E\\uDD2F', false],
+	['_\\u', false],
+	['_\\u000x', false],
+	['_\\u0021', false],
+	['_\\ud83e\\udd2f', false],
+	['_\\uD83E\\uDD2F', false],
 ];
 
-const validEscapeIdentifiers: [string, string][] = [
+const escapeIdentifiers: [string, string][] = [
+	['\\u0041', 'A'],
 	['\\u85cd', '藍'],
 	['\\u85CD', '藍'],
 	['\\ud842\\udfb7', '𠮷'],
 	['\\uD842\\uDFB7', '𠮷'],
+	['_\\u0041', '_A'],
 	['_\\u85cd', '_藍'],
 	['_\\u85CD', '_藍'],
 	['_\\ud842\\udfb7', '_𠮷'],
 	['_\\uD842\\uDFB7', '_𠮷'],
 ];
 
-const invalidIdentifiers = [
-	'\\u',
-	'\\u000x',
-	'\\u0021', // "!": Other Punctuation (Po)
-	'\\u0069\\u0066', // "if"
-	'\\ud83e\\udd2f', '\\uD83E\\uDD2F', // U+1F92F (Shocked Face with Exploding Head): Other Symbol (So)
-	'_\\u',
-	'_\\u000x',
-	'_\\u0021',
-	'_\\ud83e\\udd2f',
-	'_\\uD83E\\uDD2F',
-];
-
 const sampleCodes = Object.entries<[(definedName: string, referredName: string) => string, Value]>({
 	variable: [(definedName, referredName) =>
 	`
@@ -281,27 +285,20 @@ describe.each(
 	});
 
 	test.concurrent.each(
-		validIdentifiers
-	)('%s must be allowed', (word) => {
-		parser.parse(sampleCode(word, word));
-	});
-
-	test.concurrent.each(
-		validEscapeIdentifiers
-	)('$0 must be allowed (referred as $1)', (encoded, decoded) => {
-		parser.parse(sampleCode(encoded, decoded));
+		identifierCases
+	)('%s is allowed: %s', async (word, allowed) => {
+		expect.hasAssertions();
+		if (allowed) {
+			const res = await exe(sampleCode(word, word));
+			eq(res, expected);
+		} else {
+			expect(() => parser.parse(sampleCode(word, word))).toThrow(AiScriptSyntaxError);
+		}
 	});
 
 	test.concurrent.each(
-		validEscapeIdentifiers
-	)('$1 must be allowed (referred as $0)', async (encoded, decoded) => {
-		const res = await exe(sampleCode(decoded, encoded));
-		eq(res, expected);
-	});
-
-	test.concurrent.each(
-		invalidIdentifiers
-	)('%s must be rejected', (word) => {
+		escapeIdentifiers
+	)('escape sequence is not allowed: %s', async (word) => {
 		expect(() => parser.parse(sampleCode(word, word))).toThrow(AiScriptSyntaxError);
 	});
 });
diff --git a/test/literals.ts b/test/literals.ts
index 9b52da5df..c5f587e8e 100644
--- a/test/literals.ts
+++ b/test/literals.ts
@@ -1,8 +1,8 @@
 import * as assert from 'assert';
-import { describe, test } from 'vitest';
+import { describe, expect, test } from 'vitest';
 import { } from '../src';
 import { NUM, STR, NULL, ARR, OBJ, BOOL, TRUE, FALSE, ERROR ,FN_NATIVE } from '../src/interpreter/value';
-import { } from '../src/error';
+import { AiScriptSyntaxError } from '../src/error';
 import { exe, eq } from './testutils';
 
 describe('literal', () => {
@@ -242,12 +242,11 @@ describe('literal', () => {
 	});
 
 	test.concurrent('obj (escaped reserved word as key)', async () => {
-		const res = await exe(`
+		await expect(async () => await exe(`
 		<: {
 			\\u0064\\u0065\\u0066\\u0061\\u0075\\u006c\\u0074: 42,
 		}
-		`);
-		eq(res, OBJ(new Map([['default', NUM(42)]])));
+		`)).rejects.toThrow(AiScriptSyntaxError);
 	})
 
 	test.concurrent('obj (invalid key)', async () => {

From daeb89ce656640b792f3dd5c3921887d2c8f9981 Mon Sep 17 00:00:00 2001
From: takejohn <takejohn@takejohn.jp>
Date: Sat, 13 Sep 2025 22:41:57 +0900
Subject: [PATCH 8/8] =?UTF-8?q?Unicode=E3=82=A8=E3=82=B9=E3=82=B1=E3=83=BC?=
 =?UTF-8?q?=E3=83=97=E3=82=B7=E3=83=BC=E3=82=B1=E3=83=B3=E3=82=B9=E3=81=AE?=
 =?UTF-8?q?=E6=A4=9C=E8=A8=BC=E5=87=A6=E7=90=86=E3=82=92=E5=89=8A=E9=99=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parser/scanner.ts | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts
index 1eec385d1..30dc4a8d3 100644
--- a/src/parser/scanner.ts
+++ b/src/parser/scanner.ts
@@ -359,18 +359,8 @@ export class Scanner implements ITokenStream {
 		}
 
 		const value = decodeUnicodeEscapeSequence(rawValue);
-		const [start, ...parts] = value;
-		if (!identifierStart.test(start!)) {
-			throw new AiScriptSyntaxError(`Invalid identifier: "${value}"`, pos);
-		}
-		for (const part of parts) {
-			if (!identifierPart.test(part)) {
-				throw new AiScriptSyntaxError(`Invalid identifier: "${value}"`, pos);
-			}
-		}
-
 		if (value !== rawValue) {
-			throw new AiScriptSyntaxError(`Cannot use escape characters in identifier: "${rawValue}"`, pos);
+			throw new AiScriptSyntaxError(`Invalid identifier: "${rawValue}"`, pos);
 		}
 
 		// check word kind