From f20ab827408a4ee034cbafa4886198d97fe4e3af Mon Sep 17 00:00:00 2001
From: tk1024 <tk1024@users.noreply.github.com>
Date: Sun, 22 Mar 2026 20:36:51 +0900
Subject: [PATCH 1/5] =?UTF-8?q?feat:=20=E6=BC=A2=E5=AD=97=E2=86=92?=
 =?UTF-8?q?=E3=81=8B=E3=81=AA=E5=A2=83=E7=95=8C=E3=83=92=E3=83=A5=E3=83=BC?=
 =?UTF-8?q?=E3=83=AA=E3=82=B9=E3=83=86=E3=82=A3=E3=83=83=E3=82=AF=E3=81=AB?=
 =?UTF-8?q?=E3=82=88=E3=82=8B=E4=BD=8E=E4=BF=A1=E9=A0=BC=E6=99=82=E3=81=AE?=
 =?UTF-8?q?=E5=88=86=E9=9B=A2=E6=94=B9=E5=96=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

文字種境界（漢字→ひらがな/カタカナ）で分割される候補を、通常スコアが
閾値未満のときにフォールバックで救済する仕組みを追加。

- findSingleKanjiToKanaBoundary(): 遷移1回の漢字→かな境界を検出
- tryBoundaryFallback(): 姓側に辞書根拠がある場合のみ rescue
- confidence 0.8 で返す（通常の 1.0 と区別）
- 既存の辞書高信頼ケースには影響なし（MVP 94.7% 維持）

Closes #6

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/core/normalize.ts   | 45 +++++++++++++++++++++++
 src/core/splitter.ts    | 81 +++++++++++++++++++++++++++++++++++++++--
 test/unit/split.test.ts | 35 +++++++++++++++++-
 3 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/src/core/normalize.ts b/src/core/normalize.ts
index 46b20c4..7d202f0 100644
--- a/src/core/normalize.ts
+++ b/src/core/normalize.ts
@@ -40,6 +40,51 @@ const VARIANT_MAP: Record<string, string> = {
   "條": "条", "圓": "円",
 };
 
+type ScriptType = "kanji" | "hiragana" | "katakana" | "other";
+
+function scriptOf(ch: string): ScriptType {
+  if (/[\u3041-\u3096]/.test(ch)) return "hiragana";
+  if (/[\u30A1-\u30F6\u30FC]/.test(ch)) return "katakana";
+  if (/[\p{Script=Han}々〆ヶ]/u.test(ch)) return "kanji";
+  return "other";
+}
+
+/**
+ * Find the split position where a single kanji→kana boundary occurs.
+ * Returns the character index (not byte index) of the boundary,
+ * or undefined if no unique boundary exists.
+ *
+ * Examples:
+ *   "夏色まつり" → 2 (漢字→ひらがな at index 2)
+ *   "白銀ノエル" → 2 (漢字→カタカナ at index 2)
+ *   "もこ田めめめ" → undefined (2 transitions)
+ *   "田中太郎" → undefined (all kanji, no transition)
+ */
+export function findSingleKanjiToKanaBoundary(fullName: string): number | undefined {
+  const chars = [...fullName];
+  let transitionCount = 0;
+  let splitIndex: number | undefined;
+  let fromScript: ScriptType | undefined;
+  let toScript: ScriptType | undefined;
+
+  for (let i = 1; i < chars.length; i++) {
+    const prev = scriptOf(chars[i - 1]);
+    const next = scriptOf(chars[i]);
+    if (prev === next) continue;
+    if (prev === "other" || next === "other") return undefined;
+    transitionCount++;
+    if (transitionCount > 1) return undefined;
+    splitIndex = i;
+    fromScript = prev;
+    toScript = next;
+  }
+
+  if (transitionCount !== 1) return undefined;
+  if (fromScript !== "kanji") return undefined;
+  if (toScript !== "hiragana" && toScript !== "katakana") return undefined;
+  return splitIndex;
+}
+
 /**
  * Fold variant kanji to their canonical forms.
  */
diff --git a/src/core/splitter.ts b/src/core/splitter.ts
index a4bd19b..22404dc 100644
--- a/src/core/splitter.ts
+++ b/src/core/splitter.ts
@@ -6,12 +6,18 @@ import type {
   SeimeiResult,
   SplitOptions,
 } from "./types.js";
-import { isAllHiragana, isAllKatakana, isNonJapanese } from "./normalize.js";
+import { isAllHiragana, isAllKatakana, isNonJapanese, findSingleKanjiToKanaBoundary } from "./normalize.js";
 import { calcScore, lookupMatch } from "./scorer.js";
 
 const CONFIDENCE_THRESHOLD = 6.0;
 const CONFIDENCE_GAP = 1.0;
 
+// Boundary heuristic: rescue candidates at kanji→kana boundaries
+// when normal scoring falls below threshold
+const BOUNDARY_RESCUE_BONUS = 3.0;
+const BOUNDARY_RESCUE_MIN_GAP = 0.5;
+const BOUNDARY_RESCUE_CONFIDENCE = 0.8;
+
 let defaultLexicon: PackedLexicon | undefined;
 let defaultReading: ReadingData | undefined;
 
@@ -129,18 +135,85 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
   const confident =
     best.score >= CONFIDENCE_THRESHOLD && gap >= CONFIDENCE_GAP;
 
-  if (confident || options?.allowLowConfidence) {
+  // 1. Normal confidence: dictionary-based high score
+  if (confident) {
+    return {
+      best: { sei: best.sei, mei: best.mei },
+      confidence: 1.0,
+      candidates,
+    };
+  }
+
+  // 2. Boundary fallback: rescue at kanji→kana boundary
+  const fallbackConfidence = tryBoundaryFallback(trimmed, candidates);
+  if (fallbackConfidence !== undefined) {
+    // Find the boundary candidate (may differ from score-based best)
+    const boundaryIndex = findSingleKanjiToKanaBoundary(trimmed);
+    const boundaryCandidate = candidates.find(
+      (c) => [...c.sei].length === boundaryIndex
+    );
+    if (boundaryCandidate) {
+      return {
+        best: { sei: boundaryCandidate.sei, mei: boundaryCandidate.mei },
+        confidence: fallbackConfidence,
+        candidates,
+      };
+    }
+  }
+
+  // 3. Low confidence mode
+  if (options?.allowLowConfidence) {
     return {
       best: { sei: best.sei, mei: best.mei },
-      confidence: confident ? 1.0 : best.score / CONFIDENCE_THRESHOLD,
+      confidence: best.score / CONFIDENCE_THRESHOLD,
       candidates,
     };
   }
 
-  // Not confident enough: return unsplit
+  // 4. Not confident enough: return unsplit
   return {
     best: { sei: trimmed, mei: "" },
     confidence: 0,
     candidates,
   };
 }
+
+/**
+ * Try to rescue a split using kanji→kana script boundary heuristic.
+ * Only applies when:
+ * - There is exactly one script transition (kanji → hiragana/katakana)
+ * - The boundary candidate has dictionary evidence on the surname side
+ * - The rescue score meets the confidence threshold
+ */
+function tryBoundaryFallback(
+  fullName: string,
+  candidates: SeimeiCandidate[],
+): number | undefined {
+  const boundaryIndex = findSingleKanjiToKanaBoundary(fullName);
+  if (boundaryIndex === undefined) return undefined;
+
+  const boundaryCandidate = candidates.find(
+    (c) => [...c.sei].length === boundaryIndex
+  );
+  if (!boundaryCandidate) return undefined;
+
+  // Require dictionary evidence on surname side
+  if (
+    boundaryCandidate.seiMatch === "none" ||
+    boundaryCandidate.seiMatch === "reading"
+  ) {
+    return undefined;
+  }
+
+  const rescueScore = boundaryCandidate.score + BOUNDARY_RESCUE_BONUS;
+  if (rescueScore < CONFIDENCE_THRESHOLD) return undefined;
+
+  // Check gap against other candidates' rescue scores
+  const otherBest = candidates
+    .filter((c) => [...c.sei].length !== boundaryIndex)
+    .reduce((max, c) => Math.max(max, c.score), -Infinity);
+  const rescueGap = rescueScore - otherBest;
+  if (rescueGap < BOUNDARY_RESCUE_MIN_GAP) return undefined;
+
+  return BOUNDARY_RESCUE_CONFIDENCE;
+}
diff --git a/test/unit/split.test.ts b/test/unit/split.test.ts
index 67ea94b..817d7a6 100644
--- a/test/unit/split.test.ts
+++ b/test/unit/split.test.ts
@@ -4,7 +4,7 @@ import type { PackedLexicon } from "../../src/core/types";
 
 // Minimal test lexicon
 const testLexicon: PackedLexicon = {
-  sei: ["田中", "佐藤", "大瀬良", "林", "勅使河原", "小鳥遊", "西園寺", "齋藤"],
+  sei: ["田中", "佐藤", "大瀬良", "林", "勅使河原", "小鳥遊", "西園寺", "齋藤", "綾瀬", "白銀", "夏色"],
   mei: ["太郎", "花子", "大地", "健太", "公望", "翔", "一郎"],
   folded: {
     "斎藤": ["齋藤"],
@@ -79,6 +79,39 @@ describe("split", () => {
     });
   });
 
+  describe("境界ヒューリスティック", () => {
+    it("漢字姓 + ひらがな名を境界フォールバックで救済する", () => {
+      const result = analyze("綾瀬はるか");
+      expect(result.best).toEqual({ sei: "綾瀬", mei: "はるか" });
+      expect(result.confidence).toBe(0.8);
+    });
+
+    it("漢字姓 + カタカナ名を救済する", () => {
+      const result = analyze("白銀ノエル");
+      expect(result.best).toEqual({ sei: "白銀", mei: "ノエル" });
+      expect(result.confidence).toBe(0.8);
+    });
+
+    it("漢字姓(辞書ヒット) + ひらがな名を救済する", () => {
+      expect(split("夏色まつり")).toEqual({ sei: "夏色", mei: "まつり" });
+    });
+
+    it("姓側に辞書根拠がない場合は救済しない", () => {
+      expect(split("東京はなこ")).toEqual({ sei: "東京はなこ", mei: "" });
+    });
+
+    it("文字種遷移が2回以上ある場合は境界救済しない", () => {
+      // "夢野あき子" — 遷移2回（漢字→ひらがな→漢字）、辞書ヒットもなし
+      expect(split("夢野あき子")).toEqual({ sei: "夢野あき子", mei: "" });
+    });
+
+    it("通常の辞書高信頼ケースは従来どおり confidence 1.0", () => {
+      const result = analyze("田中太郎");
+      expect(result.best).toEqual({ sei: "田中", mei: "太郎" });
+      expect(result.confidence).toBe(1.0);
+    });
+  });
+
   describe("analyze", () => {
     it("候補リストとconfidenceを返す", () => {
       const result = analyze("田中太郎");

From b673e3a8d2ff55f7483f06be4e195bf4bd206767 Mon Sep 17 00:00:00 2001
From: tk1024 <tk1024@users.noreply.github.com>
Date: Sun, 22 Mar 2026 22:01:52 +0900
Subject: [PATCH 2/5] =?UTF-8?q?feat:=20OOV=E5=A7=93=E3=81=AE=E6=96=87?=
 =?UTF-8?q?=E5=AD=97=E7=A8=AE=E6=B7=B7=E5=9C=A8=E3=83=9A=E3=83=8A=E3=83=AB?=
 =?UTF-8?q?=E3=83=86=E3=82=A3=E3=82=92=E8=BF=BD=E5=8A=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

辞書未ヒットの姓にひらがな/カタカナが含まれる場合に減点する。
日本人の苗字はほぼ漢字のみで構成されるため、
「宝鐘マ」のような漢字+かな混在の姓は不自然と判定できる。

- 漢字+カタカナ1文字末尾: -3.0
- 漢字+ひらがな1文字末尾: -2.5
- 漢字+かな2文字以上末尾: -1.5
- 辞書ヒットする姓には適用しない
- BOUNDARY_AFTER_PENALTY を -1.2 → -1.8 に強化

VTuber名 [lowConf]: 3.3% → 93.3%（28/30正解）
既存精度への影響なし（MVP 94.7%、誤分割0%）

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/core/scorer.ts      | 70 +++++++++++++++++++++++++++++++++++
 src/core/splitter.ts    | 82 ++++++++++-------------------------------
 test/unit/split.test.ts | 41 ++++++++++++++-------
 3 files changed, 117 insertions(+), 76 deletions(-)

diff --git a/src/core/scorer.ts b/src/core/scorer.ts
index e0ca84f..4177607 100644
--- a/src/core/scorer.ts
+++ b/src/core/scorer.ts
@@ -30,6 +30,17 @@ const MEI_LENGTH_SCORE: Record<number, number> = {
 const PAIR_BONUS = 0.8;
 const BOTH_SINGLE_CHAR_PENALTY = -1.0;
 
+// Script boundary scoring
+const BOUNDARY_MATCH_BONUS = 1.2;
+const BOUNDARY_MATCH_WITH_DICT_BONUS = 0.8;
+const BOUNDARY_BEFORE_PENALTY = -3.0;
+const BOUNDARY_AFTER_PENALTY = -1.8;
+
+// Sei mixed-script penalty: OOV surname containing kana is unnatural
+const SEI_MIXED_SINGLE_HIRA_PENALTY = -2.5;
+const SEI_MIXED_SINGLE_KATA_PENALTY = -3.0;
+const SEI_MIXED_MULTI_KANA_PENALTY = -1.5;
+
 // Cache for Set-based lookups built from string[]
 const setCache = new WeakMap<PackedLexicon, { sei: Set<string>; mei: Set<string> }>();
 
@@ -45,6 +56,41 @@ function getSets(lexicon: PackedLexicon): { sei: Set<string>; mei: Set<string> }
   return cached;
 }
 
+const RE_KANJI = /[\p{Script=Han}々〆ヶ]/u;
+const RE_HIRAGANA = /[\u3041-\u3096]/;
+const RE_KATAKANA = /[\u30A1-\u30F6\u30FC]/;
+
+function scriptOf(ch: string): "K" | "H" | "T" | "O" {
+  if (RE_KANJI.test(ch)) return "K";
+  if (RE_HIRAGANA.test(ch)) return "H";
+  if (RE_KATAKANA.test(ch)) return "T";
+  return "O";
+}
+
+function scriptPattern(s: string): string {
+  return [...s].map(scriptOf).join("");
+}
+
+/**
+ * Penalty for OOV surnames that contain kana (e.g. 宝鐘マ, 星街すい).
+ * Real Japanese surnames are almost always pure kanji.
+ * Only applied when the surname has no dictionary hit.
+ */
+function seiMixedScriptPenalty(sei: string, seiMatch: MatchType): number {
+  if (seiMatch !== "none") return 0;
+
+  const p = scriptPattern(sei);
+  if (!/^K+[HT]+$/.test(p)) return 0;
+
+  const suffix = p.match(/[HT]+$/)![0];
+  if (suffix.length === 1) {
+    return suffix[0] === "T"
+      ? SEI_MIXED_SINGLE_KATA_PENALTY
+      : SEI_MIXED_SINGLE_HIRA_PENALTY;
+  }
+  return SEI_MIXED_MULTI_KANA_PENALTY;
+}
+
 /**
  * Look up a candidate string in the lexicon.
  * Returns the match type: surface > folded > reading > none.
@@ -87,12 +133,19 @@ export function lookupMatch(
 
 /**
  * Calculate the score for a split candidate.
+ *
+ * @param sei - the surname candidate string
+ * @param splitIndex - the character index where this candidate splits (i.e. sei length)
+ * @param boundaryIndex - the unique kanji→kana boundary position, or undefined if none
  */
 export function calcScore(
+  sei: string,
   seiMatch: MatchType,
   meiMatch: MatchType,
   seiLen: number,
   meiLen: number,
+  splitIndex: number,
+  boundaryIndex: number | undefined,
 ): number {
   let score = 0;
 
@@ -124,5 +177,22 @@ export function calcScore(
     score = -Infinity;
   }
 
+  // Script boundary scoring
+  if (boundaryIndex !== undefined) {
+    if (splitIndex === boundaryIndex) {
+      score += BOUNDARY_MATCH_BONUS;
+      if (seiMatch === "surface" || seiMatch === "folded") {
+        score += BOUNDARY_MATCH_WITH_DICT_BONUS;
+      }
+    } else if (splitIndex < boundaryIndex) {
+      score += BOUNDARY_BEFORE_PENALTY;
+    } else {
+      score += BOUNDARY_AFTER_PENALTY;
+    }
+  }
+
+  // OOV surname mixed-script penalty
+  score += seiMixedScriptPenalty(sei, seiMatch);
+
   return score;
 }
diff --git a/src/core/splitter.ts b/src/core/splitter.ts
index 22404dc..9b1ea5f 100644
--- a/src/core/splitter.ts
+++ b/src/core/splitter.ts
@@ -12,11 +12,10 @@ import { calcScore, lookupMatch } from "./scorer.js";
 const CONFIDENCE_THRESHOLD = 6.0;
 const CONFIDENCE_GAP = 1.0;
 
-// Boundary heuristic: rescue candidates at kanji→kana boundaries
-// when normal scoring falls below threshold
-const BOUNDARY_RESCUE_BONUS = 3.0;
-const BOUNDARY_RESCUE_MIN_GAP = 0.5;
-const BOUNDARY_RESCUE_CONFIDENCE = 0.8;
+// Boundary confidence: when the best candidate aligns with a script boundary
+// and has dictionary evidence on sei side, grant confidence 0.8
+const BOUNDARY_CONFIDENCE = 0.8;
+const BOUNDARY_CONFIDENCE_GAP = 0.5;
 
 let defaultLexicon: PackedLexicon | undefined;
 let defaultReading: ReadingData | undefined;
@@ -79,7 +78,6 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
 
   const lexicon = options?.lexicon ?? defaultLexicon;
   if (!lexicon) {
-    // No lexicon loaded: return unsplit
     return {
       best: { sei: trimmed, mei: "" },
       confidence: 0,
@@ -100,6 +98,7 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
 
   const isKana = isAllHiragana(trimmed) || isAllKatakana(trimmed);
   const maxSplit = Math.min(lexicon.maxSeiLen, n - 1);
+  const boundaryIndex = findSingleKanjiToKanaBoundary(trimmed);
   const candidates: SeimeiCandidate[] = [];
 
   for (let i = 1; i <= maxSplit; i++) {
@@ -113,7 +112,7 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
     const readingData = options?.readingData ?? defaultReading;
     const seiMatch = lookupMatch(sei, "sei", lexicon, isKana, readingData);
     const meiMatch = lookupMatch(mei, "mei", lexicon, isKana, readingData);
-    const score = calcScore(seiMatch, meiMatch, seiLen, meiLen);
+    const score = calcScore(sei, seiMatch, meiMatch, seiLen, meiLen, i, boundaryIndex);
 
     candidates.push({ sei, mei, score, seiMatch, meiMatch });
   }
@@ -144,21 +143,20 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
     };
   }
 
-  // 2. Boundary fallback: rescue at kanji→kana boundary
-  const fallbackConfidence = tryBoundaryFallback(trimmed, candidates);
-  if (fallbackConfidence !== undefined) {
-    // Find the boundary candidate (may differ from score-based best)
-    const boundaryIndex = findSingleKanjiToKanaBoundary(trimmed);
-    const boundaryCandidate = candidates.find(
-      (c) => [...c.sei].length === boundaryIndex
-    );
-    if (boundaryCandidate) {
-      return {
-        best: { sei: boundaryCandidate.sei, mei: boundaryCandidate.mei },
-        confidence: fallbackConfidence,
-        candidates,
-      };
-    }
+  // 2. Boundary confidence: best candidate aligns with script boundary
+  //    and has dictionary evidence on sei side
+  if (
+    boundaryIndex !== undefined &&
+    [...best.sei].length === boundaryIndex &&
+    (best.seiMatch === "surface" || best.seiMatch === "folded") &&
+    best.score >= CONFIDENCE_THRESHOLD &&
+    gap >= BOUNDARY_CONFIDENCE_GAP
+  ) {
+    return {
+      best: { sei: best.sei, mei: best.mei },
+      confidence: BOUNDARY_CONFIDENCE,
+      candidates,
+    };
   }
 
   // 3. Low confidence mode
@@ -177,43 +175,3 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
     candidates,
   };
 }
-
-/**
- * Try to rescue a split using kanji→kana script boundary heuristic.
- * Only applies when:
- * - There is exactly one script transition (kanji → hiragana/katakana)
- * - The boundary candidate has dictionary evidence on the surname side
- * - The rescue score meets the confidence threshold
- */
-function tryBoundaryFallback(
-  fullName: string,
-  candidates: SeimeiCandidate[],
-): number | undefined {
-  const boundaryIndex = findSingleKanjiToKanaBoundary(fullName);
-  if (boundaryIndex === undefined) return undefined;
-
-  const boundaryCandidate = candidates.find(
-    (c) => [...c.sei].length === boundaryIndex
-  );
-  if (!boundaryCandidate) return undefined;
-
-  // Require dictionary evidence on surname side
-  if (
-    boundaryCandidate.seiMatch === "none" ||
-    boundaryCandidate.seiMatch === "reading"
-  ) {
-    return undefined;
-  }
-
-  const rescueScore = boundaryCandidate.score + BOUNDARY_RESCUE_BONUS;
-  if (rescueScore < CONFIDENCE_THRESHOLD) return undefined;
-
-  // Check gap against other candidates' rescue scores
-  const otherBest = candidates
-    .filter((c) => [...c.sei].length !== boundaryIndex)
-    .reduce((max, c) => Math.max(max, c.score), -Infinity);
-  const rescueGap = rescueScore - otherBest;
-  if (rescueGap < BOUNDARY_RESCUE_MIN_GAP) return undefined;
-
-  return BOUNDARY_RESCUE_CONFIDENCE;
-}
diff --git a/test/unit/split.test.ts b/test/unit/split.test.ts
index 817d7a6..5e2c69f 100644
--- a/test/unit/split.test.ts
+++ b/test/unit/split.test.ts
@@ -3,8 +3,10 @@ import { split, analyze, setLexicon } from "../../src/core/splitter";
 import type { PackedLexicon } from "../../src/core/types";
 
 // Minimal test lexicon
+// Note: 夏色 and 白銀 are NOT included as surnames — they must be resolved
+// by script boundary heuristic, not dictionary lookup
 const testLexicon: PackedLexicon = {
-  sei: ["田中", "佐藤", "大瀬良", "林", "勅使河原", "小鳥遊", "西園寺", "齋藤", "綾瀬", "白銀", "夏色"],
+  sei: ["田中", "佐藤", "大瀬良", "林", "勅使河原", "小鳥遊", "西園寺", "齋藤", "綾瀬", "夏", "周防"],
   mei: ["太郎", "花子", "大地", "健太", "公望", "翔", "一郎"],
   folded: {
     "斎藤": ["齋藤"],
@@ -79,37 +81,48 @@ describe("split", () => {
     });
   });
 
-  describe("境界ヒューリスティック", () => {
-    it("漢字姓 + ひらがな名を境界フォールバックで救済する", () => {
+  describe("文字種境界スコアリング", () => {
+    it("辞書ヒット姓 + かな名を境界で分離する（綾瀬はるか）", () => {
       const result = analyze("綾瀬はるか");
       expect(result.best).toEqual({ sei: "綾瀬", mei: "はるか" });
-      expect(result.confidence).toBe(0.8);
+      expect(result.confidence).toBeGreaterThanOrEqual(0.8);
     });
 
-    it("漢字姓 + カタカナ名を救済する", () => {
-      const result = analyze("白銀ノエル");
-      expect(result.best).toEqual({ sei: "白銀", mei: "ノエル" });
-      expect(result.confidence).toBe(0.8);
+    it("辞書ヒット姓 + カタカナ名を境界で分離する（周防パトラ）", () => {
+      const result = analyze("周防パトラ");
+      expect(result.best).toEqual({ sei: "周防", mei: "パトラ" });
+      expect(result.confidence).toBeGreaterThanOrEqual(0.8);
+    });
+
+    it("allowLowConfidence: 辞書未登録でも境界位置が最高スコアになる（夏色まつり）", () => {
+      // 夏色 is NOT in the dictionary, but boundary scoring should
+      // make 夏色/まつり rank higher than 夏/色まつり
+      const result = analyze("夏色まつり", { allowLowConfidence: true });
+      expect(result.best).toEqual({ sei: "夏色", mei: "まつり" });
     });
 
-    it("漢字姓(辞書ヒット) + ひらがな名を救済する", () => {
-      expect(split("夏色まつり")).toEqual({ sei: "夏色", mei: "まつり" });
+    it("allowLowConfidence: 漢字→カタカナ境界が勝つ（白銀ノエル）", () => {
+      const result = analyze("白銀ノエル", { allowLowConfidence: true });
+      expect(result.best).toEqual({ sei: "白銀", mei: "ノエル" });
     });
 
-    it("姓側に辞書根拠がない場合は救済しない", () => {
+    it("辞書根拠がない場合は通常モードで unsplit", () => {
       expect(split("東京はなこ")).toEqual({ sei: "東京はなこ", mei: "" });
     });
 
-    it("文字種遷移が2回以上ある場合は境界救済しない", () => {
-      // "夢野あき子" — 遷移2回（漢字→ひらがな→漢字）、辞書ヒットもなし
+    it("文字種遷移が2回以上ある場合は境界ボーナスなし", () => {
       expect(split("夢野あき子")).toEqual({ sei: "夢野あき子", mei: "" });
     });
 
-    it("通常の辞書高信頼ケースは従来どおり confidence 1.0", () => {
+    it("全漢字名は境界スコアの影響を受けない", () => {
       const result = analyze("田中太郎");
       expect(result.best).toEqual({ sei: "田中", mei: "太郎" });
       expect(result.confidence).toBe(1.0);
     });
+
+    it("1文字姓の正当な辞書ヒットは境界がなければ維持される（林一郎）", () => {
+      expect(split("林一郎")).toEqual({ sei: "林", mei: "一郎" });
+    });
   });
 
   describe("analyze", () => {

From 60bfa852d2a5690ff1fde88fd4ef6030b3df54bb Mon Sep 17 00:00:00 2001
From: tk1024 <tk1024@users.noreply.github.com>
Date: Sun, 22 Mar 2026 22:06:24 +0900
Subject: [PATCH 3/5] =?UTF-8?q?feat:=20=E3=81=8B=E3=81=AA=E2=86=92?=
 =?UTF-8?q?=E6=BC=A2=E5=AD=97=E5=A2=83=E7=95=8C=E3=82=B9=E3=82=B3=E3=82=A2?=
 =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=B0=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=81?=
 =?UTF-8?q?mei=E5=81=B4=E3=81=AE=E6=B7=B7=E5=9C=A8=E3=83=9A=E3=83=8A?=
 =?UTF-8?q?=E3=83=AB=E3=83=86=E3=82=A3=E3=81=AF=E9=99=A4=E5=A4=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- findSingleScriptBoundary() で漢字→かな/かな→漢字の両方向を検出
- かな→漢字方向でも境界ボーナス/ペナルティが効くように
- mei 側の混在ペナルティは除外（よね子、ルミ子等の自然な名前を保護）
- かな姓+漢字名 [lowConf]: 56.3% → 87.5%
- 名前内部にかな混在 [lowConf]: 100% 維持（リグレッションなし）

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/core/normalize.ts | 44 +++++++++++++++++++++++++++++++++----------
 src/core/scorer.ts    | 24 ++++++++++++++++++++++-
 src/core/splitter.ts  | 39 +++++++++++++++++++++-----------------
 3 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/src/core/normalize.ts b/src/core/normalize.ts
index 7d202f0..601cdc4 100644
--- a/src/core/normalize.ts
+++ b/src/core/normalize.ts
@@ -50,17 +50,23 @@ function scriptOf(ch: string): ScriptType {
 }
 
 /**
- * Find the split position where a single kanji→kana boundary occurs.
- * Returns the character index (not byte index) of the boundary,
- * or undefined if no unique boundary exists.
+ * Find the split position where a single script boundary occurs
+ * between kanji and kana (in either direction).
+ * Returns the character index and direction, or undefined if no unique boundary exists.
  *
  * Examples:
- *   "夏色まつり" → 2 (漢字→ひらがな at index 2)
- *   "白銀ノエル" → 2 (漢字→カタカナ at index 2)
+ *   "夏色まつり" → { index: 2, direction: "kanji-to-kana" }
+ *   "白銀ノエル" → { index: 2, direction: "kanji-to-kana" }
+ *   "デーモン閣下" → { index: 4, direction: "kana-to-kanji" }
  *   "もこ田めめめ" → undefined (2 transitions)
  *   "田中太郎" → undefined (all kanji, no transition)
  */
-export function findSingleKanjiToKanaBoundary(fullName: string): number | undefined {
+export interface ScriptBoundary {
+  index: number;
+  direction: "kanji-to-kana" | "kana-to-kanji";
+}
+
+export function findSingleScriptBoundary(fullName: string): ScriptBoundary | undefined {
   const chars = [...fullName];
   let transitionCount = 0;
   let splitIndex: number | undefined;
@@ -79,10 +85,28 @@ export function findSingleKanjiToKanaBoundary(fullName: string): number | undefi
     toScript = next;
   }
 
-  if (transitionCount !== 1) return undefined;
-  if (fromScript !== "kanji") return undefined;
-  if (toScript !== "hiragana" && toScript !== "katakana") return undefined;
-  return splitIndex;
+  if (transitionCount !== 1 || splitIndex === undefined) return undefined;
+
+  const fromIsKanji = fromScript === "kanji";
+  const toIsKanji = toScript === "kanji";
+  const fromIsKana = fromScript === "hiragana" || fromScript === "katakana";
+  const toIsKana = toScript === "hiragana" || toScript === "katakana";
+
+  if (fromIsKanji && toIsKana) {
+    return { index: splitIndex, direction: "kanji-to-kana" };
+  }
+  if (fromIsKana && toIsKanji) {
+    return { index: splitIndex, direction: "kana-to-kanji" };
+  }
+
+  return undefined;
+}
+
+/** @deprecated Use findSingleScriptBoundary instead */
+export function findSingleKanjiToKanaBoundary(fullName: string): number | undefined {
+  const result = findSingleScriptBoundary(fullName);
+  if (result?.direction === "kanji-to-kana") return result.index;
+  return undefined;
 }
 
 /**
diff --git a/src/core/scorer.ts b/src/core/scorer.ts
index 4177607..8225c6c 100644
--- a/src/core/scorer.ts
+++ b/src/core/scorer.ts
@@ -91,6 +91,26 @@ function seiMixedScriptPenalty(sei: string, seiMatch: MatchType): number {
   return SEI_MIXED_MULTI_KANA_PENALTY;
 }
 
+/**
+ * Penalty for OOV given names that start with kana followed by kanji (e.g. モン閣下, イク眞木).
+ * When a kana→kanji boundary exists, the mei side should be pure kanji.
+ * Only applied when the given name has no dictionary hit.
+ */
+function meiMixedScriptPenalty(mei: string, meiMatch: MatchType): number {
+  if (meiMatch !== "none") return 0;
+
+  const p = scriptPattern(mei);
+  if (!/^[HT]+K+$/.test(p)) return 0;
+
+  const prefix = p.match(/^[HT]+/)![0];
+  if (prefix.length === 1) {
+    return prefix[0] === "T"
+      ? SEI_MIXED_SINGLE_KATA_PENALTY
+      : SEI_MIXED_SINGLE_HIRA_PENALTY;
+  }
+  return SEI_MIXED_MULTI_KANA_PENALTY;
+}
+
 /**
  * Look up a candidate string in the lexicon.
  * Returns the match type: surface > folded > reading > none.
@@ -140,6 +160,7 @@ export function lookupMatch(
  */
 export function calcScore(
   sei: string,
+  mei: string,
   seiMatch: MatchType,
   meiMatch: MatchType,
   seiLen: number,
@@ -191,7 +212,8 @@ export function calcScore(
     }
   }
 
-  // OOV surname mixed-script penalty
+  // OOV surname mixed-script penalty (mei side is not penalized —
+  // names like よね子, ルミ子, 美つ子 naturally mix scripts)
   score += seiMixedScriptPenalty(sei, seiMatch);
 
   return score;
diff --git a/src/core/splitter.ts b/src/core/splitter.ts
index 9b1ea5f..88869a0 100644
--- a/src/core/splitter.ts
+++ b/src/core/splitter.ts
@@ -6,14 +6,14 @@ import type {
   SeimeiResult,
   SplitOptions,
 } from "./types.js";
-import { isAllHiragana, isAllKatakana, isNonJapanese, findSingleKanjiToKanaBoundary } from "./normalize.js";
+import { isAllHiragana, isAllKatakana, isNonJapanese, findSingleScriptBoundary } from "./normalize.js";
 import { calcScore, lookupMatch } from "./scorer.js";
 
 const CONFIDENCE_THRESHOLD = 6.0;
 const CONFIDENCE_GAP = 1.0;
 
 // Boundary confidence: when the best candidate aligns with a script boundary
-// and has dictionary evidence on sei side, grant confidence 0.8
+// and has dictionary evidence, grant confidence 0.8
 const BOUNDARY_CONFIDENCE = 0.8;
 const BOUNDARY_CONFIDENCE_GAP = 0.5;
 
@@ -98,7 +98,8 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
 
   const isKana = isAllHiragana(trimmed) || isAllKatakana(trimmed);
   const maxSplit = Math.min(lexicon.maxSeiLen, n - 1);
-  const boundaryIndex = findSingleKanjiToKanaBoundary(trimmed);
+  const boundary = findSingleScriptBoundary(trimmed);
+  const boundaryIndex = boundary?.index;
   const candidates: SeimeiCandidate[] = [];
 
   for (let i = 1; i <= maxSplit; i++) {
@@ -112,7 +113,7 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
     const readingData = options?.readingData ?? defaultReading;
     const seiMatch = lookupMatch(sei, "sei", lexicon, isKana, readingData);
     const meiMatch = lookupMatch(mei, "mei", lexicon, isKana, readingData);
-    const score = calcScore(sei, seiMatch, meiMatch, seiLen, meiLen, i, boundaryIndex);
+    const score = calcScore(sei, mei, seiMatch, meiMatch, seiLen, meiLen, i, boundaryIndex);
 
     candidates.push({ sei, mei, score, seiMatch, meiMatch });
   }
@@ -144,19 +145,23 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
   }
 
   // 2. Boundary confidence: best candidate aligns with script boundary
-  //    and has dictionary evidence on sei side
-  if (
-    boundaryIndex !== undefined &&
-    [...best.sei].length === boundaryIndex &&
-    (best.seiMatch === "surface" || best.seiMatch === "folded") &&
-    best.score >= CONFIDENCE_THRESHOLD &&
-    gap >= BOUNDARY_CONFIDENCE_GAP
-  ) {
-    return {
-      best: { sei: best.sei, mei: best.mei },
-      confidence: BOUNDARY_CONFIDENCE,
-      candidates,
-    };
+  //    and has dictionary evidence on the appropriate side
+  if (boundaryIndex !== undefined && [...best.sei].length === boundaryIndex) {
+    const hasDictEvidence = boundary?.direction === "kanji-to-kana"
+      ? (best.seiMatch === "surface" || best.seiMatch === "folded")
+      : (best.meiMatch === "surface" || best.meiMatch === "folded");
+
+    if (
+      hasDictEvidence &&
+      best.score >= CONFIDENCE_THRESHOLD &&
+      gap >= BOUNDARY_CONFIDENCE_GAP
+    ) {
+      return {
+        best: { sei: best.sei, mei: best.mei },
+        confidence: BOUNDARY_CONFIDENCE,
+        candidates,
+      };
+    }
   }
 
   // 3. Low confidence mode

From 4dd4aa2ef06456edb295c5c1e66ffd8b56159142 Mon Sep 17 00:00:00 2001
From: tk1024 <tk1024@users.noreply.github.com>
Date: Sun, 22 Mar 2026 22:12:35 +0900
Subject: [PATCH 4/5] =?UTF-8?q?fix:=20SEI=5FHIT=5FBONUS=20=E3=82=92=200.8?=
 =?UTF-8?q?=20=E2=86=92=200.5=20=E3=81=AB=E8=AA=BF=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

0.8 だと1文字姓の辞書ヒットが強すぎて、上白石萌音、柳樂優弥等で
正しい複数文字姓に勝ってしまうリグレッションが発生していた。
0.5 で松井珠理奈等の改善を維持しつつリグレッションを解消。

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/core/scorer.ts | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/core/scorer.ts b/src/core/scorer.ts
index 8225c6c..cf9383e 100644
--- a/src/core/scorer.ts
+++ b/src/core/scorer.ts
@@ -30,6 +30,10 @@ const MEI_LENGTH_SCORE: Record<number, number> = {
 const PAIR_BONUS = 0.8;
 const BOTH_SINGLE_CHAR_PENALTY = -1.0;
 
+// Surname hit is stronger evidence than given name hit,
+// because surnames are a finite known set while given names are creative
+const SEI_HIT_BONUS = 0.5;
+
 // Script boundary scoring
 const BOUNDARY_MATCH_BONUS = 1.2;
 const BOUNDARY_MATCH_WITH_DICT_BONUS = 0.8;
@@ -174,6 +178,12 @@ export function calcScore(
   score += MATCH_SCORE[seiMatch];
   score += MATCH_SCORE[meiMatch];
 
+  // Surname hit bonus: surnames are a known finite set,
+  // so a dictionary hit on sei is stronger evidence than on mei
+  if (seiMatch === "surface" || seiMatch === "folded") {
+    score += SEI_HIT_BONUS;
+  }
+
   // Length scores (secondary signal)
   score += SEI_LENGTH_SCORE[Math.min(seiLen, 6)] ?? -0.5;
   score += MEI_LENGTH_SCORE[Math.min(meiLen, 6)] ?? -0.5;

From a02045385f8e366f7f32dd9cbb58981b97337895 Mon Sep 17 00:00:00 2001
From: tk1024 <tk1024@users.noreply.github.com>
Date: Sun, 22 Mar 2026 22:23:29 +0900
Subject: [PATCH 5/5] =?UTF-8?q?feat:=20=E3=82=AB=E3=82=BF=E3=82=AB?=
 =?UTF-8?q?=E3=83=8A=E5=A7=93=E3=81=AE=E4=BE=8B=E5=A4=96=E3=83=95=E3=83=AD?=
 =?UTF-8?q?=E3=83=BC=20+=20SEI=5FHIT=5FBONUS=E8=AA=BF=E6=95=B4=20+=20?=
 =?UTF-8?q?=E3=83=A6=E3=83=8B=E3=83=83=E3=83=88=E3=83=86=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E6=8B=A1=E5=85=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- カタカナ姓の例外フロー: 前半が全カタカナの場合、後半を姓辞書で照合
  して一致すれば芸名パターンとして採用（confidence 0.8）
  例: ジャガー/横田、ダン/池田、マイク/眞木
- SEI_HIT_BONUS: 0.8 → 0.5 に調整（1文字姓リグレッション防止）
- ユニットテストを大幅拡充:
  - findSingleScriptBoundary の関数テスト
  - OOV姓混在ペナルティのテスト
  - 姓ヒットボーナスのテスト
  - カタカナ姓例外フローのテスト
  84テスト全パス

かな姓+漢字名 [lowConf]: 81.3% → 100%
MVP [lowConf]: 99.5% (1 wrong: 池井戸潤)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/core/splitter.ts    | 24 ++++++++++-
 test/unit/split.test.ts | 95 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 107 insertions(+), 12 deletions(-)

diff --git a/src/core/splitter.ts b/src/core/splitter.ts
index 88869a0..aa615bd 100644
--- a/src/core/splitter.ts
+++ b/src/core/splitter.ts
@@ -164,7 +164,29 @@ export function analyze(fullName: string, options?: SplitOptions): AnalyzeResult
     }
   }
 
-  // 3. Low confidence mode
+  // 3. Katakana-sei exception: when the best candidate's sei is all katakana
+  //    (e.g. ジャガー/横田), real Japanese surnames are never pure katakana.
+  //    Re-score the boundary candidate by looking up mei in sei dict instead.
+  if (boundary?.direction === "kana-to-kanji" && boundaryIndex !== undefined) {
+    const boundaryCandidate = candidates.find(
+      (c) => [...c.sei].length === boundaryIndex && isAllKatakana(c.sei)
+    );
+    if (boundaryCandidate) {
+      const meiAsSei = lookupMatch(
+        boundaryCandidate.mei, "sei", lexicon, false,
+        options?.readingData ?? defaultReading,
+      );
+      if (meiAsSei === "surface" || meiAsSei === "folded") {
+        return {
+          best: { sei: boundaryCandidate.sei, mei: boundaryCandidate.mei },
+          confidence: BOUNDARY_CONFIDENCE,
+          candidates,
+        };
+      }
+    }
+  }
+
+  // 4. Low confidence mode
   if (options?.allowLowConfidence) {
     return {
       best: { sei: best.sei, mei: best.mei },
diff --git a/test/unit/split.test.ts b/test/unit/split.test.ts
index 5e2c69f..5fa1daa 100644
--- a/test/unit/split.test.ts
+++ b/test/unit/split.test.ts
@@ -1,18 +1,16 @@
 import { describe, it, expect, beforeAll } from "vitest";
 import { split, analyze, setLexicon } from "../../src/core/splitter";
+import { findSingleScriptBoundary } from "../../src/core/normalize";
 import type { PackedLexicon } from "../../src/core/types";
 
-// Minimal test lexicon
-// Note: 夏色 and 白銀 are NOT included as surnames — they must be resolved
-// by script boundary heuristic, not dictionary lookup
 const testLexicon: PackedLexicon = {
-  sei: ["田中", "佐藤", "大瀬良", "林", "勅使河原", "小鳥遊", "西園寺", "齋藤", "綾瀬", "夏", "周防"],
-  mei: ["太郎", "花子", "大地", "健太", "公望", "翔", "一郎"],
+  sei: ["田中", "佐藤", "大瀬良", "林", "勅使河原", "小鳥遊", "西園寺", "齋藤", "綾瀬", "夏", "周防", "横田", "池田", "秋山", "松村", "高峰"],
+  mei: ["太郎", "花子", "大地", "健太", "公望", "翔", "一郎", "リン", "田"],
   folded: {
     "斎藤": ["齋藤"],
   },
   maxSeiLen: 4,
-  maxMeiLen: 3,
+  maxMeiLen: 5,
 };
 
 describe("split", () => {
@@ -81,6 +79,36 @@ describe("split", () => {
     });
   });
 
+  describe("findSingleScriptBoundary", () => {
+    it("漢字→ひらがな境界を検出する", () => {
+      expect(findSingleScriptBoundary("夏色まつり")).toEqual({ index: 2, direction: "kanji-to-kana" });
+    });
+
+    it("漢字→カタカナ境界を検出する", () => {
+      expect(findSingleScriptBoundary("白銀ノエル")).toEqual({ index: 2, direction: "kanji-to-kana" });
+    });
+
+    it("カタカナ→漢字境界を検出する", () => {
+      expect(findSingleScriptBoundary("ジャガー横田")).toEqual({ index: 4, direction: "kana-to-kanji" });
+    });
+
+    it("ひらがな→漢字境界を検出する", () => {
+      expect(findSingleScriptBoundary("かたせ梨乃")).toEqual({ index: 3, direction: "kana-to-kanji" });
+    });
+
+    it("全漢字は undefined", () => {
+      expect(findSingleScriptBoundary("田中太郎")).toBeUndefined();
+    });
+
+    it("遷移2回は undefined", () => {
+      expect(findSingleScriptBoundary("もこ田めめめ")).toBeUndefined();
+    });
+
+    it("全ひらがなは undefined", () => {
+      expect(findSingleScriptBoundary("たなかたろう")).toBeUndefined();
+    });
+  });
+
   describe("文字種境界スコアリング", () => {
     it("辞書ヒット姓 + かな名を境界で分離する（綾瀬はるか）", () => {
       const result = analyze("綾瀬はるか");
@@ -94,14 +122,12 @@ describe("split", () => {
       expect(result.confidence).toBeGreaterThanOrEqual(0.8);
     });
 
-    it("allowLowConfidence: 辞書未登録でも境界位置が最高スコアになる（夏色まつり）", () => {
-      // 夏色 is NOT in the dictionary, but boundary scoring should
-      // make 夏色/まつり rank higher than 夏/色まつり
+    it("allowLowConfidence: 辞書未登録でも境界位置が最高スコアになる", () => {
       const result = analyze("夏色まつり", { allowLowConfidence: true });
       expect(result.best).toEqual({ sei: "夏色", mei: "まつり" });
     });
 
-    it("allowLowConfidence: 漢字→カタカナ境界が勝つ（白銀ノエル）", () => {
+    it("allowLowConfidence: 漢字→カタカナ境界が勝つ", () => {
       const result = analyze("白銀ノエル", { allowLowConfidence: true });
       expect(result.best).toEqual({ sei: "白銀", mei: "ノエル" });
     });
@@ -120,11 +146,58 @@ describe("split", () => {
       expect(result.confidence).toBe(1.0);
     });
 
-    it("1文字姓の正当な辞書ヒットは境界がなければ維持される（林一郎）", () => {
+    it("1文字姓の正当な辞書ヒットは境界がなければ維持される", () => {
       expect(split("林一郎")).toEqual({ sei: "林", mei: "一郎" });
     });
   });
 
+  describe("OOV姓の混在ペナルティ", () => {
+    it("漢字+カタカナ1文字の姓は大きく減点される", () => {
+      // 宝鐘マ/リン より 宝鐘/マリン が勝つべき
+      const result = analyze("宝鐘マリン", { allowLowConfidence: true });
+      expect(result.best).toEqual({ sei: "宝鐘", mei: "マリン" });
+    });
+
+    it("漢字+ひらがなの姓も減点される", () => {
+      const result = analyze("星街すいせい", { allowLowConfidence: true });
+      expect(result.best).toEqual({ sei: "星街", mei: "すいせい" });
+    });
+
+    it("辞書ヒットする姓には混在ペナルティが適用されない", () => {
+      // 綾瀬 is in dict — no penalty
+      const result = analyze("綾瀬はるか");
+      expect(result.best).toEqual({ sei: "綾瀬", mei: "はるか" });
+    });
+  });
+
+  describe("姓ヒットボーナス", () => {
+    it("姓辞書ヒットは名辞書ヒットより優先される", () => {
+      // 松村/沙友理 (sei=surface) vs 松村沙/友理 (mei=surface)
+      const result = analyze("松村沙友理", { allowLowConfidence: true });
+      expect(result.best.sei).toBe("松村");
+    });
+  });
+
+  describe("カタカナ姓の例外フロー", () => {
+    it("全カタカナ姓 + 漢字名は後半を姓辞書で照合する", () => {
+      // ジャガー/横田: 横田が姓辞書にヒット → 例外フローで採用
+      const result = analyze("ジャガー横田");
+      expect(result.best).toEqual({ sei: "ジャガー", mei: "横田" });
+      expect(result.confidence).toBe(0.8);
+    });
+
+    it("ダン/池田も例外フローで分離する", () => {
+      const result = analyze("ダン池田");
+      expect(result.best).toEqual({ sei: "ダン", mei: "池田" });
+      expect(result.confidence).toBe(0.8);
+    });
+
+    it("漢字姓+漢字名には例外フローが適用されない", () => {
+      const result = analyze("田中太郎");
+      expect(result.confidence).toBe(1.0);
+    });
+  });
+
   describe("analyze", () => {
     it("候補リストとconfidenceを返す", () => {
       const result = analyze("田中太郎");