Hugo0 · Hugo0 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/.gitignore b/.gitignore
@@ -41,3 +41,4 @@ webapp/static/word-history/
 
 # Generated share images (rebuilt on deploy)
 webapp/static/images/share/
+scripts/.freq_data
diff --git a/docs/WORD_DATA_ARCHITECTURE.md b/docs/WORD_DATA_ARCHITECTURE.md
diff --git a/frontend/src/game.ts b/frontend/src/game.ts
@@ -9,6 +9,7 @@ import { haptic, setHapticsEnabled } from './haptics';
 import { sound, setSoundEnabled } from './sounds';
 import { buildNormalizeMap, buildNormalizedWordMap, normalizeWord } from './diacritics';
 import { buildFinalFormReverseMap, toFinalForm, toRegularForm } from './positional';
+import { splitWord } from './graphemes';
 import analytics from './analytics';
 import { identifyUser, updateUserProperties } from './posthog';
 import { calculateCommunityPercentile } from './stats';
@@ -623,10 +624,14 @@ export const createGameApp = () => {
                     return fullNormalize(c1) === fullNormalize(c2);
                 };
 
+                // Split target word into characters (grapheme clusters for Hindi, codepoints otherwise)
+                const graphemeMode = this.config?.grapheme_mode === 'true';
+                const targetChars = splitWord(targetWord, graphemeMode);
+
                 // Count characters in target word using FULLY NORMALIZED forms
                 // This ensures "ä" and "a" are counted together, and "כ" and "ך" are counted together
                 const charCounts: Record<string, number> = {};
-                for (const char of targetWord) {
+                for (const char of targetChars) {
                     const normalizedChar = fullNormalize(char);
                     charCounts[normalizedChar] = (charCounts[normalizedChar] || 0) + 1;
                 }
@@ -641,7 +646,7 @@ export const createGameApp = () => {
                 // First pass: mark correct positions (using normalized comparison)
                 for (let i = 0; i < row.length; i++) {
                     const guessChar = row[i];
-                    const targetChar = targetWord[i];
+                    const targetChar = targetChars[i];
                     if (guessChar && targetChar && fullCharsMatch(guessChar, targetChar)) {
                         // Use splice for Vue 3 reactivity
                         classes.splice(i, 1, `correct ${baseClass}`);
@@ -666,9 +671,7 @@ export const createGameApp = () => {
                     const count = charCounts[normalizedGuess];
 
                     // Check if this normalized character exists in target (also normalized)
-                    const targetHasChar = [...targetWord].some((tc) =>
-                        fullCharsMatch(guessChar, tc)
-                    );
+                    const targetHasChar = targetChars.some((tc) => fullCharsMatch(guessChar, tc));
 
                     if (targetHasChar && count !== undefined && count > 0) {
                         // Use splice for Vue 3 reactivity
@@ -828,8 +831,10 @@ export const createGameApp = () => {
                         // Update tiles to show canonical form (with diacritics)
                         // This displays the correct accented letters after submission
                         if (row && canonicalWord !== typedWord) {
-                            for (let i = 0; i < canonicalWord.length; i++) {
-                                row.splice(i, 1, canonicalWord[i]);
+                            const graphemeMode = this.config?.grapheme_mode === 'true';
+                            const canonicalChars = splitWord(canonicalWord, graphemeMode);
+                            for (let i = 0; i < canonicalChars.length; i++) {
+                                row.splice(i, 1, canonicalChars[i]);
                             }
                         }
 

diff --git a/frontend/src/graphemes.ts b/frontend/src/graphemes.ts
@@ -0,0 +1,31 @@
+/**
+ * Grapheme cluster utilities for languages where one visual character
+ * spans multiple Unicode codepoints (e.g., Hindi/Devanagari aksharas).
+ *
+ * Uses Intl.Segmenter (ECMA-402, typed in TypeScript's ES2022 lib) — supported in
+ * Chrome 87+, Firefox 125+, Safari 14.1+.
+ */
+
+/** Creates a grapheme-granularity segmenter for the runtime's default locale. */
+const createSegmenter = () => new Intl.Segmenter(undefined, { granularity: 'grapheme' });
+
+/**
+ * Cached segmenter, created on first grapheme-mode call rather than at import
+ * time so that loading this module cannot throw where Intl.Segmenter is missing.
+ */
+let cachedSegmenter: ReturnType<typeof createSegmenter> | undefined;
+
+/**
+ * Split a word into characters, respecting grapheme mode.
+ *
+ * @param word - The word to split.
+ * @param graphemeMode - When true, return grapheme clusters; otherwise return
+ *   individual codepoints (default behavior).
+ * @returns The characters of `word` in order (empty array for an empty word).
+ * @throws TypeError if `graphemeMode` is true and Intl.Segmenter is unavailable.
+ */
+export function splitWord(word: string, graphemeMode: boolean): string[] {
+    if (!graphemeMode) return [...word];
+    if (cachedSegmenter === undefined) cachedSegmenter = createSegmenter();
+    return Array.from(cachedSegmenter.segment(word), (s) => s.segment);
+}
diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts
@@ -66,6 +66,11 @@ export interface LanguageConfig {
      *  Example for Korean: { "KeyQ": "ㅂ", "ShiftKeyQ": "ㅃ", ... }
      */
     physical_key_map?: Record<string, string>;
+    /** When "true", words are split and their length counted by grapheme clusters
+     *  instead of codepoints. Required for scripts like Devanagari where one visual
+     *  character (akshara) spans multiple Unicode codepoints.
+     */
+    grapheme_mode?: 'true' | 'false';
 }
 
 // =============================================================================

diff --git a/pyproject.toml b/pyproject.toml
@@ -7,6 +7,7 @@ dependencies = [
     "arabic-reshaper>=3.0.0",
     "flask>=3.1.0",
     "flask-cors>=6.0.0",
+    "grapheme>=0.6.0",
     "gunicorn>=24.0.0",
     "openai>=2.21.0",
     "pillow>=12.1.1",
Original file line number	Diff line number	Diff line change
Expand Up		@@ -41,3 +41,4 @@ webapp/static/word-history/

		# Generated share images (rebuilt on deploy)
		webapp/static/images/share/
		scripts/.freq_data