diff --git a/internal/spell/aff.go b/internal/spell/aff.go index 32b365384..5f72cc22a 100644 --- a/internal/spell/aff.go +++ b/internal/spell/aff.go @@ -62,13 +62,39 @@ type dictConfig struct { TryChars string WordChars string CompoundOnly string - AffixMap map[rune]affix + AffixMap map[string]affix CamelCase int CompoundMin int64 - compoundMap map[rune][]string + compoundMap map[string][]string NoSuggestFlag string } +// parseFlags splits a flag string into individual flags based on the FLAG type. +// +// Hunspell supports several flag formats: +// - "ASCII" (default): each character is a flag +// - "num": flags are comma-separated numbers (e.g., "14308,10482,4720") +// - "UTF-8": each UTF-8 character is a flag +// - "long": each pair of ASCII characters is a flag +func (a dictConfig) parseFlags(flagStr string) []string { + switch a.Flag { + case "num": + return strings.Split(flagStr, ",") + case "long": + flags := make([]string, 0, len(flagStr)/2) + for i := 0; i+1 < len(flagStr); i += 2 { + flags = append(flags, flagStr[i:i+2]) + } + return flags + default: // "ASCII" or "UTF-8" + flags := make([]string, 0, len(flagStr)) + for _, r := range flagStr { + flags = append(flags, string(r)) + } + return flags + } +} + // expand expands a word/affix using dictionary/affix rules // // This also supports CompoundRule flags @@ -87,11 +113,13 @@ func (a dictConfig) expand(wordAffix string, out []string) ([]string, error) { // safe word, keyString := wordAffix[:idx], wordAffix[idx+1:] + flags := a.parseFlags(keyString) + // check to see if any of the flags are in the // "compound only". If so then nothing to add compoundOnly := false - for _, key := range keyString { - if strings.ContainsRune(a.CompoundOnly, key) { + for _, key := range flags { + if key == a.CompoundOnly { compoundOnly = true continue } @@ -110,12 +138,9 @@ func (a dictConfig) expand(wordAffix string, out []string) ([]string, error) { out = append(out, word) prefixes := make([]affix, 0, 5) suffixes := make([]affix, 0, 5) - for _, key := range keyString { - // want keyString to []?something? - // then iterate over that + for _, key := range flags { af, ok := a.AffixMap[key] if !ok { - // TODO: How should we handle this? continue } if !af.CrossProduct { @@ -161,8 +186,8 @@ func isCrossProduct(val string) (bool, error) { func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen aff := dictConfig{ Flag: "ASCII", - AffixMap: make(map[rune]affix), - compoundMap: make(map[rune][]string), + AffixMap: make(map[string]affix), + compoundMap: make(map[string][]string), CompoundMin: 3, // default in Hunspell } scanner := bufio.NewScanner(file) @@ -219,9 +244,9 @@ func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen aff.CompoundRule = make([]string, 0, val) } else { aff.CompoundRule = append(aff.CompoundRule, parts[1]) - for _, char := range parts[1] { - if _, ok := aff.compoundMap[char]; !ok { - aff.compoundMap[char] = []string{} + for _, flag := range aff.parseFlags(parts[1]) { + if _, ok := aff.compoundMap[flag]; !ok { + aff.compoundMap[flag] = []string{} } } } @@ -248,8 +273,7 @@ func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen sections := len(parts) if sections > 4 { - // does this need to be split out into suffix and prefix? - flag := rune(parts[1][0]) + flag := parts[1] a, ok := aff.AffixMap[flag] if !ok { return nil, fmt.Errorf("got rules for flag %q but no definition", flag) @@ -299,7 +323,7 @@ func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen Type: atype, CrossProduct: cross, } - flag := rune(parts[1][0]) + flag := parts[1] aff.AffixMap[flag] = a } default: diff --git a/internal/spell/aff_test.go b/internal/spell/aff_test.go new file mode 100644 index 000000000..5bc1cfb66 --- /dev/null +++ b/internal/spell/aff_test.go @@ -0,0 +1,194 @@ +package spell + +import ( + "strings" + "testing" +) + +func TestParseFlagsASCII(t *testing.T) { + dc := dictConfig{Flag: "ASCII"} + flags := dc.parseFlags("ABC") + if len(flags) != 3 || flags[0] != "A" || flags[1] != "B" || flags[2] != "C" { + t.Errorf("ASCII parseFlags(%q) = %v, want [A B C]", "ABC", flags) + } +} + +func TestParseFlagsNum(t *testing.T) { + dc := dictConfig{Flag: "num"} + flags := dc.parseFlags("14308,10482,4720") + if len(flags) != 3 || flags[0] != "14308" || flags[1] != "10482" || flags[2] != "4720" { + t.Errorf("num parseFlags(%q) = %v, want [14308 10482 4720]", "14308,10482,4720", flags) + } +} + +func TestParseFlagsLong(t *testing.T) { + dc := dictConfig{Flag: "long"} + flags := dc.parseFlags("AABB") + if len(flags) != 2 || flags[0] != "AA" || flags[1] != "BB" { + t.Errorf("long parseFlags(%q) = %v, want [AA BB]", "AABB", flags) + } +} + +func TestParseFlagsUTF8(t *testing.T) { + dc := dictConfig{Flag: "UTF-8"} + flags := dc.parseFlags("AğB") + if len(flags) != 3 || flags[0] != "A" || flags[1] != "ğ" || flags[2] != "B" { + t.Errorf("UTF-8 parseFlags(%q) = %v, want [A ğ B]", "AğB", flags) + } +} + +func TestFlagNumAffixParsing(t *testing.T) { + // Minimal FLAG num AFF file + affContent := `SET UTF-8 +FLAG num + +SFX 100 N 1 +SFX 100 0 ler . + +SFX 200 N 1 +SFX 200 0 in . +` + aff, err := newDictConfig(strings.NewReader(affContent)) + if err != nil { + t.Fatalf("newDictConfig error: %v", err) + } + + if aff.Flag != "num" { + t.Errorf("Flag = %q, want %q", aff.Flag, "num") + } + + // Check that affix 100 exists with "ler" suffix + a100, ok := aff.AffixMap["100"] + if !ok { + t.Fatal("AffixMap missing flag 100") + } + if len(a100.Rules) != 1 || a100.Rules[0].AffixText != "ler" { + t.Errorf("flag 100 rules = %v, want [{ler}]", a100.Rules) + } + + // Check that affix 200 exists with "in" suffix + a200, ok := aff.AffixMap["200"] + if !ok { + t.Fatal("AffixMap missing flag 200") + } + if len(a200.Rules) != 1 || a200.Rules[0].AffixText != "in" { + t.Errorf("flag 200 rules = %v, want [{in}]", a200.Rules) + } +} + +func TestFlagNumExpand(t *testing.T) { + affContent := `SET UTF-8 +FLAG num + +SFX 100 N 1 +SFX 100 0 ler . + +SFX 200 N 1 +SFX 200 0 in . +` + aff, err := newDictConfig(strings.NewReader(affContent)) + if err != nil { + t.Fatalf("newDictConfig error: %v", err) + } + + // "belge/100,200" should expand to: belge, belgeler, belgein + words, err := aff.expand("belge/100,200", nil) + if err != nil { + t.Fatalf("expand error: %v", err) + } + + expected := map[string]bool{"belge": true, "belgeler": true, "belgein": true} + for _, w := range words { + if !expected[w] { + t.Errorf("unexpected word %q in expansion", w) + } + delete(expected, w) + } + for w := range expected { + t.Errorf("missing expected word %q", w) + } +} + +func TestFlagNumGoSpellReader(t *testing.T) { + affContent := `SET UTF-8 +FLAG num + +SFX 100 N 1 +SFX 100 0 ler . + +SFX 200 N 1 +SFX 200 0 nin . +` + dicContent := `2 +belge/100,200 +sistem/100,200 +` + + gs, err := newGoSpellReader( + strings.NewReader(affContent), + strings.NewReader(dicContent), + ) + if err != nil { + t.Fatalf("newGoSpellReader error: %v", err) + } + + tests := []struct { + word string + want bool + }{ + {"belge", true}, + {"belgeler", true}, + {"belgenin", true}, + {"sistem", true}, + {"sistemler", true}, + {"sistemnin", true}, + {"bilinmeyen", false}, + } + + for _, tt := range tests { + got := gs.spell(tt.word) + if got != tt.want { + t.Errorf("spell(%q) = %v, want %v", tt.word, got, tt.want) + } + } +} + +func TestASCIFlagBackwardCompatibility(t *testing.T) { + // Original ASCII flag format must still work + affContent := `SET UTF-8 + +SFX A N 1 +SFX A 0 s . + +SFX B N 1 +SFX B 0 ed . +` + dicContent := `1 +test/AB +` + + gs, err := newGoSpellReader( + strings.NewReader(affContent), + strings.NewReader(dicContent), + ) + if err != nil { + t.Fatalf("newGoSpellReader error: %v", err) + } + + tests := []struct { + word string + want bool + }{ + {"test", true}, + {"tests", true}, + {"tested", true}, + {"testing", false}, + } + + for _, tt := range tests { + got := gs.spell(tt.word) + if got != tt.want { + t.Errorf("spell(%q) = %v, want %v", tt.word, got, tt.want) + } + } +} diff --git a/internal/spell/gospell.go b/internal/spell/gospell.go index cdfec6cde..7eb8652f9 100644 --- a/internal/spell/gospell.go +++ b/internal/spell/gospell.go @@ -220,14 +220,17 @@ func newGoSpellReader(aff, dic io.Reader) (*goSpell, error) { for _, compoundRule := range affix.CompoundRule { pattern := "^" - for _, key := range compoundRule { - switch key { - case '(', ')', '+', '?', '*': - pattern += regexp.QuoteMeta(string(key)) - default: - groups := affix.compoundMap[key] - pattern = pattern + "(" + strings.Join(groups, "|") + ")" + for _, key := range affix.parseFlags(compoundRule) { + if len(key) == 1 { + r := rune(key[0]) + switch r { + case '(', ')', '+', '?', '*': + pattern += regexp.QuoteMeta(key) + continue + } } + groups := affix.compoundMap[key] + pattern = pattern + "(" + strings.Join(groups, "|") + ")" } pattern += "$"