Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 40 additions & 16 deletions internal/spell/aff.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,39 @@ type dictConfig struct {
TryChars string
WordChars string
CompoundOnly string
AffixMap map[rune]affix
AffixMap map[string]affix
CamelCase int
CompoundMin int64
compoundMap map[rune][]string
compoundMap map[string][]string
NoSuggestFlag string
}

// parseFlags splits a flag string into individual flags based on the FLAG type.
//
// Hunspell supports several flag formats:
//   - "ASCII" (default): each character is a flag
//   - "num": flags are comma-separated numbers (e.g., "14308,10482,4720")
//   - "UTF-8": each UTF-8 character is a flag
//   - "long": each pair of ASCII characters is a flag
//
// Handling of irregular input:
//   - "num": whitespace around each number is trimmed and empty entries
//     (from an empty string or a stray comma) are dropped.
//   - "long": a trailing unpaired byte is ignored.
//   - "ASCII"/"UTF-8": the string is cut on rune boundaries. A byte that is
//     not valid UTF-8 is kept as its own single-byte flag instead of being
//     replaced by U+FFFD, so distinct raw bytes remain distinct flags.
//
// The result is never nil; it has length zero when flagStr holds no flags.
func (a dictConfig) parseFlags(flagStr string) []string {
	switch a.Flag {
	case "num":
		fields := strings.Split(flagStr, ",")
		flags := make([]string, 0, len(fields))
		for _, field := range fields {
			// Skip blanks so "" and "100," do not yield an empty flag.
			if field = strings.TrimSpace(field); field != "" {
				flags = append(flags, field)
			}
		}
		return flags
	case "long":
		flags := make([]string, 0, len(flagStr)/2)
		for i := 0; i+1 < len(flagStr); i += 2 {
			flags = append(flags, flagStr[i:i+2])
		}
		return flags
	default: // "ASCII" or "UTF-8"
		flags := make([]string, 0, len(flagStr))
		// range reports the byte offset at which every rune starts (advancing
		// one byte at a time over invalid UTF-8), so slicing between
		// consecutive offsets yields each flag with its original bytes intact.
		start := 0
		for i := range flagStr {
			if i > start {
				flags = append(flags, flagStr[start:i])
				start = i
			}
		}
		if start < len(flagStr) {
			flags = append(flags, flagStr[start:])
		}
		return flags
	}
}

// expand expands a word/affix using dictionary/affix rules
//
// This also supports CompoundRule flags
Expand All @@ -87,11 +113,13 @@ func (a dictConfig) expand(wordAffix string, out []string) ([]string, error) {
// safe
word, keyString := wordAffix[:idx], wordAffix[idx+1:]

flags := a.parseFlags(keyString)

// check to see if any of the flags are in the
// "compound only". If so then nothing to add
compoundOnly := false
for _, key := range keyString {
if strings.ContainsRune(a.CompoundOnly, key) {
for _, key := range flags {
if key == a.CompoundOnly {
compoundOnly = true
continue
}
Expand All @@ -110,12 +138,9 @@ func (a dictConfig) expand(wordAffix string, out []string) ([]string, error) {
out = append(out, word)
prefixes := make([]affix, 0, 5)
suffixes := make([]affix, 0, 5)
for _, key := range keyString {
// want keyString to []?something?
// then iterate over that
for _, key := range flags {
af, ok := a.AffixMap[key]
if !ok {
// TODO: How should we handle this?
continue
}
if !af.CrossProduct {
Expand Down Expand Up @@ -161,8 +186,8 @@ func isCrossProduct(val string) (bool, error) {
func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen
aff := dictConfig{
Flag: "ASCII",
AffixMap: make(map[rune]affix),
compoundMap: make(map[rune][]string),
AffixMap: make(map[string]affix),
compoundMap: make(map[string][]string),
CompoundMin: 3, // default in Hunspell
}
scanner := bufio.NewScanner(file)
Expand Down Expand Up @@ -219,9 +244,9 @@ func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen
aff.CompoundRule = make([]string, 0, val)
} else {
aff.CompoundRule = append(aff.CompoundRule, parts[1])
for _, char := range parts[1] {
if _, ok := aff.compoundMap[char]; !ok {
aff.compoundMap[char] = []string{}
for _, flag := range aff.parseFlags(parts[1]) {
if _, ok := aff.compoundMap[flag]; !ok {
aff.compoundMap[flag] = []string{}
}
}
}
Expand All @@ -248,8 +273,7 @@ func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen

sections := len(parts)
if sections > 4 {
// does this need to be split out into suffix and prefix?
flag := rune(parts[1][0])
flag := parts[1]
a, ok := aff.AffixMap[flag]
if !ok {
return nil, fmt.Errorf("got rules for flag %q but no definition", flag)
Expand Down Expand Up @@ -299,7 +323,7 @@ func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen
Type: atype,
CrossProduct: cross,
}
flag := rune(parts[1][0])
flag := parts[1]
aff.AffixMap[flag] = a
}
default:
Expand Down
194 changes: 194 additions & 0 deletions internal/spell/aff_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package spell

import (
"strings"
"testing"
)

// TestParseFlagsASCII checks that with Flag "ASCII" the string "ABC" is
// split into the three single-character flags A, B and C.
func TestParseFlagsASCII(t *testing.T) {
	const input = "ABC"

	cfg := dictConfig{Flag: "ASCII"}
	got := cfg.parseFlags(input)

	if len(got) == 3 && got[0] == "A" && got[1] == "B" && got[2] == "C" {
		return
	}
	t.Errorf("ASCII parseFlags(%q) = %v, want [A B C]", input, got)
}

// TestParseFlagsNum checks that with Flag "num" a comma-separated list is
// split into one flag per number.
func TestParseFlagsNum(t *testing.T) {
	const input = "14308,10482,4720"

	cfg := dictConfig{Flag: "num"}
	got := cfg.parseFlags(input)

	if len(got) == 3 && got[0] == "14308" && got[1] == "10482" && got[2] == "4720" {
		return
	}
	t.Errorf("num parseFlags(%q) = %v, want [14308 10482 4720]", input, got)
}

// TestParseFlagsLong checks that with Flag "long" the string "AABB" is split
// into the two-character flags AA and BB.
func TestParseFlagsLong(t *testing.T) {
	const input = "AABB"

	cfg := dictConfig{Flag: "long"}
	got := cfg.parseFlags(input)

	if len(got) == 2 && got[0] == "AA" && got[1] == "BB" {
		return
	}
	t.Errorf("long parseFlags(%q) = %v, want [AA BB]", input, got)
}

// TestParseFlagsUTF8 checks that with Flag "UTF-8" a multi-byte character
// (here "ğ") is returned as a single flag alongside its ASCII neighbours.
func TestParseFlagsUTF8(t *testing.T) {
	const input = "AğB"

	cfg := dictConfig{Flag: "UTF-8"}
	got := cfg.parseFlags(input)

	if len(got) == 3 && got[0] == "A" && got[1] == "ğ" && got[2] == "B" {
		return
	}
	t.Errorf("UTF-8 parseFlags(%q) = %v, want [A ğ B]", input, got)
}

// TestFlagNumAffixParsing loads a small affix file that declares FLAG num and
// verifies that both suffix definitions end up in AffixMap under their full
// numeric flag ("100" and "200"), each carrying its single rule.
func TestFlagNumAffixParsing(t *testing.T) {
	// Smallest affix source that uses numeric flags.
	const affSource = `SET UTF-8
FLAG num

SFX 100 N 1
SFX 100 0 ler .

SFX 200 N 1
SFX 200 0 in .
`
	cfg, err := newDictConfig(strings.NewReader(affSource))
	if err != nil {
		t.Fatalf("newDictConfig error: %v", err)
	}

	if got := cfg.Flag; got != "num" {
		t.Errorf("Flag = %q, want %q", got, "num")
	}

	// Flag 100 must be present and hold exactly the "ler" rule.
	first, found := cfg.AffixMap["100"]
	if !found {
		t.Fatal("AffixMap missing flag 100")
	}
	if rules := first.Rules; !(len(rules) == 1 && rules[0].AffixText == "ler") {
		t.Errorf("flag 100 rules = %v, want [{ler}]", rules)
	}

	// Flag 200 must be present and hold exactly the "in" rule.
	second, found := cfg.AffixMap["200"]
	if !found {
		t.Fatal("AffixMap missing flag 200")
	}
	if rules := second.Rules; !(len(rules) == 1 && rules[0].AffixText == "in") {
		t.Errorf("flag 200 rules = %v, want [{in}]", rules)
	}
}

// TestFlagNumExpand verifies that expanding a dictionary entry carrying two
// numeric flags yields the stem plus one form per suffix, and nothing else.
func TestFlagNumExpand(t *testing.T) {
	const affSource = `SET UTF-8
FLAG num

SFX 100 N 1
SFX 100 0 ler .

SFX 200 N 1
SFX 200 0 in .
`
	cfg, err := newDictConfig(strings.NewReader(affSource))
	if err != nil {
		t.Fatalf("newDictConfig error: %v", err)
	}

	// The entry "belge/100,200" should produce belge, belgeler and belgein.
	got, err := cfg.expand("belge/100,200", nil)
	if err != nil {
		t.Fatalf("expand error: %v", err)
	}

	// Words still awaited; each one is removed the first time it is seen, so
	// a repeated or unknown word is reported as unexpected.
	pending := map[string]struct{}{
		"belge":    {},
		"belgeler": {},
		"belgein":  {},
	}
	for _, word := range got {
		if _, awaited := pending[word]; !awaited {
			t.Errorf("unexpected word %q in expansion", word)
			continue
		}
		delete(pending, word)
	}
	for word := range pending {
		t.Errorf("missing expected word %q", word)
	}
}

// TestFlagNumGoSpellReader builds a speller from an affix file using FLAG num
// and a two-word dictionary, then checks that the stems and their suffixed
// forms are accepted while an unrelated word is rejected.
func TestFlagNumGoSpellReader(t *testing.T) {
	const affSource = `SET UTF-8
FLAG num

SFX 100 N 1
SFX 100 0 ler .

SFX 200 N 1
SFX 200 0 nin .
`
	const dicSource = `2
belge/100,200
sistem/100,200
`

	affReader := strings.NewReader(affSource)
	dicReader := strings.NewReader(dicSource)
	speller, err := newGoSpellReader(affReader, dicReader)
	if err != nil {
		t.Fatalf("newGoSpellReader error: %v", err)
	}

	cases := []struct {
		word string
		want bool
	}{
		{word: "belge", want: true},
		{word: "belgeler", want: true},
		{word: "belgenin", want: true},
		{word: "sistem", want: true},
		{word: "sistemler", want: true},
		{word: "sistemnin", want: true},
		{word: "bilinmeyen", want: false},
	}

	for i := range cases {
		c := cases[i]
		if got := speller.spell(c.word); got != c.want {
			t.Errorf("spell(%q) = %v, want %v", c.word, got, c.want)
		}
	}
}

// TestASCIFlagBackwardCompatibility builds a speller from an affix file with
// no FLAG directive and single-character flags, and checks that the entry
// "test/AB" accepts the stem and both suffixed forms but not an underived word.
func TestASCIFlagBackwardCompatibility(t *testing.T) {
	// No FLAG line: the single-character flag format is in effect.
	const affSource = `SET UTF-8

SFX A N 1
SFX A 0 s .

SFX B N 1
SFX B 0 ed .
`
	const dicSource = `1
test/AB
`

	affReader := strings.NewReader(affSource)
	dicReader := strings.NewReader(dicSource)
	speller, err := newGoSpellReader(affReader, dicReader)
	if err != nil {
		t.Fatalf("newGoSpellReader error: %v", err)
	}

	cases := []struct {
		word string
		want bool
	}{
		{word: "test", want: true},
		{word: "tests", want: true},
		{word: "tested", want: true},
		{word: "testing", want: false},
	}

	for i := range cases {
		c := cases[i]
		if got := speller.spell(c.word); got != c.want {
			t.Errorf("spell(%q) = %v, want %v", c.word, got, c.want)
		}
	}
}
17 changes: 10 additions & 7 deletions internal/spell/gospell.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,17 @@ func newGoSpellReader(aff, dic io.Reader) (*goSpell, error) {

for _, compoundRule := range affix.CompoundRule {
pattern := "^"
for _, key := range compoundRule {
switch key {
case '(', ')', '+', '?', '*':
pattern += regexp.QuoteMeta(string(key))
default:
groups := affix.compoundMap[key]
pattern = pattern + "(" + strings.Join(groups, "|") + ")"
for _, key := range affix.parseFlags(compoundRule) {
if len(key) == 1 {
r := rune(key[0])
switch r {
case '(', ')', '+', '?', '*':
pattern += regexp.QuoteMeta(key)
continue
}
}
groups := affix.compoundMap[key]
pattern = pattern + "(" + strings.Join(groups, "|") + ")"
}
pattern += "$"

Expand Down