Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions document.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,26 @@ type TokenizableSpatialField interface {
// to override the spatial token generations during the analysis phase.
SetSpatialAnalyzerPlugin(SpatialAnalyzerPlugin)
}

// SynonymField represents a field that contains a list of synonyms for a set of terms.
// Each SynonymField is generated from a single synonym definition, and its name corresponds
// to the synonym source to which the synonym definition belongs.
type SynonymField interface {
// Field is embedded, so every SynonymField also satisfies the Field interface.
Field
// IterateSynonyms iterates over the terms held by the field and their synonyms.
// The provided visitor function is called with each term and its corresponding synonyms.
// NOTE(review): the interface does not state whether the synonyms slice may be
// retained or modified by the visitor after it returns — confirm with implementations.
IterateSynonyms(visitor func(term string, synonyms []string))
}

// SynonymFieldVisitor is a function type used to visit a SynonymField within a document.
// It is the callback type accepted by SynonymDocument.VisitSynonymFields.
type SynonymFieldVisitor func(SynonymField)

// SynonymDocument represents a special type of document that contains synonym fields.
// Each SynonymField is a field with a list of synonyms for a set of terms.
// These fields are derived from synonym definitions, and their names correspond to the synonym sources.
type SynonymDocument interface {
// Document is embedded, so every SynonymDocument also satisfies the Document interface.
Document
// VisitSynonymFields allows iteration over all synonym fields in the document.
// The provided visitor function is called once for each synonym field.
// NOTE(review): the visiting order is not specified by this interface — confirm
// with implementations before relying on it.
VisitSynonymFields(visitor SynonymFieldVisitor)
}
81 changes: 81 additions & 0 deletions index.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,41 @@ type CopyReader interface {
CloseCopyReader() error
}

// RegexAutomaton abstracts an automaton built using a regex pattern.
// Instances are handed out by IndexReaderRegexp.FieldDictRegexpAutomaton.
type RegexAutomaton interface {
// MatchesRegex returns true if the given string matches the regex pattern
// used to build the automaton, and false otherwise. The single string
// argument is the candidate to test.
MatchesRegex(string) bool
}

// IndexReaderRegexp provides functionality to work with regex-based field dictionaries.
// NOTE(review): the regex dialect accepted by these methods is not specified
// here — confirm with implementations.
type IndexReaderRegexp interface {
// FieldDictRegexp returns a FieldDict for terms matching the specified regex pattern
// in the dictionary of the given field.
// field names the field whose dictionary is searched; regex is the pattern.
FieldDictRegexp(field string, regex string) (FieldDict, error)

// FieldDictRegexpAutomaton returns a FieldDict and a RegexAutomaton that can be used
// to match strings against the regex pattern.
// It takes the same field and regex arguments as FieldDictRegexp.
FieldDictRegexpAutomaton(field string, regex string) (FieldDict, RegexAutomaton, error)
}

// FuzzyAutomaton abstracts a Levenshtein automaton built using a term and a fuzziness value.
// Instances are handed out by IndexReaderFuzzy.FieldDictFuzzyAutomaton.
type FuzzyAutomaton interface {
// MatchAndDistance checks if the given string is within the fuzziness distance
// of the term used to build the automaton. It also returns the edit (Levenshtein)
// distance between the string and the term.
// The bool result reports the match; the uint8 result is the distance.
// NOTE(review): the distance value returned for a non-matching string is not
// specified here — confirm with implementations before relying on it.
MatchAndDistance(term string) (bool, uint8)
}

// IndexReaderFuzzy provides functionality to work with fuzzy matching in field dictionaries.
type IndexReaderFuzzy interface {
// FieldDictFuzzy returns a FieldDict for terms that are within the specified fuzziness
// distance of the given term and match the specified prefix in the given field.
// NOTE(review): fuzziness is presumably the maximum Levenshtein edit distance
// (see FuzzyAutomaton); its valid range is not specified here — confirm.
FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)

// FieldDictFuzzyAutomaton returns a FieldDict and a FuzzyAutomaton that can be used
// to calculate the edit distance between the term and other strings.
// It takes the same field, term, fuzziness and prefix arguments as FieldDictFuzzy.
FieldDictFuzzyAutomaton(field string, term string, fuzziness int, prefix string) (FieldDict, FuzzyAutomaton, error)
}

type IndexReaderContains interface {
Expand Down Expand Up @@ -252,3 +281,55 @@ type IndexBuilder interface {
Index(doc Document) error
Close() error
}

// ThesaurusTermReader is an interface for enumerating synonyms of a term in a thesaurus.
type ThesaurusTermReader interface {
// Next returns the next synonym of the term, or an error if something goes wrong.
// NOTE(review): this comment previously said Next "returns nil" when the
// enumeration is complete, but a string result cannot be nil. Presumably the
// empty string together with a nil error signals completion — confirm against
// implementations.
Next() (string, error)

// Close releases any resources associated with the reader.
Close() error

// Size returns an integer size for the reader.
// NOTE(review): the unit is not specified here; presumably this is the
// reader's memory footprint in bytes — confirm against implementations.
Size() int
}

// ThesaurusEntry represents a term in the thesaurus for which synonyms are stored.
// It is the element type produced by ThesaurusKeys.Next.
type ThesaurusEntry struct {
// Term is the thesaurus key, i.e. the term whose synonyms are stored.
Term string
}

// ThesaurusKeys is an interface for enumerating terms (keys) in a thesaurus.
type ThesaurusKeys interface {
// Next returns the next key in the thesaurus, or an error if something goes wrong.
// Returns a nil *ThesaurusEntry when the enumeration is complete.
Next() (*ThesaurusEntry, error)

// Close releases any resources associated with the reader.
Close() error
}

// ThesaurusReader is an interface for accessing a thesaurus in the index.
// In every method, name identifies the thesaurus to read from.
type ThesaurusReader interface {
// IndexReader is embedded, so every ThesaurusReader also satisfies IndexReader.
IndexReader

// ThesaurusTermReader returns a reader for the synonyms of a given term in the
// specified thesaurus.
// Note that term is passed as []byte here, whereas ThesaurusKeysFuzzy takes its
// term as a string.
// NOTE(review): how ctx cancellation is honored is not specified here — confirm
// with implementations.
ThesaurusTermReader(ctx context.Context, name string, term []byte) (ThesaurusTermReader, error)

// ThesaurusKeys returns a reader for all terms in the specified thesaurus.
ThesaurusKeys(name string) (ThesaurusKeys, error)

// ThesaurusKeysFuzzy returns a reader for terms in the specified thesaurus that
// match the given prefix and are within the specified fuzziness distance from
// the provided term.
ThesaurusKeysFuzzy(name string, term string, fuzziness int, prefix string) (ThesaurusKeys, error)

// ThesaurusKeysRegexp returns a reader for terms in the specified thesaurus that
// match the given regular expression pattern.
ThesaurusKeysRegexp(name string, regex string) (ThesaurusKeys, error)

// ThesaurusKeysPrefix returns a reader for terms in the specified thesaurus that
// start with the given prefix.
ThesaurusKeysPrefix(name string, termPrefix []byte) (ThesaurusKeys, error)
}
Loading