From 54384c796f9a952d5f7b335d52de505f040730df Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Tue, 9 Dec 2025 12:35:44 +0530 Subject: [PATCH] Refactor IndexInternalID usage --- index/scorch/snapshot_index.go | 32 ++++------------------ index/scorch/snapshot_index_doc.go | 5 ++-- index/scorch/snapshot_index_tfr.go | 9 +++--- index/scorch/snapshot_index_vr.go | 9 +++--- search/scorer/scorer_knn.go | 2 +- search/scorer/scorer_term.go | 6 ++-- search/searcher/search_disjunction_heap.go | 7 ++--- search/searcher/search_numeric_range.go | 2 +- 8 files changed, 24 insertions(+), 48 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 3f2a330c5..8ffb90a4d 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -17,7 +17,6 @@ package scorch import ( "container/heap" "context" - "encoding/binary" "fmt" "os" "path/filepath" @@ -59,11 +58,11 @@ func init() { var err error lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true) if err != nil { - panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) + panic(fmt.Errorf("levenshtein automaton ed1 builder err: %v", err)) } lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true) if err != nil { - panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) + panic(fmt.Errorf("levenshtein automaton ed2 builder err: %v", err)) } } @@ -501,7 +500,7 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) { return nil, nil } - docNum, err := docInternalToNumber(next.ID) + docNum, err := next.ID.Value() if err != nil { return nil, err } @@ -571,7 +570,7 @@ func (is *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (in } func (is *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { - docNum, err := docInternalToNumber(id) + docNum, err := id.Value() if err != nil { return "", err } @@ -589,7 +588,7 @@ func (is *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { } func (is *IndexSnapshot) segmentIndexAndLocalDocNum(id index.IndexInternalID) (int, uint64, error) { - docNum, err := docInternalToNumber(id) + docNum, err := id.Value() if err != nil { return 0, 0, err } @@ -776,25 +775,6 @@ func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReade is.m2.Unlock() } -func docNumberToBytes(buf []byte, in uint64) []byte { - if len(buf) != 8 { - if cap(buf) >= 8 { - buf = buf[0:8] - } else { - buf = make([]byte, 8) - } - } - binary.BigEndian.PutUint64(buf, in) - return buf -} - -func docInternalToNumber(in index.IndexInternalID) (uint64, error) { - if len(in) != 8 { - return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in) - } - return binary.BigEndian.Uint64(in), nil -} - func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( segmentIndex int, localDocNum uint64, fields []string, cFields []string, visitor index.DocValueVisitor, dvs segment.DocVisitState) ( @@ -897,7 +877,7 @@ func (dvr *DocValueReader) BytesRead() uint64 { func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, visitor index.DocValueVisitor, ) (err error) { - docNum, err := docInternalToNumber(id) + docNum, err := id.Value() if err != nil { return err } diff --git a/index/scorch/snapshot_index_doc.go b/index/scorch/snapshot_index_doc.go index 0a979bfb5..4048a199b 100644 --- a/index/scorch/snapshot_index_doc.go +++ b/index/scorch/snapshot_index_doc.go @@ -15,7 +15,6 @@ package scorch import ( - "bytes" "reflect" "github.com/RoaringBitmap/roaring/v2" @@ -49,7 +48,7 @@ func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { next := i.iterators[i.segmentOffset].Next() // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] - return docNumberToBytes(nil, uint64(next)+globalOffset), nil + return index.NewIndexInternalID(nil, uint64(next)+globalOffset), nil } return nil, nil } @@ -63,7 +62,7 @@ func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.Inde if next == nil { return nil, nil } - for bytes.Compare(next, ID) < 0 { + for next.Compare(ID) < 0 { next, err = i.Next() if err != nil { return nil, err diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index cd4d82dce..08d423925 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -15,7 +15,6 @@ package scorch import ( - "bytes" "context" "fmt" "reflect" @@ -94,7 +93,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] nnum := next.Number() - rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset) + rv.ID = index.NewIndexInternalID(rv.ID, nnum+globalOffset) i.postingToTermFieldDoc(next, rv) i.currID = rv.ID @@ -146,7 +145,7 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { // FIXME do something better // for now, if we need to seek backwards, then restart from the beginning - if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { + if i.currPosting != nil && i.currID.Compare(ID) >= 0 { // Check if the TFR is a special unadorned composite optimization. // Such a TFR will NOT have a valid `term` or `field` set, making it // impossible for the TFR to replace itself with a new one. @@ -171,7 +170,7 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo } } } - num, err := docInternalToNumber(ID) + num, err := ID.Value() if err != nil { return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err) } @@ -196,7 +195,7 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo if preAlloced == nil { preAlloced = &index.TermFieldDoc{} } - preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+ + preAlloced.ID = index.NewIndexInternalID(preAlloced.ID, next.Number()+ i.snapshot.offsets[segIndex]) i.postingToTermFieldDoc(next, preAlloced) i.currID = preAlloced.ID diff --git a/index/scorch/snapshot_index_vr.go b/index/scorch/snapshot_index_vr.go index bd57ad3e0..5e510c4d6 100644 --- a/index/scorch/snapshot_index_vr.go +++ b/index/scorch/snapshot_index_vr.go @@ -18,7 +18,6 @@ package scorch import ( - "bytes" "context" "encoding/json" "fmt" @@ -96,7 +95,7 @@ func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) ( // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] nnum := next.Number() - rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset) + rv.ID = index.NewIndexInternalID(rv.ID, nnum+globalOffset) rv.Score = float64(next.Score()) i.currID = rv.ID @@ -113,7 +112,7 @@ func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) ( func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID, preAlloced *index.VectorDoc) (*index.VectorDoc, error) { - if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { + if i.currPosting != nil && i.currID.Compare(ID) >= 0 { i2, err := i.snapshot.VectorReader(i.ctx, i.vector, i.field, i.k, i.searchParams, i.eligibleSelector) if err != nil { @@ -124,7 +123,7 @@ func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID, *i = *(i2.(*IndexSnapshotVectorReader)) } - num, err := docInternalToNumber(ID) + num, err := ID.Value() if err != nil { return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err) } @@ -149,7 +148,7 @@ func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID, if preAlloced == nil { preAlloced = &index.VectorDoc{} } - preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+ + preAlloced.ID = index.NewIndexInternalID(preAlloced.ID, next.Number()+ i.snapshot.offsets[segIndex]) i.currID = preAlloced.ID i.currPosting = next diff --git a/search/scorer/scorer_knn.go b/search/scorer/scorer_knn.go index 8d9043427..06f50cd4a 100644 --- a/search/scorer/scorer_knn.go +++ b/search/scorer/scorer_knn.go @@ -123,7 +123,7 @@ func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext, if sqs.options.Explain { rv.Expl = scoreExplanation } - rv.IndexInternalID = append(rv.IndexInternalID, knnMatch.ID...) + rv.IndexInternalID = index.NewIndexInternalIDFrom(rv.IndexInternalID, knnMatch.ID) return rv } diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index f5f8ec935..2f60dcecb 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -213,8 +213,8 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term childrenExplanations := s.scoreExplanation(tf, termMatch) scoreExplanation = &search.Explanation{ Value: score, - Message: fmt.Sprintf("fieldWeight(%s:%s in %s), as per %s model, "+ - "product of:", s.queryField, s.queryTerm, termMatch.ID, scoringModel), + Message: fmt.Sprintf("fieldWeight(%s:%s in %s), as per %s model, product of:", + s.queryField, s.queryTerm, termMatch.ID, scoringModel), Children: childrenExplanations, } } @@ -243,7 +243,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term } } - rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) + rv.IndexInternalID = index.NewIndexInternalIDFrom(rv.IndexInternalID, termMatch.ID) if len(termMatch.Vectors) > 0 { if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index 3da876bd3..4c68e5691 100644 --- a/search/searcher/search_disjunction_heap.go +++ b/search/searcher/search_disjunction_heap.go @@ -15,7 +15,6 @@ package searcher import ( - "bytes" "container/heap" "context" "math" @@ -169,7 +168,7 @@ func (s *DisjunctionHeapSearcher) updateMatches() error { matchingIdxs = append(matchingIdxs, next.matchingIdx) // now as long as top of heap matches, keep popping - for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 { + for len(s.heap) > 0 && next.curr.IndexInternalID.Equals(s.heap[0].curr.IndexInternalID) { next = heap.Pop(s).(*SearcherCurr) matching = append(matching, next.curr) matchingCurrs = append(matchingCurrs, next) @@ -264,7 +263,7 @@ func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext, // find all searchers that actually need to be advanced // advance them, using s.matchingCurrs as temp storage - for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 { + for len(s.heap) > 0 && s.heap[0].curr.IndexInternalID.Compare(ID) < 0 { searcherCurr := heap.Pop(s).(*SearcherCurr) ctx.DocumentMatchPool.Put(searcherCurr.curr) curr, err := searcherCurr.searcher.Advance(ctx, ID) @@ -347,7 +346,7 @@ func (s *DisjunctionHeapSearcher) Less(i, j int) bool { } else if s.heap[j].curr == nil { return false } - return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0 + return s.heap[i].curr.IndexInternalID.Compare(s.heap[j].curr.IndexInternalID) < 0 } func (s *DisjunctionHeapSearcher) Swap(i, j int) { diff --git a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index f086051c1..cd8f00719 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -132,7 +132,7 @@ func filterCandidateTerms(indexReader index.IndexReader, for err == nil && tfd != nil { termBytes := []byte(tfd.Term) i := sort.Search(len(terms), func(i int) bool { return bytes.Compare(terms[i], termBytes) >= 0 }) - if i < len(terms) && bytes.Compare(terms[i], termBytes) == 0 { + if i < len(terms) && bytes.Equal(terms[i], termBytes) { rv = append(rv, terms[i]) } terms = terms[i:]