From f8ff63742519ea51a5bca6ad9a844325fb319b58 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Tue, 15 Jul 2025 18:42:41 +0530 Subject: [PATCH 01/18] draft 1 --- document.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/document.go b/document.go index bc91c6c..3040e5f 100644 --- a/document.go +++ b/document.go @@ -36,6 +36,10 @@ type Document interface { Indexed() bool } +type DocumentAnalyzer interface { + Analyze(document Document) error +} + type FieldVisitor func(Field) type Field interface { @@ -124,3 +128,9 @@ type SynonymDocument interface { // The provided visitor function is called for each synonym field. VisitSynonymFields(visitor SynonymFieldVisitor) } + +type NestedField interface { + Field + NumChildren() int + VisitChildren(visitor func(arrayPosition int, document Document)) +} From 7d784dd6ea024f050bb1767259db48860478c1fc Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 4 Aug 2025 20:53:19 +0530 Subject: [PATCH 02/18] tester --- document.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/document.go b/document.go index 3040e5f..3535aac 100644 --- a/document.go +++ b/document.go @@ -129,8 +129,19 @@ type SynonymDocument interface { VisitSynonymFields(visitor SynonymFieldVisitor) } +// NestedFieldVisitor is a function type used to visit a NestedField within a document. +type NestedFieldVisitor func(NestedField) + +// NestedDocument represents a special type of document that contains nested fields. +type NestedDocument interface { + Document + // VisitNestedFields allows iteration over all nested fields in the document. + // The provided visitor function is called for each nested field. + VisitNestedFields(visitor NestedFieldVisitor) +} + type NestedField interface { Field NumChildren() int - VisitChildren(visitor func(arrayPosition int, document Document)) + VisitChildren(visitor func(arrayPosition int, document NestedDocument)) } From de6546b285c8b2eccc75e8a39c8d58c392254ad0 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Sat, 9 Aug 2025 09:38:40 +0530 Subject: [PATCH 03/18] test --- document.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/document.go b/document.go index 3535aac..7254c50 100644 --- a/document.go +++ b/document.go @@ -138,10 +138,13 @@ type NestedDocument interface { // VisitNestedFields allows iteration over all nested fields in the document. // The provided visitor function is called for each nested field. VisitNestedFields(visitor NestedFieldVisitor) + + // returns the root document without any nested fields + WithoutNestedFields() Document } type NestedField interface { Field NumChildren() int - VisitChildren(visitor func(arrayPosition int, document NestedDocument)) + VisitChildren(visitor func(arrayPosition int, document Document)) } From ca2d4b35516bd6254f12725668db65e87344f031 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 22 Aug 2025 16:51:19 +0530 Subject: [PATCH 04/18] new interfaces --- document.go | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/document.go b/document.go index 7254c50..2e89e13 100644 --- a/document.go +++ b/document.go @@ -129,22 +129,10 @@ type SynonymDocument interface { VisitSynonymFields(visitor SynonymFieldVisitor) } -// NestedFieldVisitor is a function type used to visit a NestedField within a document. -type NestedFieldVisitor func(NestedField) - -// NestedDocument represents a special type of document that contains nested fields. +// NestedDocument is a document that contains other documents inside it. type NestedDocument interface { Document - // VisitNestedFields allows iteration over all nested fields in the document. - // The provided visitor function is called for each nested field. - VisitNestedFields(visitor NestedFieldVisitor) - - // returns the root document without any nested fields - WithoutNestedFields() Document -} - -type NestedField interface { - Field - NumChildren() int - VisitChildren(visitor func(arrayPosition int, document Document)) + // VisitNestedDocuments allows iteration over all nested documents in the document. + // The provided visitor function is called for each nested document. + VisitNestedDocuments(visitor func(doc Document)) } From 0e1601ee610fe606f7bb695d8b7f00188ae95e02 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 29 Aug 2025 20:12:42 +0530 Subject: [PATCH 05/18] new apis --- index.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/index.go b/index.go index 12d907e..72dc771 100644 --- a/index.go +++ b/index.go @@ -17,6 +17,8 @@ package index import ( "bytes" "context" + "encoding/binary" + "fmt" "reflect" ) @@ -188,6 +190,18 @@ func (tfv *TermFieldVector) Size() int { // IndexInternalID is an opaque document identifier interal to the index impl type IndexInternalID []byte +func NewIndexInternalID(buf []byte, in uint64) IndexInternalID { + if len(buf) != 8 { + if cap(buf) >= 8 { + buf = buf[0:8] + } else { + buf = make([]byte, 8) + } + } + binary.BigEndian.PutUint64(buf, in) + return buf +} + func (id IndexInternalID) Equals(other IndexInternalID) bool { return id.Compare(other) == 0 } @@ -196,6 +210,13 @@ func (id IndexInternalID) Compare(other IndexInternalID) int { return bytes.Compare(id, other) } +func (id IndexInternalID) Value() (uint64, error) { + if len(id) != 8 { + return 0, fmt.Errorf("wrong len for IndexInternalID: %q", id) + } + return binary.BigEndian.Uint64(id), nil +} + type TermFieldDoc struct { Term string ID IndexInternalID From 0f9d3004554d6e9600a6b7c1af95a88233fa5932 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Sat, 13 Sep 2025 20:20:33 +0530 Subject: [PATCH 06/18] cleanup --- document.go | 4 ---- index.go | 10 ++++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/document.go b/document.go index 2e89e13..e3fd488 100644 --- a/document.go +++ b/document.go @@ -36,10 +36,6 @@ type Document interface { Indexed() bool } -type DocumentAnalyzer interface { - Analyze(document Document) error -} - type FieldVisitor func(Field) type Field interface { diff --git a/index.go b/index.go index 72dc771..b1dc155 100644 --- a/index.go +++ b/index.go @@ -387,6 +387,7 @@ type EligibleDocumentSelector interface { SegmentEligibleDocs(segmentID int) []uint64 } +<<<<<<< HEAD // ----------------------------------------------------------------------------- type TermFreq struct { @@ -411,4 +412,13 @@ type IndexInsightsReader interface { // Obtains a maximum limit number of centroid vectors from IVF indexes sorted based on // cluster densities (or cardinalities) CentroidCardinalities(field string, limit int, descending bool) (cenCards []CentroidCardinality, err error) +======= +// NestedReader is an extended index reader that supports hierarchical document structures. +type NestedReader interface { + IndexReader + // Ancestors returns the ancestral chain for a given document ID in the index. + // For nested documents, this method retrieves all parent documents in the hierarchy + // leading up to the specified document ID. + Ancestors(id IndexInternalID) ([]IndexInternalID, error) +>>>>>>> c63531c (cleanup) } From 48c3e18c6d731507787486373a59421adf4b6eb3 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Tue, 11 Nov 2025 11:38:21 +0530 Subject: [PATCH 07/18] HasNestedDocs API --- index.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/index.go b/index.go index b1dc155..1d25f00 100644 --- a/index.go +++ b/index.go @@ -416,6 +416,8 @@ type IndexInsightsReader interface { // NestedReader is an extended index reader that supports hierarchical document structures. type NestedReader interface { IndexReader + // HasNestedDocs checks if the index has any documents with ancestry (i.e., nested documents). + HasNestedDocs() bool // Ancestors returns the ancestral chain for a given document ID in the index. // For nested documents, this method retrieves all parent documents in the hierarchy // leading up to the specified document ID. From e7b28f711576b1e80ba48ebbb33567457c51c910 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Tue, 11 Nov 2025 13:29:08 +0530 Subject: [PATCH 08/18] remove API --- index.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/index.go b/index.go index 1d25f00..b1dc155 100644 --- a/index.go +++ b/index.go @@ -416,8 +416,6 @@ type IndexInsightsReader interface { // NestedReader is an extended index reader that supports hierarchical document structures. type NestedReader interface { IndexReader - // HasNestedDocs checks if the index has any documents with ancestry (i.e., nested documents). - HasNestedDocs() bool // Ancestors returns the ancestral chain for a given document ID in the index. // For nested documents, this method retrieves all parent documents in the hierarchy // leading up to the specified document ID. From d0117e58da44b7d25971ba911af7c29816d6ac38 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Mon, 17 Nov 2025 10:18:33 +0530 Subject: [PATCH 09/18] fix conflict --- index.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index.go b/index.go index b1dc155..489e762 100644 --- a/index.go +++ b/index.go @@ -387,7 +387,6 @@ type EligibleDocumentSelector interface { SegmentEligibleDocs(segmentID int) []uint64 } -<<<<<<< HEAD // ----------------------------------------------------------------------------- type TermFreq struct { @@ -412,7 +411,9 @@ type IndexInsightsReader interface { // Obtains a maximum limit number of centroid vectors from IVF indexes sorted based on // cluster densities (or cardinalities) CentroidCardinalities(field string, limit int, descending bool) (cenCards []CentroidCardinality, err error) -======= +} + +// ----------------------------------------------------------------------------- // NestedReader is an extended index reader that supports hierarchical document structures. type NestedReader interface { IndexReader @@ -420,5 +421,4 @@ type NestedReader interface { // For nested documents, this method retrieves all parent documents in the hierarchy // leading up to the specified document ID. Ancestors(id IndexInternalID) ([]IndexInternalID, error) ->>>>>>> c63531c (cleanup) } From 05dcbde95e9c2c99caaab75f3edaf23f4a7faf71 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Wed, 26 Nov 2025 20:19:30 +0530 Subject: [PATCH 10/18] new ancestorID --- index.go | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/index.go b/index.go index 489e762..f71c45c 100644 --- a/index.go +++ b/index.go @@ -419,6 +419,41 @@ type NestedReader interface { IndexReader // Ancestors returns the ancestral chain for a given document ID in the index. // For nested documents, this method retrieves all parent documents in the hierarchy - // leading up to the specified document ID. - Ancestors(id IndexInternalID) ([]IndexInternalID, error) + // leading up to the root document ID. + Ancestors(id IndexInternalID) ([]AncestorID, error) +} + +// AncestorID represents the identifier of an ancestor document in an ancestor chain. +type AncestorID uint64 + +// NewAncestorID creates a new AncestorID from the given uint64 value. +func NewAncestorID(val uint64) AncestorID { + return AncestorID(val) +} + +// Compare compares two AncestorID values. +func (a AncestorID) Compare(b AncestorID) int { + switch { + case a < b: + return -1 + case a > b: + return 1 + default: + return 0 + } +} + +// Equals checks if two AncestorID values are equal. +func (a AncestorID) Equals(b AncestorID) bool { + return a == b +} + +// Add returns a new AncestorID by adding the given uint64 value to the current AncestorID. +func (a AncestorID) Add(n uint64) AncestorID { + return AncestorID(uint64(a) + n) +} + +// ToIndexInternalID converts the AncestorID to an IndexInternalID. +func (a AncestorID) ToIndexInternalID() IndexInternalID { + return NewIndexInternalID(nil, uint64(a)) } From 9fa25f1e3b0d9abcc573b9dca29cd3fcdd20f4fd Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Thu, 27 Nov 2025 12:17:46 +0530 Subject: [PATCH 11/18] allow prealloc --- index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.go b/index.go index f71c45c..e6ba49f 100644 --- a/index.go +++ b/index.go @@ -420,7 +420,7 @@ type NestedReader interface { // Ancestors returns the ancestral chain for a given document ID in the index. // For nested documents, this method retrieves all parent documents in the hierarchy // leading up to the root document ID. - Ancestors(id IndexInternalID) ([]AncestorID, error) + Ancestors(id IndexInternalID, prealloc []AncestorID) ([]AncestorID, error) } // AncestorID represents the identifier of an ancestor document in an ancestor chain. From 5b44c347212ca50f7c5ab68839caa84d50e55685 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Fri, 28 Nov 2025 00:59:11 +0530 Subject: [PATCH 12/18] allow prealloc --- index.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.go b/index.go index e6ba49f..8a0841e 100644 --- a/index.go +++ b/index.go @@ -454,6 +454,6 @@ func (a AncestorID) Add(n uint64) AncestorID { } // ToIndexInternalID converts the AncestorID to an IndexInternalID. -func (a AncestorID) ToIndexInternalID() IndexInternalID { - return NewIndexInternalID(nil, uint64(a)) +func (a AncestorID) ToIndexInternalID(prealloc IndexInternalID) IndexInternalID { + return NewIndexInternalID(prealloc, uint64(a)) } From 5d6b24088dec86379469a48ebc0e18231de11b22 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Fri, 28 Nov 2025 15:07:36 +0530 Subject: [PATCH 13/18] new api --- index.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index.go b/index.go index 8a0841e..1284a29 100644 --- a/index.go +++ b/index.go @@ -202,6 +202,10 @@ func NewIndexInternalID(buf []byte, in uint64) IndexInternalID { return buf } +func NewIndexInternalIDFrom(buf IndexInternalID, other IndexInternalID) IndexInternalID { + return append(buf, other...) +} + func (id IndexInternalID) Equals(other IndexInternalID) bool { return id.Compare(other) == 0 } From bc76bfdde53fef8e764ac4b2c3553a33e8d3f479 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Tue, 2 Dec 2025 07:19:13 +0530 Subject: [PATCH 14/18] add comments --- index.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/index.go b/index.go index 1284a29..6929535 100644 --- a/index.go +++ b/index.go @@ -190,6 +190,7 @@ func (tfv *TermFieldVector) Size() int { // IndexInternalID is an opaque document identifier interal to the index impl type IndexInternalID []byte +// NewIndexInternalID encodes a uint64 into an 8-byte big-endian ID, reusing `buf` when possible. func NewIndexInternalID(buf []byte, in uint64) IndexInternalID { if len(buf) != 8 { if cap(buf) >= 8 { @@ -202,18 +203,22 @@ func NewIndexInternalID(buf []byte, in uint64) IndexInternalID { return buf } +// NewIndexInternalIDFrom creates a new IndexInternalID by copying from `other`, reusing `buf` when possible. func NewIndexInternalIDFrom(buf IndexInternalID, other IndexInternalID) IndexInternalID { return append(buf, other...) } +// Equals checks if two IndexInternalID values are equal. func (id IndexInternalID) Equals(other IndexInternalID) bool { return id.Compare(other) == 0 } +// Compare compares two IndexInternalID values, inherently comparing the encoded uint64 values. func (id IndexInternalID) Compare(other IndexInternalID) int { return bytes.Compare(id, other) } +// Value returns the uint64 value encoded in the IndexInternalID. func (id IndexInternalID) Value() (uint64, error) { if len(id) != 8 { return 0, fmt.Errorf("wrong len for IndexInternalID: %q", id) From e9fea55419aa009e6946386a90217e60638f7e3f Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Tue, 2 Dec 2025 15:09:05 +0530 Subject: [PATCH 15/18] update workflows --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 496c91e..e0c425b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: test: strategy: matrix: - go-version: [1.20.x, 1.21.x, 1.22.x] + go-version: [1.23.x, 1.24.x, 1.25.x] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: From 256717daf4a2971f9ef05d1b0dd8a07dc44789bc Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Tue, 2 Dec 2025 15:15:56 +0530 Subject: [PATCH 16/18] Update index.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- index.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index.go b/index.go index 6929535..8ce0834 100644 --- a/index.go +++ b/index.go @@ -205,6 +205,7 @@ func NewIndexInternalID(buf []byte, in uint64) IndexInternalID { // NewIndexInternalIDFrom creates a new IndexInternalID by copying from `other`, reusing `buf` when possible. func NewIndexInternalIDFrom(buf IndexInternalID, other IndexInternalID) IndexInternalID { + buf = buf[:0] return append(buf, other...) } From e8d9c5935dbc7788b3b33c606ddc508065107895 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Tue, 2 Dec 2025 15:16:15 +0530 Subject: [PATCH 17/18] Update index.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.go b/index.go index 8ce0834..005d721 100644 --- a/index.go +++ b/index.go @@ -187,7 +187,7 @@ func (tfv *TermFieldVector) Size() int { len(tfv.Field) + len(tfv.ArrayPositions)*sizeOfUint64 } -// IndexInternalID is an opaque document identifier interal to the index impl +// IndexInternalID is an opaque document identifier internal to the index impl type IndexInternalID []byte // NewIndexInternalID encodes a uint64 into an 8-byte big-endian ID, reusing `buf` when possible. From ae3c04eea837088d4076f29afb26920beade32f5 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Tue, 2 Dec 2025 13:27:03 -0700 Subject: [PATCH 18/18] Update workflow actions --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e0c425b..e2cdef8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Install Go - uses: actions/setup-go@v1 + uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} - name: Checkout code