diff --git a/rust/cuvs/src/brute_force.rs b/rust/cuvs/src/brute_force.rs
index a0759ee49b..1440bb3205 100644
--- a/rust/cuvs/src/brute_force.rs
+++ b/rust/cuvs/src/brute_force.rs
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 //! Brute Force KNN
@@ -62,7 +62,7 @@ impl Index {
     /// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
     /// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
     pub fn search(
-        self,
+        &self,
         res: &Resources,
         queries: &ManagedTensor,
         neighbors: &ManagedTensor,
@@ -89,7 +89,7 @@ impl Index {
 impl Drop for Index {
     fn drop(&mut self) {
         if let Err(e) = check_cuvs(unsafe { ffi::cuvsBruteForceIndexDestroy(self.0) }) {
-            write!(stderr(), "failed to call cagraIndexDestroy {:?}", e)
+            write!(stderr(), "failed to call bruteForceIndexDestroy {:?}", e)
                 .expect("failed to write to stderr");
         }
     }
@@ -172,4 +172,11 @@ mod tests {
     fn test_l2() {
         test_bfknn(DistanceType::L2Expanded);
     }
+
+    // NOTE: brute_force multiple-search test is omitted here because the C++
+    // brute_force::index stores a non-owning view into the dataset. Building
+    // from device data via `build()` drops the ManagedTensor after the call,
+    // leaving a dangling pointer. A follow-up PR will add dataset lifetime
+    // enforcement (DatasetOwnership<'a>) to make this safe.
+    // See: https://github.com/rapidsai/cuvs/issues/1838
 }
diff --git a/rust/cuvs/src/cagra/index.rs b/rust/cuvs/src/cagra/index.rs
index 42f55659bd..789f72b603 100644
--- a/rust/cuvs/src/cagra/index.rs
+++ b/rust/cuvs/src/cagra/index.rs
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -59,7 +59,7 @@ impl Index {
     /// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
     /// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
     pub fn search(
-        self,
+        &self,
         res: &Resources,
         params: &SearchParams,
         queries: &ManagedTensor,
@@ -167,4 +167,59 @@ mod tests {
             .set_compression(CompressionParams::new().unwrap());
         test_cagra(build_params);
     }
+
+    /// Test that an index can be searched multiple times without rebuilding.
+    /// This validates that search() takes &self instead of self.
+    #[test]
+    fn test_cagra_multiple_searches() {
+        let res = Resources::new().unwrap();
+        let build_params = IndexParams::new().unwrap();
+
+        // Create a random dataset
+        let n_datapoints = 256;
+        let n_features = 16;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+
+        // Build the index once
+        let index =
+            Index::build(&res, &build_params, &dataset).expect("failed to create cagra index");
+
+        let search_params = SearchParams::new().unwrap();
+        let k = 5;
+
+        // Perform multiple searches on the same index
+        for search_iter in 0..3 {
+            let n_queries = 4;
+            let queries = dataset.slice(s![0..n_queries, ..]);
+            let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();
+
+            let mut neighbors_host = ndarray::Array::<u32, _>::zeros((n_queries, k));
+            let neighbors = ManagedTensor::from(&neighbors_host)
+                .to_device(&res)
+                .unwrap();
+
+            let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+            let distances = ManagedTensor::from(&distances_host)
+                .to_device(&res)
+                .unwrap();
+
+            // This should work on every iteration because search() takes &self
+            index
+                .search(&res, &search_params, &queries, &neighbors, &distances)
+                .unwrap_or_else(|e| panic!("search iteration {} failed: {:?}", search_iter, e));
+
+            // Copy back to host memory
+            distances.to_host(&res, &mut distances_host).unwrap();
+            neighbors.to_host(&res, &mut neighbors_host).unwrap();
+
+            // Query 0 is dataset row 0, so its nearest neighbor must be index 0 every time
+            assert_eq!(
+                neighbors_host[[0, 0]],
+                0,
+                "iteration {}: first query should find itself",
+                search_iter
+            );
+        }
+    }
 }
diff --git a/rust/cuvs/src/ivf_flat/index.rs b/rust/cuvs/src/ivf_flat/index.rs
index fa630a917c..c38be828de 100644
--- a/rust/cuvs/src/ivf_flat/index.rs
+++ b/rust/cuvs/src/ivf_flat/index.rs
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -59,7 +59,7 @@ impl Index {
     /// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
     /// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
     pub fn search(
-        self,
+        &self,
         res: &Resources,
         params: &SearchParams,
         queries: &ManagedTensor,
@@ -157,4 +157,61 @@ mod tests {
         assert_eq!(neighbors_host[[2, 0]], 2);
         assert_eq!(neighbors_host[[3, 0]], 3);
     }
+
+    /// Test that an index can be searched multiple times without rebuilding.
+    /// This validates that search() takes &self instead of self.
+    #[test]
+    fn test_ivf_flat_multiple_searches() {
+        let build_params = IndexParams::new().unwrap().set_n_lists(64);
+        let res = Resources::new().unwrap();
+
+        // Create a random dataset
+        let n_datapoints = 1024;
+        let n_features = 16;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        // Build the index once
+        let index = Index::build(&res, &build_params, dataset_device)
+            .expect("failed to create ivf-flat index");
+
+        let search_params = SearchParams::new().unwrap();
+        let k = 5;
+
+        // Perform multiple searches on the same index
+        for search_iter in 0..3 {
+            let n_queries = 4;
+            let queries = dataset.slice(s![0..n_queries, ..]);
+            let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();
+
+            let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
+            let neighbors = ManagedTensor::from(&neighbors_host)
+                .to_device(&res)
+                .unwrap();
+
+            let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+            let distances = ManagedTensor::from(&distances_host)
+                .to_device(&res)
+                .unwrap();
+
+            // This should work on every iteration because search() takes &self
+            index
+                .search(&res, &search_params, &queries, &neighbors, &distances)
+                .unwrap_or_else(|e| panic!("search iteration {} failed: {:?}", search_iter, e));
+
+            // Copy back to host memory
+            distances.to_host(&res, &mut distances_host).unwrap();
+            neighbors.to_host(&res, &mut neighbors_host).unwrap();
+
+            // Query 0 is dataset row 0, so its nearest neighbor must be index 0 every time
+            assert_eq!(
+                neighbors_host[[0, 0]],
+                0,
+                "iteration {}: first query should find itself",
+                search_iter
+            );
+        }
+    }
 }
diff --git a/rust/cuvs/src/ivf_pq/index.rs b/rust/cuvs/src/ivf_pq/index.rs
index 3a66b3d457..f61e3b771a 100644
--- a/rust/cuvs/src/ivf_pq/index.rs
+++ b/rust/cuvs/src/ivf_pq/index.rs
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -59,7 +59,7 @@ impl Index {
     /// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
     /// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
     pub fn search(
-        self,
+        &self,
         res: &Resources,
         params: &SearchParams,
         queries: &ManagedTensor,
@@ -151,4 +151,61 @@ mod tests {
         assert_eq!(neighbors_host[[2, 0]], 2);
         assert_eq!(neighbors_host[[3, 0]], 3);
     }
+
+    /// Test that an index can be searched multiple times without rebuilding.
+    /// This validates that search() takes &self instead of self.
+    #[test]
+    fn test_ivf_pq_multiple_searches() {
+        let build_params = IndexParams::new().unwrap().set_n_lists(64);
+        let res = Resources::new().unwrap();
+
+        // Create a random dataset
+        let n_datapoints = 1024;
+        let n_features = 16;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        // Build the index once
+        let index = Index::build(&res, &build_params, dataset_device)
+            .expect("failed to create ivf-pq index");
+
+        let search_params = SearchParams::new().unwrap();
+        let k = 5;
+
+        // Perform multiple searches on the same index
+        for search_iter in 0..3 {
+            let n_queries = 4;
+            let queries = dataset.slice(s![0..n_queries, ..]);
+            let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();
+
+            let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
+            let neighbors = ManagedTensor::from(&neighbors_host)
+                .to_device(&res)
+                .unwrap();
+
+            let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+            let distances = ManagedTensor::from(&distances_host)
+                .to_device(&res)
+                .unwrap();
+
+            // This should work on every iteration because search() takes &self
+            index
+                .search(&res, &search_params, &queries, &neighbors, &distances)
+                .unwrap_or_else(|e| panic!("search iteration {} failed: {:?}", search_iter, e));
+
+            // Copy back to host memory
+            distances.to_host(&res, &mut distances_host).unwrap();
+            neighbors.to_host(&res, &mut neighbors_host).unwrap();
+
+            // Query 0 is dataset row 0, so its nearest neighbor must be index 0 every time
+            assert_eq!(
+                neighbors_host[[0, 0]],
+                0,
+                "iteration {}: first query should find itself",
+                search_iter
+            );
+        }
+    }
 }