Skip to content
Merged
13 changes: 10 additions & 3 deletions rust/cuvs/src/brute_force.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
//! Brute Force KNN
Expand Down Expand Up @@ -62,7 +62,7 @@ impl Index {
/// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
/// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
pub fn search(
self,
&self,
res: &Resources,
queries: &ManagedTensor,
neighbors: &ManagedTensor,
Expand All @@ -89,7 +89,7 @@ impl Index {
// Calls `cuvsBruteForceIndexDestroy` on the wrapped index (`self.0`) when the
// Rust wrapper is dropped.
impl Drop for Index {
fn drop(&mut self) {
// `drop` has no return value, so a failed destroy call is reported on stderr
// rather than propagated to the caller.
if let Err(e) = check_cuvs(unsafe { ffi::cuvsBruteForceIndexDestroy(self.0) }) {
// NOTE(review): the two `write!` lines below look like the removed and added
// sides of a diff hunk; only the `bruteForceIndexDestroy` message matches the
// destroy function called above.
write!(stderr(), "failed to call cagraIndexDestroy {:?}", e)
write!(stderr(), "failed to call bruteForceIndexDestroy {:?}", e)
.expect("failed to write to stderr");
}
}
}
Expand Down Expand Up @@ -172,4 +172,11 @@ mod tests {
fn test_l2() {
// Delegates to `test_bfknn` with `DistanceType::L2Expanded`.
test_bfknn(DistanceType::L2Expanded);
}

// NOTE: brute_force multiple-search test is omitted here because the C++
// brute_force::index stores a non-owning view into the dataset. Building
// from device data via `build()` drops the ManagedTensor after the call,
// leaving a dangling pointer. A follow-up PR will add dataset lifetime
// enforcement (DatasetOwnership<'a>) to make this safe.
// See: https://github.com/rapidsai/cuvs/issues/1838
}
59 changes: 57 additions & 2 deletions rust/cuvs/src/cagra/index.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -59,7 +59,7 @@ impl Index {
/// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
/// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
pub fn search(
self,
&self,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch! thanks for fixing this

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem!

res: &Resources,
params: &SearchParams,
queries: &ManagedTensor,
Expand Down Expand Up @@ -167,4 +167,59 @@ mod tests {
.set_compression(CompressionParams::new().unwrap());
test_cagra(build_params);
}

/// Test that a single CAGRA index can be searched repeatedly without rebuilding.
///
/// `search()` takes `&self`, so the same `Index` value must remain usable on
/// every iteration of the loop below.
#[test]
fn test_cagra_multiple_searches() {
    let res = Resources::new().unwrap();
    let build_params = IndexParams::new().unwrap();

    // Create a random dataset
    let n_datapoints = 256;
    let n_features = 16;
    let dataset =
        ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));

    // Build the index once
    let index =
        Index::build(&res, &build_params, &dataset).expect("failed to create cagra index");

    let search_params = SearchParams::new().unwrap();
    let k = 5;
    // Loop-invariant: every iteration queries with the same number of rows.
    let n_queries = 4;

    // Perform multiple searches on the same index
    for search_iter in 0..3 {
        // The queries are the first `n_queries` rows of the dataset itself.
        let queries = dataset.slice(s![0..n_queries, ..]);
        let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();

        let mut neighbors_host = ndarray::Array::<u32, _>::zeros((n_queries, k));
        let neighbors = ManagedTensor::from(&neighbors_host)
            .to_device(&res)
            .unwrap();

        let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
        let distances = ManagedTensor::from(&distances_host)
            .to_device(&res)
            .unwrap();

        // This should work on every iteration because search() takes &self.
        // The panic message is built lazily, only when the search fails.
        index
            .search(&res, &search_params, &queries, &neighbors, &distances)
            .unwrap_or_else(|e| panic!("search iteration {} failed: {:?}", search_iter, e));

        // Copy back to host memory
        distances.to_host(&res, &mut distances_host).unwrap();
        neighbors.to_host(&res, &mut neighbors_host).unwrap();

        // The first query is row 0 of the dataset, so its nearest neighbor
        // should be index 0 on every iteration.
        assert_eq!(
            neighbors_host[[0, 0]],
            0,
            "iteration {}: first query should find itself",
            search_iter
        );
    }
}
}
61 changes: 59 additions & 2 deletions rust/cuvs/src/ivf_flat/index.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -59,7 +59,7 @@ impl Index {
/// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
/// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
pub fn search(
self,
&self,
res: &Resources,
params: &SearchParams,
queries: &ManagedTensor,
Expand Down Expand Up @@ -157,4 +157,61 @@ mod tests {
assert_eq!(neighbors_host[[2, 0]], 2);
assert_eq!(neighbors_host[[3, 0]], 3);
}

/// Test that a single IVF-Flat index can be searched repeatedly without rebuilding.
///
/// `search()` takes `&self`, so the same `Index` value must remain usable on
/// every iteration of the loop below.
#[test]
fn test_ivf_flat_multiple_searches() {
    let build_params = IndexParams::new().unwrap().set_n_lists(64);
    let res = Resources::new().unwrap();

    // Create a random dataset
    let n_datapoints = 1024;
    let n_features = 16;
    let dataset =
        ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));

    let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();

    // Build the index once
    let index = Index::build(&res, &build_params, dataset_device)
        .expect("failed to create ivf-flat index");

    let search_params = SearchParams::new().unwrap();
    let k = 5;
    // Loop-invariant: every iteration queries with the same number of rows.
    let n_queries = 4;

    // Perform multiple searches on the same index
    for search_iter in 0..3 {
        // The queries are the first `n_queries` rows of the dataset itself.
        let queries = dataset.slice(s![0..n_queries, ..]);
        let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();

        let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
        let neighbors = ManagedTensor::from(&neighbors_host)
            .to_device(&res)
            .unwrap();

        let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
        let distances = ManagedTensor::from(&distances_host)
            .to_device(&res)
            .unwrap();

        // This should work on every iteration because search() takes &self.
        // The panic message is built lazily, only when the search fails.
        index
            .search(&res, &search_params, &queries, &neighbors, &distances)
            .unwrap_or_else(|e| panic!("search iteration {} failed: {:?}", search_iter, e));

        // Copy back to host memory
        distances.to_host(&res, &mut distances_host).unwrap();
        neighbors.to_host(&res, &mut neighbors_host).unwrap();

        // The first query is row 0 of the dataset, so its nearest neighbor
        // should be index 0 on every iteration.
        assert_eq!(
            neighbors_host[[0, 0]],
            0,
            "iteration {}: first query should find itself",
            search_iter
        );
    }
}
}
61 changes: 59 additions & 2 deletions rust/cuvs/src/ivf_pq/index.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -59,7 +59,7 @@ impl Index {
/// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
/// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
pub fn search(
self,
&self,
res: &Resources,
params: &SearchParams,
queries: &ManagedTensor,
Expand Down Expand Up @@ -151,4 +151,61 @@ mod tests {
assert_eq!(neighbors_host[[2, 0]], 2);
assert_eq!(neighbors_host[[3, 0]], 3);
}

/// Test that a single IVF-PQ index can be searched repeatedly without rebuilding.
///
/// `search()` takes `&self`, so the same `Index` value must remain usable on
/// every iteration of the loop below.
#[test]
fn test_ivf_pq_multiple_searches() {
    let build_params = IndexParams::new().unwrap().set_n_lists(64);
    let res = Resources::new().unwrap();

    // Create a random dataset
    let n_datapoints = 1024;
    let n_features = 16;
    let dataset =
        ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));

    let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();

    // Build the index once
    let index = Index::build(&res, &build_params, dataset_device)
        .expect("failed to create ivf-pq index");

    let search_params = SearchParams::new().unwrap();
    let k = 5;
    // Loop-invariant: every iteration queries with the same number of rows.
    let n_queries = 4;

    // Perform multiple searches on the same index
    for search_iter in 0..3 {
        // The queries are the first `n_queries` rows of the dataset itself.
        let queries = dataset.slice(s![0..n_queries, ..]);
        let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();

        let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
        let neighbors = ManagedTensor::from(&neighbors_host)
            .to_device(&res)
            .unwrap();

        let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
        let distances = ManagedTensor::from(&distances_host)
            .to_device(&res)
            .unwrap();

        // This should work on every iteration because search() takes &self.
        // The panic message is built lazily, only when the search fails.
        index
            .search(&res, &search_params, &queries, &neighbors, &distances)
            .unwrap_or_else(|e| panic!("search iteration {} failed: {:?}", search_iter, e));

        // Copy back to host memory
        distances.to_host(&res, &mut distances_host).unwrap();
        neighbors.to_host(&res, &mut neighbors_host).unwrap();

        // The first query is row 0 of the dataset, so its nearest neighbor
        // should be index 0 on every iteration.
        assert_eq!(
            neighbors_host[[0, 0]],
            0,
            "iteration {}: first query should find itself",
            search_iter
        );
    }
}
}