Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cpp/include/cuvs/neighbors/cagra.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <cuvs/core/c_api.h>
#include <cuvs/distance/distance.h>
#include <cuvs/neighbors/common.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>
Expand Down Expand Up @@ -388,13 +389,16 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res,
* @param[in] queries DLManagedTensor* queries dataset to search
* @param[out] neighbors DLManagedTensor* output `k` neighbors for queries
* @param[out] distances DLManagedTensor* output `k` distances for queries
* @param[in] filter cuvsFilter input filter that can be used
to filter queries and neighbors based on the given bitset.
*/
cuvsError_t cuvsCagraSearch(cuvsResources_t res,
cuvsCagraSearchParams_t params,
cuvsCagraIndex_t index,
DLManagedTensor* queries,
DLManagedTensor* neighbors,
DLManagedTensor* distances);
DLManagedTensor* distances,
cuvsFilter filter);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This API change needs to be propagated to:

  • the python package
  • the example C project (cuvs/example/c)
  • probably the rust package

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done👍


/**
* @}
Expand Down
38 changes: 31 additions & 7 deletions cpp/src/neighbors/cagra_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <cuvs/core/interop.hpp>
#include <cuvs/neighbors/cagra.h>
#include <cuvs/neighbors/cagra.hpp>
#include <cuvs/neighbors/common.h>

#include <fstream>

Expand Down Expand Up @@ -92,7 +93,8 @@ void _search(cuvsResources_t res,
cuvsCagraIndex index,
DLManagedTensor* queries_tensor,
DLManagedTensor* neighbors_tensor,
DLManagedTensor* distances_tensor)
DLManagedTensor* distances_tensor,
cuvsFilter filter)
{
auto res_ptr = reinterpret_cast<raft::resources*>(res);
auto index_ptr = reinterpret_cast<cuvs::neighbors::cagra::index<T, uint32_t>*>(index.addr);
Expand All @@ -118,8 +120,26 @@ void _search(cuvsResources_t res,
auto queries_mds = cuvs::core::from_dlpack<queries_mdspan_type>(queries_tensor);
auto neighbors_mds = cuvs::core::from_dlpack<neighbors_mdspan_type>(neighbors_tensor);
auto distances_mds = cuvs::core::from_dlpack<distances_mdspan_type>(distances_tensor);
cuvs::neighbors::cagra::search(
*res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds);
if (filter.type == NO_FILTER) {
cuvs::neighbors::cagra::search(
*res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds);
} else if (filter.type == BITSET) {
using filter_mdspan_type = raft::device_vector_view<std::uint32_t, int64_t, raft::row_major>;
Copy link
Copy Markdown
Contributor Author

@ajit283 ajit283 Nov 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the build fails because of the std::uint32_t as the first type argument. The type arguments should be <index_t, index_t>, see raft bitset.hpp:

template <typename bitset_t = uint32_t, typename index_t = uint32_t>
struct bitset {
  static constexpr index_t bitset_element_size = sizeof(bitset_t) * 8;

  /**
   * @brief Construct a new bitset object with a list of indices to unset.
   *
   * @param res RAFT resources
   * @param mask_index List of indices to unset in the bitset
   * @param bitset_len Length of the bitset
   * @param default_value Default value to set the bits to. Default is true.
   */
  bitset(const raft::resources& res,
         raft::device_vector_view<const index_t, index_t> mask_index,
         index_t bitset_len,
         bool default_value = true);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm actually I missed this. The function you are using here is creating a bitset from a list of indices and I don't think it is the workflow that we expect.
The C++ function accepts a bitset_view, not a bitset, so at this point the memory for the bitset should already allocated and we just need to transfer the pointer and the length of the bitset. The C function should also assume that the filter given in input is a bitset already allocated and filled, instead of a list of neighbors to filter. So the filter taken as a parameter in this function should be manipulated as a bitset_view object.

auto removed_indices_tensor = reinterpret_cast<DLManagedTensor*>(filter.addr);
auto removed_indices = cuvs::core::from_dlpack<filter_mdspan_type>(removed_indices_tensor);
cuvs::core::bitset_view<std::uint32_t, int64_t> removed_indices_bitset(
removed_indices, index_ptr->dataset().extent(0));
auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset);
cuvs::neighbors::cagra::search(*res_ptr,
search_params,
*index_ptr,
queries_mds,
neighbors_mds,
distances_mds,
bitset_filter_obj);
} else {
RAFT_FAIL("Unsupported filter type: BITMAP");
}
}

template <typename T>
Expand Down Expand Up @@ -214,7 +234,8 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res,
cuvsCagraIndex_t index_c_ptr,
DLManagedTensor* queries_tensor,
DLManagedTensor* neighbors_tensor,
DLManagedTensor* distances_tensor)
DLManagedTensor* distances_tensor,
cuvsFilter filter)
{
return cuvs::core::translate_exceptions([=] {
auto queries = queries_tensor->dl_tensor;
Expand All @@ -237,11 +258,14 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res,
RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries");

if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
_search<float>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
_search<float>(
res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
} else if (queries.dtype.code == kDLInt && queries.dtype.bits == 8) {
_search<int8_t>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
_search<int8_t>(
res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
} else if (queries.dtype.code == kDLUInt && queries.dtype.bits == 8) {
_search<uint8_t>(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor);
_search<uint8_t>(
res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
} else {
RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d",
queries.dtype.code,
Expand Down
122 changes: 121 additions & 1 deletion cpp/test/neighbors/ann_cagra_c.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,14 @@ float queries[4][2] = {{0.48216683, 0.0428398},
{0.51260436, 0.2643005},
{0.05198065, 0.5789965}};

uint32_t filter[1] = {0b1001}; // index 1 and 2 are removed

uint32_t neighbors_exp[4] = {3, 0, 3, 1};
float distances_exp[4] = {0.03878258, 0.12472608, 0.04776672, 0.15224178};

uint32_t neighbors_exp_filtered[4] = {3, 0, 3, 0};
float distances_exp_filtered[4] = {0.03878258, 0.12472608, 0.04776672, 0.59063464};

TEST(CagraC, BuildSearch)
{
// create cuvsResources_t
Expand Down Expand Up @@ -109,10 +114,15 @@ TEST(CagraC, BuildSearch)
distances_tensor.dl_tensor.shape = distances_shape;
distances_tensor.dl_tensor.strides = nullptr;

cuvsFilter filter;
filter.type = NO_FILTER;
filter.addr = (uintptr_t)NULL;

// search index
cuvsCagraSearchParams_t search_params;
cuvsCagraSearchParamsCreate(&search_params);
cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor);
cuvsCagraSearch(
res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter);

// verify output
ASSERT_TRUE(
Expand All @@ -126,3 +136,113 @@ TEST(CagraC, BuildSearch)
cuvsCagraIndexDestroy(index);
cuvsResourcesDestroy(res);
}

TEST(CagraC, BuildSearchFiltered)
{
// create cuvsResources_t
cuvsResources_t res;
cuvsResourcesCreate(&res);
cudaStream_t stream;
cuvsStreamGet(res, &stream);

// create dataset DLTensor
DLManagedTensor dataset_tensor;
dataset_tensor.dl_tensor.data = dataset;
dataset_tensor.dl_tensor.device.device_type = kDLCPU;
dataset_tensor.dl_tensor.ndim = 2;
dataset_tensor.dl_tensor.dtype.code = kDLFloat;
dataset_tensor.dl_tensor.dtype.bits = 32;
dataset_tensor.dl_tensor.dtype.lanes = 1;
int64_t dataset_shape[2] = {4, 2};
dataset_tensor.dl_tensor.shape = dataset_shape;
dataset_tensor.dl_tensor.strides = nullptr;

// create index
cuvsCagraIndex_t index;
cuvsCagraIndexCreate(&index);

// build index
cuvsCagraIndexParams_t build_params;
cuvsCagraIndexParamsCreate(&build_params);
cuvsCagraBuild(res, build_params, &dataset_tensor, index);

// create queries DLTensor
rmm::device_uvector<float> queries_d(4 * 2, stream);
raft::copy(queries_d.data(), (float*)queries, 4 * 2, stream);

DLManagedTensor queries_tensor;
queries_tensor.dl_tensor.data = queries_d.data();
queries_tensor.dl_tensor.device.device_type = kDLCUDA;
queries_tensor.dl_tensor.ndim = 2;
queries_tensor.dl_tensor.dtype.code = kDLFloat;
queries_tensor.dl_tensor.dtype.bits = 32;
queries_tensor.dl_tensor.dtype.lanes = 1;
int64_t queries_shape[2] = {4, 2};
queries_tensor.dl_tensor.shape = queries_shape;
queries_tensor.dl_tensor.strides = nullptr;

// create neighbors DLTensor
rmm::device_uvector<uint32_t> neighbors_d(4, stream);

DLManagedTensor neighbors_tensor;
neighbors_tensor.dl_tensor.data = neighbors_d.data();
neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
neighbors_tensor.dl_tensor.ndim = 2;
neighbors_tensor.dl_tensor.dtype.code = kDLUInt;
neighbors_tensor.dl_tensor.dtype.bits = 32;
neighbors_tensor.dl_tensor.dtype.lanes = 1;
int64_t neighbors_shape[2] = {4, 1};
neighbors_tensor.dl_tensor.shape = neighbors_shape;
neighbors_tensor.dl_tensor.strides = nullptr;

// create distances DLTensor
rmm::device_uvector<float> distances_d(4, stream);

DLManagedTensor distances_tensor;
distances_tensor.dl_tensor.data = distances_d.data();
distances_tensor.dl_tensor.device.device_type = kDLCUDA;
distances_tensor.dl_tensor.ndim = 2;
distances_tensor.dl_tensor.dtype.code = kDLFloat;
distances_tensor.dl_tensor.dtype.bits = 32;
distances_tensor.dl_tensor.dtype.lanes = 1;
int64_t distances_shape[2] = {4, 1};
distances_tensor.dl_tensor.shape = distances_shape;
distances_tensor.dl_tensor.strides = nullptr;

// create filter DLTensor
rmm::device_uvector<uint32_t> filter_d(1, stream);
raft::copy(filter_d.data(), filter, 1, stream);

cuvsFilter filter;

DLManagedTensor filter_tensor;
filter_tensor.dl_tensor.data = filter_d.data();
filter_tensor.dl_tensor.device.device_type = kDLCUDA;
filter_tensor.dl_tensor.ndim = 1;
filter_tensor.dl_tensor.dtype.code = kDLUInt;
filter_tensor.dl_tensor.dtype.bits = 32;
filter_tensor.dl_tensor.dtype.lanes = 1;
int64_t filter_shape[1] = {1};
filter_tensor.dl_tensor.shape = filter_shape;
filter_tensor.dl_tensor.strides = nullptr;

filter.type = BITSET;
filter.addr = (uintptr_t)&filter_tensor;

// search index
cuvsCagraSearchParams_t search_params;
cuvsCagraSearchParamsCreate(&search_params);
cuvsCagraSearch(
res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter);
// verify output
ASSERT_TRUE(cuvs::devArrMatchHost(
neighbors_exp_filtered, neighbors_d.data(), 4, cuvs::Compare<uint32_t>()));
ASSERT_TRUE(cuvs::devArrMatchHost(
distances_exp_filtered, distances_d.data(), 4, cuvs::CompareApprox<float>(0.001f)));

// de-allocate index and res
cuvsCagraSearchParamsDestroy(search_params);
cuvsCagraIndexParamsDestroy(build_params);
cuvsCagraIndexDestroy(index);
cuvsResourcesDestroy(res);
}
12 changes: 8 additions & 4 deletions examples/c/src/cagra_c_example.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ void cagra_build_search_simple() {
// Allocate memory for `queries`, `neighbors` and `distances` output
uint32_t *neighbors;
float *distances, *queries_d;
cuvsRMMAlloc(res, (void**) &queries_d, sizeof(float) * n_queries * n_cols);
cuvsRMMAlloc(res, (void**) &neighbors, sizeof(uint32_t) * n_queries * topk);
cuvsRMMAlloc(res, (void**) &distances, sizeof(float) * n_queries * topk);
cuvsRMMAlloc(res, (void **)&queries_d, sizeof(float) * n_queries * n_cols);
cuvsRMMAlloc(res, (void **)&neighbors, sizeof(uint32_t) * n_queries * topk);
cuvsRMMAlloc(res, (void **)&distances, sizeof(float) * n_queries * topk);

// Use DLPack to represent `queries`, `neighbors` and `distances` as tensors
cudaMemcpy(queries_d, queries, sizeof(float) * 4 * 2, cudaMemcpyDefault);
Expand Down Expand Up @@ -111,8 +111,12 @@ void cagra_build_search_simple() {
cuvsCagraSearchParams_t search_params;
cuvsCagraSearchParamsCreate(&search_params);

cuvsFilter filter;
filter.type = NO_FILTER;
filter.addr = (uintptr_t)NULL;

cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor,
&distances_tensor);
&distances_tensor, filter);

// print results
uint32_t *neighbors_h =
Expand Down
4 changes: 3 additions & 1 deletion python/cuvs/cuvs/neighbors/cagra/cagra.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ from libcpp cimport bool
from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
from cuvs.distance_type cimport cuvsDistanceType
from cuvs.neighbors.filters.filters cimport cuvsFilter


cdef extern from "cuvs/neighbors/cagra.h" nogil:
Expand Down Expand Up @@ -115,7 +116,8 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil:
cuvsCagraIndex_t index,
DLManagedTensor* queries,
DLManagedTensor* neighbors,
DLManagedTensor* distances) except +
DLManagedTensor* distances,
cuvsFilter filter) except +

cuvsError_t cuvsCagraSerialize(cuvsResources_t res,
const char * filename,
Expand Down
13 changes: 11 additions & 2 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ from libc.stdint cimport (
)

from cuvs.common.exceptions import check_cuvs
from cuvs.neighbors.filters import no_filter


cdef class CompressionParams:
Expand Down Expand Up @@ -484,7 +485,8 @@ def search(SearchParams search_params,
k,
neighbors=None,
distances=None,
resources=None):
resources=None,
filter=None):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add this parameter to the python documentation

"""
Find the k nearest neighbors for each query.

Expand All @@ -503,6 +505,9 @@ def search(SearchParams search_params,
distances : Optional CUDA array interface compliant matrix shape
(n_queries, k) If supplied, the distances to the
neighbors will be written here in-place. (default None)
filter: Optional cuvs.neighbors.cuvsFilter can be used to filter
neighbors based on a given bitset.
(default None)
{resources_docstring}

Examples
Expand Down Expand Up @@ -557,6 +562,9 @@ def search(SearchParams search_params,
_check_input_array(distances_cai, [np.dtype('float32')],
exp_rows=n_queries, exp_cols=k)

if filter is None:
filter = no_filter()

cdef cuvsCagraSearchParams* params = &search_params.params
cdef cydlpack.DLManagedTensor* queries_dlpack = \
cydlpack.dlpack_c(queries_cai)
Expand All @@ -573,7 +581,8 @@ def search(SearchParams search_params,
index.index,
queries_dlpack,
neighbors_dlpack,
distances_dlpack
distances_dlpack,
filter.prefilter
))

return (distances, neighbors)
Expand Down
4 changes: 2 additions & 2 deletions python/cuvs/cuvs/neighbors/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# limitations under the License.


from .filters import Prefilter, from_bitmap, no_filter
from .filters import Prefilter, from_bitmap, from_bitset, no_filter

__all__ = ["no_filter", "from_bitmap", "Prefilter"]
__all__ = ["no_filter", "from_bitmap", "from_bitset", "Prefilter"]
Loading