diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index 9b9e8eb0e6..7c07dcfbb4 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -437,7 +437,187 @@ struct index : cuvs::neighbors::index { { } + /** Construct an index from strided_dataset and knn_graph + * + * The strided_dataset must already satisfy the alignment requirements (16-byte aligned). + * Use make_strided_dataset_zerocopy() or make_strided_dataset_owning() to create a properly + * aligned strided_dataset. + * + * The index stores a non-owning reference to the dataset. It is the caller's responsibility + * to ensure that the dataset stays alive as long as the index. + * + * Usage examples: + * + * - Cagra index is normally created by the cagra::build + * @code{.cpp} + * using namespace cuvs::neighbors; + * auto dataset_matrix = raft::make_host_matrix(n_rows, n_cols); + * load_dataset(dataset_matrix.view()); + * // Create properly aligned dataset + * auto dataset = make_strided_dataset_owning(res, dataset_matrix.view()); + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from the strided dataset + * auto index = cagra::build(res, index_params, *dataset); + * // use default search parameters + * cagra::search_params search_params; + * // search K nearest neighbours + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); + * @endcode + * + * - Constructing index using existing knn-graph + * @code{.cpp} + * using namespace cuvs::neighbors; + * + * auto dataset_matrix = raft::make_device_matrix(res, n_rows, n_cols); + * auto knn_graph = raft::make_device_matrix(res, n_rows, graph_degree); + * + * // custom loading and graph creation + * // load_dataset(dataset_matrix.view()); + * // create_knn_graph(knn_graph.view()); + * + * // Create strided dataset (zero-copy if aligned, throws if not) + * auto dataset = make_strided_dataset_zerocopy(dataset_matrix.view()); + * + * // Wrap the existing strided dataset into an index structure + * cagra::index index(res, metric, *dataset, + * raft::make_const_mdspan(knn_graph.view())); + * + * // Both dataset and knn_graph objects have to be in scope while the index is used because + * // the index only stores a reference to these. + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + */ + template + index(raft::resources const& res, + cuvs::distance::DistanceType metric, + strided_dataset const& dataset, + raft::mdspan, + raft::row_major, + graph_accessor> knn_graph) + : cuvs::neighbors::index(), + metric_(metric), + graph_(raft::make_device_matrix(res, 0, 0)), + dataset_(new non_owning_dataset(dataset.view())), + dataset_norms_(std::nullopt) + { + RAFT_EXPECTS(dataset.n_rows() == knn_graph.extent(0), + "Dataset and knn_graph must have equal number of rows"); + + // Validate stride alignment (16-byte alignment) + constexpr uint32_t kAlignBytes = 16; + RAFT_EXPECTS((dataset.stride() * sizeof(T)) % kAlignBytes == 0, + "Dataset stride must satisfy 16-byte alignment (stride * sizeof(T) must be " + "divisible by 16). Use make_strided_dataset_zerocopy() or " + "make_strided_dataset_owning() to create a properly aligned dataset."); + + update_graph(res, knn_graph); + + if (metric_ == cuvs::distance::DistanceType::CosineExpanded) { + if (dataset.n_rows() > 0) { compute_dataset_norms_(res); } + } + + raft::resource::sync_stream(res); + } + + /** Construct an index from strided_dataset and knn_graph (taking ownership of the dataset object) + * + * The strided_dataset must already satisfy the alignment requirements (16-byte aligned). + * Use make_strided_dataset_zerocopy() or make_strided_dataset_owning() to create a properly + * aligned strided_dataset. + * + * The index takes ownership of the strided_dataset *object* via std::unique_ptr. The caller + * transfers ownership using std::move(). Note that: + * - If the dataset is an owning_dataset, the index owns both the dataset object and the data. + * - If the dataset is a non_owning_dataset, the index owns the dataset object (the view), but + * the caller must still ensure the underlying data remains valid for the index lifetime. + * + * Usage examples: + * + * - Cagra index with fully owned dataset (owns both object and data) + * @code{.cpp} + * using namespace cuvs::neighbors; + * auto dataset_matrix = raft::make_host_matrix(n_rows, n_cols); + * load_dataset(dataset_matrix.view()); + * // Create owning dataset (always copies for alignment) + * auto dataset = make_strided_dataset_owning(res, dataset_matrix.view()); + * auto knn_graph = raft::make_device_matrix(res, n_rows, graph_degree); + * // create_knn_graph(knn_graph.view()); + * // Transfer ownership to the index - index owns everything + * cagra::index index(res, metric, std::move(dataset), + * raft::make_const_mdspan(knn_graph.view())); + * // dataset is now moved-from; dataset_matrix can go out of scope + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + * + * - Cagra index with non-owning dataset (owns view object but not underlying data) + * @code{.cpp} + * using namespace cuvs::neighbors; + * + * auto dataset_matrix = raft::make_device_matrix(res, n_rows, n_cols); + * auto knn_graph = raft::make_device_matrix(res, n_rows, graph_degree); + * + * // custom loading and graph creation + * // load_dataset(dataset_matrix.view()); + * // create_knn_graph(knn_graph.view()); + * + * // Create non-owning dataset (zero-copy) + * auto dataset = make_strided_dataset_zerocopy(dataset_matrix.view()); + * + * // Transfer ownership of the dataset object to the index + * cagra::index index(res, metric, std::move(dataset), + * raft::make_const_mdspan(knn_graph.view())); + * + * // dataset is now moved-from + * // dataset_matrix and knn_graph must remain in scope while the index is used + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + */ + template + index(raft::resources const& res, + cuvs::distance::DistanceType metric, + std::unique_ptr> dataset, + raft::mdspan, + raft::row_major, + graph_accessor> knn_graph) + : cuvs::neighbors::index(), + metric_(metric), + graph_(raft::make_device_matrix(res, 0, 0)), + dataset_(std::move(dataset)), + dataset_norms_(std::nullopt) + { + RAFT_EXPECTS(dataset_->n_rows() == knn_graph.extent(0), + "Dataset and knn_graph must have equal number of rows"); + + // Validate stride alignment (16-byte alignment) + constexpr uint32_t kAlignBytes = 16; + auto p = dynamic_cast*>(dataset_.get()); + if (p) { + RAFT_EXPECTS((p->stride() * sizeof(T)) % kAlignBytes == 0, + "Dataset stride must satisfy 16-byte alignment (stride * sizeof(T) must be " + "divisible by 16). Use make_strided_dataset_zerocopy() or " + "make_strided_dataset_owning() to create a properly aligned dataset."); + } + + update_graph(res, knn_graph); + + if (metric_ == cuvs::distance::DistanceType::CosineExpanded) { + if (p && p->n_rows() > 0) { compute_dataset_norms_(res); } + } + + raft::resource::sync_stream(res); + } + /** Construct an index from dataset and knn_graph arrays + * + * @deprecated This constructor may implicitly copy the dataset if not 16-byte aligned. + * Use the constructor that accepts strided_dataset for explicit control over copying. + * Create a strided_dataset using make_strided_dataset_zerocopy() or + * make_strided_dataset_owning() first. * * If the dataset and graph is already in GPU memory, then the index is just a thin wrapper around * these that stores a non-owning a reference to the arrays. @@ -519,9 +699,42 @@ struct index : cuvs::neighbors::index { raft::resource::sync_stream(res); } + /** + * Replace the dataset with a new strided_dataset (non-owning). + * + * The strided_dataset must already satisfy the alignment requirements (16-byte aligned). + * Use make_strided_dataset_zerocopy() or make_strided_dataset_owning() to create a properly + * aligned strided_dataset. + * + * The index stores only a reference to the dataset. It is the caller's responsibility to ensure + * that dataset stays alive as long as the index. It is expected that the same set of vectors are + * used for update_dataset and index build. + * + * Note: This will clear any precomputed dataset norms. + */ + void update_dataset(raft::resources const& res, strided_dataset const& dataset) + { + // Validate stride alignment (16-byte alignment) + constexpr uint32_t kAlignBytes = 16; + RAFT_EXPECTS((dataset.stride() * sizeof(T)) % kAlignBytes == 0, + "Dataset stride must satisfy 16-byte alignment (stride * sizeof(T) must be " + "divisible by 16). Use make_strided_dataset_zerocopy() or " + "make_strided_dataset_owning() to create a properly aligned dataset."); + + dataset_ = std::make_unique>(dataset.view()); + dataset_norms_.reset(); + + if (metric() == cuvs::distance::DistanceType::CosineExpanded) { + if (dataset.n_rows() > 0) { compute_dataset_norms_(res); } + } + } + /** * Replace the dataset with a new dataset. * + * @deprecated Use update_dataset(raft::resources const& res, strided_dataset const& + * dataset) instead. + * * If the new dataset rows are aligned on 16 bytes, then only a reference is stored to the * dataset. It is the caller's responsibility to ensure that dataset stays alive as long as the * index. It is expected that the same set of vectors are used for update_dataset and index build. @@ -539,7 +752,13 @@ struct index : cuvs::neighbors::index { } } - /** Set the dataset reference explicitly to a device matrix view with padding. */ + /** + * Set the dataset reference explicitly to a device matrix view with padding. + * + * @deprecated Use update_dataset(raft::resources const& res, strided_dataset const& + * dataset) instead. + * + */ void update_dataset(raft::resources const& res, raft::device_matrix_view dataset) { @@ -835,6 +1054,45 @@ struct index : cuvs::neighbors::index { * Usage example: * @code{.cpp} * using namespace cuvs::neighbors; + * // Create a properly aligned dataset + * auto dataset_matrix = raft::make_device_matrix(res, n_rows, dim); + * // load data into dataset_matrix... + * auto dataset = make_strided_dataset_owning(res, dataset_matrix.view()); + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from the strided dataset + * auto index = cagra::build(res, index_params, *dataset); + * @endcode + * + * @param[in] res + * @param[in] params parameters for building the index + * @param[in] dataset a strided dataset + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, + const cuvs::neighbors::cagra::index_params& params, + strided_dataset const& dataset) + -> cuvs::neighbors::cagra::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * @deprecated Use build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& + * params, strided_dataset const& dataset) instead. + * + * The build consist of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. + * + * The following distance metrics are supported: + * - L2 + * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) + * - CosineExpanded + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; * // use default index parameters * cagra::index_params index_params; * // create and fill the index from a [N, D] dataset @@ -906,6 +1164,45 @@ auto build(raft::resources const& res, * The following distance metrics are supported: * - L2 * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) + * - CosineExpanded + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // Create a properly aligned dataset + * auto dataset_matrix = raft::make_device_matrix(res, n_rows, dim); + * // load data into dataset_matrix... + * auto dataset = make_strided_dataset_owning(res, dataset_matrix.view()); + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from the strided dataset + * auto index = cagra::build(res, index_params, *dataset); + * @endcode + * + * @param[in] res + * @param[in] params parameters for building the index + * @param[in] dataset a strided dataset + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, + const cuvs::neighbors::cagra::index_params& params, + strided_dataset const& dataset) + -> cuvs::neighbors::cagra::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * @deprecated Use build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& + * params, strided_dataset const& dataset) instead. + * + * The build consist of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. + * + * The following distance metrics are supported: + * - L2 + * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) * - CosineExpanded (dataset norms are computed as float regardless of input data type) * * Usage example: @@ -980,6 +1277,45 @@ auto build(raft::resources const& res, * * The following distance metrics are supported: * - L2 + * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) + * - CosineExpanded + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // Create a properly aligned dataset + * auto dataset_matrix = raft::make_device_matrix(res, n_rows, dim); + * // load data into dataset_matrix... + * auto dataset = make_strided_dataset_owning(res, dataset_matrix.view()); + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from the strided dataset + * auto index = cagra::build(res, index_params, *dataset); + * @endcode + * + * @param[in] res + * @param[in] params parameters for building the index + * @param[in] dataset a strided dataset + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, + const cuvs::neighbors::cagra::index_params& params, + strided_dataset const& dataset) + -> cuvs::neighbors::cagra::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * @deprecated Use build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& + * params, strided_dataset const& dataset) instead. + * + * The build consist of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. + * + * The following distance metrics are supported: + * - L2 * - CosineExpanded (dataset norms are computed as float regardless of input data type) * * Usage example: @@ -1056,6 +1392,45 @@ auto build(raft::resources const& res, * The following distance metrics are supported: * - L2 * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) + * - CosineExpanded + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // Create a properly aligned dataset + * auto dataset_matrix = raft::make_device_matrix(res, n_rows, dim); + * // load data into dataset_matrix... + * auto dataset = make_strided_dataset_owning(res, dataset_matrix.view()); + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from the strided dataset + * auto index = cagra::build(res, index_params, *dataset); + * @endcode + * + * @param[in] res + * @param[in] params parameters for building the index + * @param[in] dataset a strided dataset + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, + const cuvs::neighbors::cagra::index_params& params, + strided_dataset const& dataset) + -> cuvs::neighbors::cagra::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * @deprecated Use build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& + * params, strided_dataset const& dataset) instead. + * + * The build consist of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. + * + * The following distance metrics are supported: + * - L2 + * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) * - CosineExpanded (dataset norms are computed as float regardless of input data type) * * Usage example: diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index b2da48aaa1..25f81ffe22 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -135,6 +135,79 @@ struct merge_params { /** @} */ // end group neighbors_index +/** + * @brief Check if a matrix satisfies byte alignment requirements (for cuVS indexes). + * + * This function checks whether a matrix is device-accessible, row-major, and has stride that meets + * the specified byte alignment requirements. These properties are necessary for zero-copy dataset + * construction in cuVS indexes. + * + * @tparam MatrixT An mdarray or mdspan-like type with: + * - `value_type` type alias + * - `data_handle()` method returning a pointer + * - `extent(dim)` method returning dimension sizes + * - `stride(dim)` method returning dimension strides + * + * @param matrix Matrix (mdarray or mdspan) to check + * @param align_bytes Required row alignment in bytes (default 16 for CAGRA) + * @return true if the matrix is device-accessible, row-major, and properly aligned + * false otherwise + */ +template +bool is_matrix_aligned(const MatrixT& matrix, uint32_t align_bytes = 16) +{ + using value_type = typename MatrixT::value_type; + + cudaPointerAttributes ptr_attrs; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, matrix.data_handle())); + auto* device_ptr = reinterpret_cast(ptr_attrs.devicePointer); + const uint32_t required_stride = + raft::round_up_safe(matrix.extent(1) * sizeof(value_type), + std::lcm(align_bytes, sizeof(value_type))) / + sizeof(value_type); + const uint32_t actual_stride = matrix.stride(0) > 0 ? matrix.stride(0) : matrix.extent(1); + + const bool device_accessible = device_ptr != nullptr; + const bool row_major = matrix.stride(1) <= 1; + const bool stride_matches = required_stride == actual_stride; + + return device_accessible && row_major && stride_matches; +} + +/** + * @brief Check if a matrix satisfies stride requirements (for cuVS indexes). + * + * This function checks whether a matrix is device-accessible, row-major, and has stride that meets + * the specified stride requirements. These properties are necessary for zero-copy dataset + * construction in cuVS indexes. + * + * @tparam MatrixT An mdarray or mdspan-like type with: + * - `value_type` type alias + * - `data_handle()` method returning a pointer + * - `extent(dim)` method returning dimension sizes + * - `stride(dim)` method returning dimension strides + * + * @param matrix Matrix (mdarray or mdspan) to check + * @param requested_stride Required stride + * @return true if the matrix is device-accessible, row-major, and properly strided + * false otherwise + */ +template +bool is_matrix_strided(const MatrixT& matrix, uint32_t requested_stride) +{ + using value_type = typename MatrixT::value_type; + + cudaPointerAttributes ptr_attrs; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, matrix.data_handle())); + auto* device_ptr = reinterpret_cast(ptr_attrs.devicePointer); + const uint32_t actual_stride = matrix.stride(0) > 0 ? matrix.stride(0) : matrix.extent(1); + const bool device_accessible = device_ptr != nullptr; + const bool row_major = matrix.stride(1) <= 1; + const bool stride_matches = requested_stride == actual_stride; + + return device_accessible && row_major && stride_matches; +} + /** Two-dimensional dataset; maybe owning, maybe compressed, maybe strided. */ template struct dataset { @@ -227,9 +300,114 @@ struct is_strided_dataset inline constexpr bool is_strided_dataset_v = is_strided_dataset::value; +/** + * @brief Contstruct a non-owning (zero-copy) strided matrix from any mdarray or mdspan. + * + * This function requires the input matrix to satisfy two conditions: + * + * 1) The data is accessible from the current device + * 2) The memory layout is the same as expected (row-major matrix with the required stride) + * + * @tparam SrcT the source mdarray or mdspan + * + * @param[in] res raft resources handle + * @param[in] src the source mdarray or mdspan + * @param[in] required_stride the leading dimension (in elements) + * @return non-owning (zero-copy) current-device-accessible strided matrix + */ +template +auto make_strided_dataset_view(const raft::resources& res, const SrcT& src, uint32_t required_stride) + -> std::unique_ptr> +{ + using extents_type = typename SrcT::extents_type; + using value_type = typename SrcT::value_type; + using index_type = typename SrcT::index_type; + using layout_type = typename SrcT::layout_type; + static_assert(extents_type::rank() == 2, "The input must be a matrix."); + static_assert(std::is_same_v || + std::is_same_v> || + std::is_same_v, + "The input must be row-major"); + RAFT_EXPECTS(src.extent(1) <= required_stride, + "The input row length must be not larger than the desired stride."); + cudaPointerAttributes ptr_attrs; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, src.data_handle())); + auto* device_ptr = reinterpret_cast(ptr_attrs.devicePointer); + const uint32_t src_stride = src.stride(0) > 0 ? src.stride(0) : src.extent(1); + const bool device_accessible = device_ptr != nullptr; + const bool row_major = src.stride(1) <= 1; + const bool stride_matches = required_stride == src_stride; + + RAFT_EXPECTS(device_accessible && row_major && stride_matches, "The input matrix is not properly strided for zero-copy."); + // Everything matches: make a non-owning dataset + return std::make_unique>( + raft::make_device_strided_matrix_view( + device_ptr, src.extent(0), src.extent(1), required_stride)); +} + +/** + * @brief Contstruct an owning strided matrix from any mdarray or mdspan (L-value). + * + * This function constructs an owning device matrix and copies the data. + * When the data is copied, padding elements are filled with zeroes. + * + * @tparam SrcT the source mdarray or mdspan + * + * @param[in] res raft resources handle + * @param[in] src the source mdarray or mdspan + * @param[in] required_stride the leading dimension (in elements) + * @return owning current-device-accessible strided matrix + */ +template +auto make_strided_dataset_owning(const raft::resources& res, const SrcT& src, uint32_t required_stride) + -> std::unique_ptr> +{ + using extents_type = typename SrcT::extents_type; + using value_type = typename SrcT::value_type; + using index_type = typename SrcT::index_type; + using layout_type = typename SrcT::layout_type; + static_assert(extents_type::rank() == 2, "The input must be a matrix."); + static_assert(std::is_same_v || + std::is_same_v> || + std::is_same_v, + "The input must be row-major"); + RAFT_EXPECTS(src.extent(1) <= required_stride, + "The input row length must be not larger than the desired stride."); + const uint32_t src_stride = src.stride(0) > 0 ? src.stride(0) : src.extent(1); + + auto out_layout = + raft::make_strided_layout(src.extents(), cuda::std::array{required_stride, 1}); + auto out_array = + raft::make_device_matrix(res, src.extent(0), required_stride); + + using out_mdarray_type = decltype(out_array); + using out_layout_type = typename out_mdarray_type::layout_type; + using out_container_policy_type = typename out_mdarray_type::container_policy_type; + using out_owning_type = + owning_dataset; + + RAFT_CUDA_TRY(cudaMemsetAsync(out_array.data_handle(), + 0, + out_array.size() * sizeof(value_type), + raft::resource::get_cuda_stream(res))); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(out_array.data_handle(), + sizeof(value_type) * required_stride, + src.data_handle(), + sizeof(value_type) * src_stride, + sizeof(value_type) * src.extent(1), + src.extent(0), + cudaMemcpyDefault, + raft::resource::get_cuda_stream(res))); + + return std::make_unique(std::move(out_array), out_layout); +} + /** * @brief Contstruct a strided matrix from any mdarray or mdspan. * + * @deprecated Use make_strided_dataset_view (if the input matrix is properly aligned) or + * make_strided_dataset_owning (if the input matrix is not properly aligned) instead. + * * This function constructs a non-owning view if the input satisfied two conditions: * * 1) The data is accessible from the current device @@ -303,7 +481,7 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t } /** - * @brief Contstruct a strided matrix from any mdarray. + * @brief Contstruct a strided matrix from any mdarray (R-value). * * This function constructs an owning device matrix and copies the data. * When the data is copied, padding elements are filled with zeroes. @@ -319,7 +497,7 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t * @return owning current-device-accessible strided matrix */ template -auto make_strided_dataset( +auto make_strided_dataset_owning( const raft::resources& res, raft::mdarray, LayoutPolicy, ContainerPolicy>&& src, uint32_t required_stride) -> std::unique_ptr> @@ -374,11 +552,94 @@ auto make_strided_dataset( } /** - * @brief Contstruct a strided matrix from any mdarray or mdspan. + * @brief Contstruct a strided matrix from any mdarray. + * + * @deprecated Use make_strided_dataset_owning instead. + * + * This function constructs an owning device matrix and copies the data. + * When the data is copied, padding elements are filled with zeroes. + * + * @tparam DataT + * @tparam IdxT + * @tparam LayoutPolicy + * @tparam ContainerPolicy + * + * @param[in] res raft resources handle + * @param[in] src the source mdarray or mdspan + * @param[in] required_stride the leading dimension (in elements) + * @return owning current-device-accessible strided matrix + */ +template +auto make_strided_dataset( + const raft::resources& res, + raft::mdarray, LayoutPolicy, ContainerPolicy>&& src, + uint32_t required_stride) -> std::unique_ptr> +{ + return make_strided_dataset_owning(res, std::move(src), required_stride); +} + +/** + * @brief Contstruct a non-owning (zero-copy) strided matrix from any mdarray or mdspan. * * A variant `make_strided_dataset` that allows specifying the byte alignment instead of the * explicit stride length. * + * This function requries the input matrix to satisfy two conditions: + * + * 1) The data is accessible from the current device + * 2) The memory layout is the same as expected (row-major matrix with the required stride) + * + * @tparam SrcT the source mdarray or mdspan + * + * @param[in] res raft resources handle + * @param[in] src the source mdarray or mdspan + * @param[in] align_bytes the required byte alignment for the dataset rows. + * @return non-owning (zero-copy) current-device-accessible strided matrix + */ +template +auto make_aligned_dataset_view(const raft::resources& res, SrcT src, uint32_t align_bytes = 16) + -> std::unique_ptr> +{ + using source_type = std::remove_cv_t>; + using value_type = typename source_type::value_type; + constexpr size_t kSize = sizeof(value_type); + uint32_t required_stride = + raft::round_up_safe(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; + return make_strided_dataset_view(res, std::forward(src), required_stride); +} + +/** + * @brief Contstruct an owning strided matrix from any mdarray or mdspan. + * + * A variant `make_strided_dataset_owning` that allows specifying the byte alignment instead of the explicit stride length. + * + * @tparam SrcT the source mdarray or mdspan + * + * @param[in] res raft resources handle + * @param[in] src the source mdarray or mdspan + * @param[in] align_bytes the required byte alignment for the dataset rows. + * @return owning current-device-accessible strided matrix + */ +template +auto make_aligned_dataset_owning(const raft::resources& res, SrcT src, uint32_t align_bytes = 16) + -> std::unique_ptr> +{ + using source_type = std::remove_cv_t>; + using value_type = typename source_type::value_type; + constexpr size_t kSize = sizeof(value_type); + uint32_t required_stride = + raft::round_up_safe(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; + return make_strided_dataset_owning(res, std::forward(src), required_stride); +} + +/** + * @brief Contstruct a strided matrix from any mdarray or mdspan. + * + * @deprecated Use make_aligned_dataset_view or make_aligned_dataset_owning instead. + * + * A variant `make_strided_dataset` that allows specifying the byte alignment instead of the + * explicit stride length. + * * @tparam SrcT the source mdarray or mdspan * * @param[in] res raft resources handle @@ -395,8 +656,13 @@ auto make_aligned_dataset(const raft::resources& res, SrcT src, uint32_t align_b constexpr size_t kSize = sizeof(value_type); uint32_t required_stride = raft::round_up_safe(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; - return make_strided_dataset(res, std::forward(src), required_stride); + if (is_matrix_strided(src, required_stride)) { + return make_strided_dataset_view(res, std::forward(src), required_stride); + } else { + return make_strided_dataset_owning(res, std::forward(src), required_stride); + } } + /** * @brief VPQ compressed dataset. * diff --git a/cpp/src/neighbors/cagra.cuh b/cpp/src/neighbors/cagra.cuh index 30c5729f6b..9d5c3c4dc5 100644 --- a/cpp/src/neighbors/cagra.cuh +++ b/cpp/src/neighbors/cagra.cuh @@ -261,6 +261,22 @@ void optimize( detail::optimize(res, knn_graph, new_graph, guarantee_connectivity); } +template +index build( + raft::resources const& res, + const index_params& params, + strided_dataset const& dataset) +{ + if (std::holds_alternative(params.graph_build_params)) { + RAFT_EXPECTS(false, "ACE build with strided_dataset is not supported"); + } +#if 1 + return index(res, params.metric); +#else + return cuvs::neighbors::cagra::detail::build(res, params, dataset); +#endif +} + template (handle, dataset, knn_graph, params); \ } \ \ + template auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + strided_dataset const& dataset) \ + -> cuvs::neighbors::cagra::index; \ + \ auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::device_matrix_view dataset) \ diff --git a/cpp/src/neighbors/cagra_build_half.cu b/cpp/src/neighbors/cagra_build_half.cu index dd57cb87cc..fe2f1ea3f2 100644 --- a/cpp/src/neighbors/cagra_build_half.cu +++ b/cpp/src/neighbors/cagra_build_half.cu @@ -17,6 +17,11 @@ void build_knn_graph(raft::resources const& handle, cuvs::neighbors::cagra::build_knn_graph(handle, dataset, knn_graph, params); } +template cuvs::neighbors::cagra::index build( + raft::resources const& handle, + const cuvs::neighbors::cagra::index_params& params, + strided_dataset const& dataset); + cuvs::neighbors::cagra::index build( raft::resources const& handle, const cuvs::neighbors::cagra::index_params& params, diff --git a/cpp/src/neighbors/cagra_build_int8.cu b/cpp/src/neighbors/cagra_build_int8.cu index d651790662..8a8db6e389 100644 --- a/cpp/src/neighbors/cagra_build_int8.cu +++ b/cpp/src/neighbors/cagra_build_int8.cu @@ -17,6 +17,11 @@ namespace cuvs::neighbors::cagra { cuvs::neighbors::cagra::build_knn_graph(handle, dataset, knn_graph, params); \ } \ \ + template auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + strided_dataset const& dataset) \ + -> cuvs::neighbors::cagra::index; \ + \ auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::device_matrix_view dataset) \ diff --git a/cpp/src/neighbors/cagra_build_uint8.cu b/cpp/src/neighbors/cagra_build_uint8.cu index a819675d9c..f15f0613a2 100644 --- a/cpp/src/neighbors/cagra_build_uint8.cu +++ b/cpp/src/neighbors/cagra_build_uint8.cu @@ -17,6 +17,11 @@ namespace cuvs::neighbors::cagra { cuvs::neighbors::cagra::build_knn_graph(handle, dataset, knn_graph, params); \ } \ \ + template auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + strided_dataset const& dataset) \ + -> cuvs::neighbors::cagra::index; \ + \ auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::device_matrix_view dataset) \ diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 97d7bb1bac..5cec016a1b 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -1976,10 +1976,9 @@ template , raft::memory_type::host>> -auto iterative_build_graph( - raft::resources const& res, - const index_params& params, - raft::mdspan, raft::row_major, Accessor> dataset) +auto iterative_build_graph(raft::resources const& res, + const index_params& params, + strided_dataset const& dataset) { size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -1992,17 +1991,7 @@ auto iterative_build_graph( // initially small, and the number of nodes is doubled with each iteration. RAFT_LOG_INFO("Iteratively creating/improving graph index using CAGRA's search() and optimize()"); - // If dataset is a host matrix, change it to a device matrix. Also, if the - // dimensionality of the dataset does not meet the alighnemt restriction, - // add extra dimensions and change it to a strided matrix. - std::unique_ptr> dev_aligned_dataset; - try { - dev_aligned_dataset = make_aligned_dataset(res, dataset); - } catch (raft::logic_error& e) { - RAFT_LOG_ERROR("Iterative CAGRA graph build requires the dataset to fit GPU memory"); - throw e; - } - auto dev_aligned_dataset_view = dev_aligned_dataset.get()->view(); + auto dev_aligned_dataset_view = dataset.view(); // If the matrix stride and extent do no match, the extra dimensions are // also as extent since it cannot be used as query matrix. @@ -2012,7 +2001,7 @@ auto iterative_build_graph( dev_aligned_dataset_view.stride(0)); // Determine initial graph size. - uint64_t final_graph_size = (uint64_t)dataset.extent(0); + uint64_t final_graph_size = (uint64_t)dataset.n_rows(); uint64_t initial_graph_size = (final_graph_size + 1) / 2; while (initial_graph_size > graph_degree * 64) { initial_graph_size = (initial_graph_size + 1) / 2; @@ -2158,6 +2147,112 @@ auto iterative_build_graph( return cagra_graph; } +// Wrapper for mdspan that converts to strided_dataset +template , raft::memory_type::host>> +auto iterative_build_graph( + raft::resources const& res, + const index_params& params, + raft::mdspan, raft::row_major, Accessor> dataset) +{ + // If dataset is a host matrix, change it to a device matrix. Also, if the + // dimensionality of the dataset does not meet the alignment restriction, + // add extra dimensions and change it to a strided matrix. + std::unique_ptr> dev_aligned_dataset; + try { + dev_aligned_dataset = make_aligned_dataset(res, dataset); + } catch (raft::logic_error& e) { + RAFT_LOG_ERROR("Iterative CAGRA graph build requires the dataset to fit GPU memory"); + throw e; + } + + // Delegate to the strided_dataset version + return iterative_build_graph(res, params, *dev_aligned_dataset); +} + +template +index build(raft::resources const& res, + const index_params& params, + strided_dataset const& dataset) +{ + size_t intermediate_degree = params.intermediate_graph_degree; + size_t graph_degree = params.graph_degree; + + common::nvtx::range function_scope( + "cagra::build(%zu, %zu)", intermediate_degree, graph_degree); + + check_graph_degree(intermediate_degree, graph_degree, dataset.n_rows()); + + // Validate stride alignment (16-byte alignment required by CAGRA) + constexpr uint32_t kAlignBytes = 16; + RAFT_EXPECTS((dataset.stride() * sizeof(T)) % kAlignBytes == 0, + "CAGRA requires 16-byte aligned datasets. The provided strided_dataset has " + "stride=%u, sizeof(T)=%zu, stride_bytes=%zu which does not satisfy 16-byte " + "alignment. Create the dataset with make_strided_dataset_zerocopy(res, src, 16) " + "or make_strided_dataset_owning(res, src, 16) to ensure proper alignment.", + dataset.stride(), + sizeof(T), + dataset.stride() * sizeof(T)); + + // Only iterative CAGRA is supported with strided_dataset - must be explicitly set + RAFT_EXPECTS(std::holds_alternative( + params.graph_build_params), + "strided_dataset currently supports only iterative CAGRA graph build. IVF-PQ and " + "NN-Descent require row_major layout. Please set params.graph_build_params = " + "cagra::graph_build_params::iterative_search_params(). Use the mdspan build() " + "overload with row_major layout for IVF-PQ/NN-Descent support."); + + // Reject VPQ compression for strided_dataset + RAFT_EXPECTS(!params.compression.has_value(), + "VPQ compression with strided_dataset is currently not supported. VPQ requires " + "contiguous row-major layout. Use the mdspan overload of build() with a row_major " + "layout for VPQ compression."); + + // Validate metric compatibility + RAFT_EXPECTS(params.metric != cuvs::distance::DistanceType::CosineExpanded, + "CosineExpanded distance is not supported for iterative CAGRA graph build."); + + // Validate data type for BitwiseHamming metric + RAFT_EXPECTS(params.metric != cuvs::distance::DistanceType::BitwiseHamming || + (std::is_same_v || std::is_same_v), + "BitwiseHamming distance is only supported for int8_t and uint8_t data types. " + "Current data type is not supported."); + + // Build graph using iterative CAGRA + RAFT_LOG_TRACE("Building CAGRA graph using iterative search"); + auto cagra_graph = iterative_build_graph(res, params, dataset); + + RAFT_LOG_TRACE("Graph optimized, creating index"); + + // Construct an index from dataset and optimized knn graph + if (params.attach_dataset_on_build) { + try { + // Use non-owning constructor - stores reference to the dataset + // User must keep the original strided_dataset alive for the lifetime of the index + return index( + res, params.metric, dataset, raft::make_const_mdspan(cagra_graph.view())); + } catch (std::bad_alloc& e) { + RAFT_LOG_WARN( + "Insufficient GPU memory to construct CAGRA index with dataset on GPU. Only the graph will " + "be added to the index"); + // We just add the graph. User is expected to update dataset separately (e.g allocating in + // managed memory). + } catch (raft::logic_error& e) { + // The memory error can also manifest as logic_error. + RAFT_LOG_WARN( + "Insufficient GPU memory to construct CAGRA index with dataset on GPU. Only the graph will " + "be added to the index"); + } + } + + // Graph-only index (user will attach dataset later if needed) + index idx(res, params.metric); + idx.update_graph(res, raft::make_const_mdspan(cagra_graph.view())); + return idx; +} + template