From 7e3721809c8142ae5daa31e59fb0a1ae74b7a04e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 5 Aug 2024 16:01:27 -0500 Subject: [PATCH 01/18] FEA Add cuvs-bench to dependencies and conda environments --- .../bench_ann_cuda-118_arch-aarch64.yaml | 43 ++++ .../bench_ann_cuda-118_arch-x86_64.yaml | 43 ++++ .../bench_ann_cuda-125_arch-aarch64.yaml | 39 ++++ .../bench_ann_cuda-125_arch-x86_64.yaml | 39 ++++ dependencies.yaml | 45 ++++ python/cuvs_bench/LICENSE | 201 ++++++++++++++++++ python/cuvs_bench/cuvs_bench/VERSION | 1 + python/cuvs_bench/pyproject.toml | 70 ++++++ 8 files changed, 481 insertions(+) create mode 100644 conda/environments/bench_ann_cuda-118_arch-aarch64.yaml create mode 100644 conda/environments/bench_ann_cuda-118_arch-x86_64.yaml create mode 100644 conda/environments/bench_ann_cuda-125_arch-aarch64.yaml create mode 100644 conda/environments/bench_ann_cuda-125_arch-x86_64.yaml create mode 100644 python/cuvs_bench/LICENSE create mode 100644 python/cuvs_bench/cuvs_bench/VERSION create mode 100644 python/cuvs_bench/pyproject.toml diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml new file mode 100644 index 0000000000..4f24d08ec8 --- /dev/null +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -0,0 +1,43 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- benchmark>=1.8.2 +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4,!=3.30.0 +- cuda-nvtx=11.8 +- cuda-profiler-api=11.8.86 +- cuda-version=11.8 +- cudatoolkit +- cxx-compiler +- cython>=3.0.0 +- gcc_linux-aarch64=11.* +- glog>=0.6.0 +- h5py>=3.8.0 +- hnswlib=0.7.0 +- libcublas-dev=11.11.3.6 +- libcublas=11.11.3.6 +- libcurand-dev=10.3.0.86 +- libcurand=10.3.0.86 +- libcusolver-dev=11.4.1.48 +- libcusolver=11.4.1.48 +- libcusparse-dev=11.7.5.86 +- libcusparse=11.7.5.86 +- matplotlib +- nccl>=2.9.9 +- ninja +- nlohmann_json>=3.11.2 +- nvcc_linux-aarch64=11.8 +- openblas +- pandas +- pyyaml +- rmm==24.10.*,>=0.0.0a0 +- sysroot_linux-aarch64==2.17 +name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml new file mode 100644 index 0000000000..7d5f8236cc --- /dev/null +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -0,0 +1,43 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- benchmark>=1.8.2 +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4,!=3.30.0 +- cuda-nvtx=11.8 +- cuda-profiler-api=11.8.86 +- cuda-version=11.8 +- cudatoolkit +- cxx-compiler +- cython>=3.0.0 +- gcc_linux-64=11.* +- glog>=0.6.0 +- h5py>=3.8.0 +- hnswlib=0.7.0 +- libcublas-dev=11.11.3.6 +- libcublas=11.11.3.6 +- libcurand-dev=10.3.0.86 +- libcurand=10.3.0.86 +- libcusolver-dev=11.4.1.48 +- libcusolver=11.4.1.48 +- libcusparse-dev=11.7.5.86 +- libcusparse=11.7.5.86 +- matplotlib +- nccl>=2.9.9 +- ninja +- nlohmann_json>=3.11.2 +- nvcc_linux-64=11.8 +- openblas +- pandas +- pyyaml +- rmm==24.10.*,>=0.0.0a0 +- sysroot_linux-64==2.17 +name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml new file mode 100644 index 0000000000..50a4b2d408 --- /dev/null +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -0,0 +1,39 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- benchmark>=1.8.2 +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4,!=3.30.0 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-profiler-api +- cuda-version=12.5 +- cxx-compiler +- cython>=3.0.0 +- gcc_linux-aarch64=11.* +- glog>=0.6.0 +- h5py>=3.8.0 +- hnswlib=0.7.0 +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- matplotlib +- nccl>=2.9.9 +- ninja +- nlohmann_json>=3.11.2 +- openblas +- pandas +- pyyaml +- rmm==24.10.*,>=0.0.0a0 +- sysroot_linux-aarch64==2.17 +name: bench_ann_cuda-125_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml new file mode 100644 index 0000000000..ab266d4053 --- /dev/null +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -0,0 +1,39 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- benchmark>=1.8.2 +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4,!=3.30.0 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-profiler-api +- cuda-version=12.5 +- cxx-compiler +- cython>=3.0.0 +- gcc_linux-64=11.* +- glog>=0.6.0 +- h5py>=3.8.0 +- hnswlib=0.7.0 +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- matplotlib +- nccl>=2.9.9 +- ninja +- nlohmann_json>=3.11.2 +- openblas +- pandas +- pyyaml +- rmm==24.10.*,>=0.0.0a0 +- sysroot_linux-64==2.17 +name: bench_ann_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 9bb8682bf1..dc773ab69f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -21,6 +21,18 @@ files: - test_py_cuvs - cupy - rust + bench_ann: + output: conda + matrix: + cuda: ["11.8", "12.5"] + arch: [x86_64, aarch64] + includes: + - rapids_build + - cuda + - cuda_version + - develop + - bench + - bench_python test_cpp: output: none includes: @@ -89,6 +101,20 @@ files: - test_python_common - test_py_cuvs - cupy + py_build_cuvs_bench: + output: pyproject + pyproject_dir: python/cuvs_bench + extras: + table: build-system + includes: + - rapids_build + py_run_cuvs_bench: + output: pyproject + pyproject_dir: python/cuvs_bench + extras: + table: project + includes: + - bench_python channels: - rapidsai - rapidsai-nightly @@ -429,3 +455,22 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - scikit-learn + bench: + common: + - output_types: [conda, pyproject, requirements] + packages: + - hnswlib=0.7.0 + - nlohmann_json>=3.11.2 + - glog>=0.6.0 + - h5py>=3.8.0 + - benchmark>=1.8.2 + - openblas + - *rmm_unsuffixed + bench_python: + common: + - output_types: [conda] + packages: + - matplotlib + - pandas + - pyyaml + - pandas diff --git a/python/cuvs_bench/LICENSE b/python/cuvs_bench/LICENSE new file mode 100644 index 0000000000..1a89b9054d --- 
/dev/null +++ b/python/cuvs_bench/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/python/cuvs_bench/cuvs_bench/VERSION b/python/cuvs_bench/cuvs_bench/VERSION new file mode 100644 index 0000000000..7c7ba04436 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/VERSION @@ -0,0 +1 @@ +24.10.00 diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml new file mode 100644 index 0000000000..17caa8e28a --- /dev/null +++ b/python/cuvs_bench/pyproject.toml @@ -0,0 +1,70 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[build-system] +build-backend = "rapids_build_backend.build" +requires = [ + "cmake>=3.26.4,!=3.30.0", + "cython>=3.0.0", + "ninja", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+ +[project] +name = "raft-ann-bench" +dynamic = ["version"] +description = "cuVS benchmarks" +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.9" +dependencies = [ +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/raft" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +"*" = ["*.*", "VERSION"] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", +] + +[tool.setuptools.dynamic] +version = { file = "cuvs_bench/VERSION" } + +[tool.rapids-build-backend] +build-backend = "scikit_build_core.build" +requires = [] +dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" From b2aef6d6e1a11120a6bff67ad09b079df807177e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 5 Aug 2024 16:10:40 -0500 Subject: [PATCH 02/18] FIX add missing deps --- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 3 +++ conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 3 +++ conda/environments/bench_ann_cuda-125_arch-aarch64.yaml | 3 +++ conda/environments/bench_ann_cuda-125_arch-x86_64.yaml | 3 +++ dependencies.yaml | 1 + 5 files changed, 13 insertions(+) diff --git 
a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 4f24d08ec8..cc43126a72 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -14,10 +14,12 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cxx-compiler - cython>=3.0.0 +- dlpack>=0.8,<1.0 - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 @@ -37,6 +39,7 @@ dependencies: - nvcc_linux-aarch64=11.8 - openblas - pandas +- pylibraft==24.10.*,>=0.0.0a0 - pyyaml - rmm==24.10.*,>=0.0.0a0 - sysroot_linux-aarch64==2.17 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 7d5f8236cc..a1f01f19e1 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -14,10 +14,12 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cxx-compiler - cython>=3.0.0 +- dlpack>=0.8,<1.0 - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 @@ -37,6 +39,7 @@ dependencies: - nvcc_linux-64=11.8 - openblas - pandas +- pylibraft==24.10.*,>=0.0.0a0 - pyyaml - rmm==24.10.*,>=0.0.0a0 - sysroot_linux-64==2.17 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 50a4b2d408..6251a5e16d 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -16,9 +16,11 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 +- dlpack>=0.8,<1.0 - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 @@ -33,6 +35,7 @@ dependencies: - 
nlohmann_json>=3.11.2 - openblas - pandas +- pylibraft==24.10.*,>=0.0.0a0 - pyyaml - rmm==24.10.*,>=0.0.0a0 - sysroot_linux-aarch64==2.17 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index ab266d4053..faf370143c 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -16,9 +16,11 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 +- dlpack>=0.8,<1.0 - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 @@ -33,6 +35,7 @@ dependencies: - nlohmann_json>=3.11.2 - openblas - pandas +- pylibraft==24.10.*,>=0.0.0a0 - pyyaml - rmm==24.10.*,>=0.0.0a0 - sysroot_linux-64==2.17 diff --git a/dependencies.yaml b/dependencies.yaml index dc773ab69f..3a0d9577fa 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -28,6 +28,7 @@ files: arch: [x86_64, aarch64] includes: - rapids_build + - build_py_cuvs - cuda - cuda_version - develop From bf75242d7661dbe946f979bf79d91aa34977d62e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 5 Aug 2024 19:52:31 -0500 Subject: [PATCH 03/18] FIX version and other improvements --- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 3 ++- python/cuvs_bench/pyproject.toml | 5 ++++- 6 files changed, 10 insertions(+), 6 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index cc43126a72..5ca327fc58 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -23,7 +23,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 
- h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index a1f01f19e1..410624168f 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -23,7 +23,7 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 6251a5e16d..f22f6ee441 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -24,7 +24,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index faf370143c..06b2c53381 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -24,7 +24,7 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/dependencies.yaml b/dependencies.yaml index 3a0d9577fa..9f44911c89 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -109,6 +109,7 @@ files: table: build-system includes: - rapids_build + - build_py_cuvs py_run_cuvs_bench: output: pyproject pyproject_dir: python/cuvs_bench @@ -460,7 +461,7 @@ dependencies: common: - output_types: [conda, pyproject, requirements] packages: - - hnswlib=0.7.0 + - hnswlib=0.6.2 - nlohmann_json>=3.11.2 - glog>=0.6.0 - h5py>=3.8.0 diff --git 
a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 17caa8e28a..ec41af3df1 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -4,12 +4,15 @@ build-backend = "rapids_build_backend.build" requires = [ "cmake>=3.26.4,!=3.30.0", + "cuda-python", "cython>=3.0.0", "ninja", + "pylibraft==24.10.*,>=0.0.0a0", + "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project] -name = "raft-ann-bench" +name = "cuvs-bench" dynamic = ["version"] description = "cuVS benchmarks" authors = [ From a8bcdef525055669238156da94d800dc0614097a Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 6 Aug 2024 15:59:32 -0500 Subject: [PATCH 04/18] FEA Add cuvs_bench.run --- .../bench_ann_cuda-118_arch-aarch64.yaml | 3 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 3 +- .../bench_ann_cuda-125_arch-aarch64.yaml | 3 +- .../bench_ann_cuda-125_arch-x86_64.yaml | 3 +- cpp/CMakeLists.txt | 2 +- dependencies.yaml | 11 +- .../cuvs_bench/config/algos/__init__.py | 0 .../constraints/__init__.py} | 2 +- .../cuvs_bench/config/algos/cuvs_cagra.yaml | 4 +- .../config/algos/cuvs_cagra_hnswlib.yaml | 2 +- .../cuvs_bench/config/algos/cuvs_ivf_pq.yaml | 4 +- .../config/algos/faiss_gpu_ivf_pq.yaml | 4 +- .../cuvs_bench/config/algos/hnswlib.yaml | 2 +- .../config/{ => datasets}/bigann-100M.yaml | 0 .../config/{ => datasets}/datasets.yaml | 0 .../config/{ => datasets}/deep-100M.yaml | 0 .../config/{ => datasets}/deep-1B.yaml | 0 .../{ => datasets}/deep-image-96-inner.yaml | 0 .../fashion-mnist-784-euclidean.yaml | 0 .../{ => datasets}/gist-960-euclidean.yaml | 0 .../{ => datasets}/glove-100-angular.yaml | 0 .../{ => datasets}/glove-100-inner.yaml | 0 .../{ => datasets}/glove-50-angular.yaml | 0 .../config/{ => datasets}/glove-50-inner.yaml | 0 .../{ => datasets}/lastfm-65-angular.yaml | 0 .../{ => 
datasets}/mnist-784-euclidean.yaml | 0 .../{ => datasets}/nytimes-256-angular.yaml | 0 .../{ => datasets}/nytimes-256-inner.yaml | 0 .../{ => datasets}/sift-128-euclidean.yaml | 0 .../config/{ => datasets}/wiki_all_10M.yaml | 0 .../config/{ => datasets}/wiki_all_1M.yaml | 0 .../config/{ => datasets}/wiki_all_88M.yaml | 0 python/cuvs_bench/cuvs_bench/run/__init__.py | 17 + python/cuvs_bench/cuvs_bench/run/__main__.py | 202 +++++++ python/cuvs_bench/cuvs_bench/run/run.py | 538 ++++++++++++++++++ python/cuvs_bench/cuvs_bench/run/runners.py | 246 ++++++++ python/cuvs_bench/pyproject.toml | 17 +- 37 files changed, 1024 insertions(+), 39 deletions(-) create mode 100644 python/cuvs_bench/cuvs_bench/config/algos/__init__.py rename python/cuvs_bench/cuvs_bench/config/{constraints.py => algos/constraints/__init__.py} (98%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/bigann-100M.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/datasets.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/deep-100M.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/deep-1B.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/deep-image-96-inner.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/fashion-mnist-784-euclidean.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/gist-960-euclidean.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/glove-100-angular.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/glove-100-inner.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/glove-50-angular.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/glove-50-inner.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/lastfm-65-angular.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/mnist-784-euclidean.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => 
datasets}/nytimes-256-angular.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/nytimes-256-inner.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/sift-128-euclidean.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/wiki_all_10M.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/wiki_all_1M.yaml (100%) rename python/cuvs_bench/cuvs_bench/config/{ => datasets}/wiki_all_88M.yaml (100%) create mode 100644 python/cuvs_bench/cuvs_bench/run/__init__.py create mode 100644 python/cuvs_bench/cuvs_bench/run/__main__.py create mode 100644 python/cuvs_bench/cuvs_bench/run/run.py create mode 100644 python/cuvs_bench/cuvs_bench/run/runners.py diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 5ca327fc58..eee696308f 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -11,6 +11,7 @@ dependencies: - c-compiler - clang-tools=16.0.6 - clang==16.0.6 +- click - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 @@ -23,7 +24,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 +- hnswlib=0.7.0 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 410624168f..92a246d189 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -11,6 +11,7 @@ dependencies: - c-compiler - clang-tools=16.0.6 - clang==16.0.6 +- click - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 @@ -23,7 +24,7 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 +- hnswlib=0.7.0 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff 
--git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index f22f6ee441..77db9fc093 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -11,6 +11,7 @@ dependencies: - c-compiler - clang-tools=16.0.6 - clang==16.0.6 +- click - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev - cuda-nvcc @@ -24,7 +25,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 +- hnswlib=0.7.0 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 06b2c53381..7379c2ca11 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -11,6 +11,7 @@ dependencies: - c-compiler - clang-tools=16.0.6 - clang==16.0.6 +- click - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev - cuda-nvcc @@ -24,7 +25,7 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.6.2 +- hnswlib=0.7.0 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3b483538a1..02c2efa0bf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -55,7 +55,7 @@ option(BUILD_SHARED_LIBS "Build cuvs shared libraries" ON) option(BUILD_TESTS "Build cuvs unit-tests" ON) option(BUILD_C_LIBRARY "Build raft C API library" OFF) option(BUILD_C_TESTS "Build raft C API tests" OFF) -option(BUILD_ANN_BENCH "Build cuVS ann benchmarks" OFF) +option(BUILD_ANN_BENCH "Build cuVS ann benchmarks" ON) option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO diff --git a/dependencies.yaml b/dependencies.yaml index 9f44911c89..6c52cb4cda 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -102,14 +102,6 @@ files: - 
test_python_common - test_py_cuvs - cupy - py_build_cuvs_bench: - output: pyproject - pyproject_dir: python/cuvs_bench - extras: - table: build-system - includes: - - rapids_build - - build_py_cuvs py_run_cuvs_bench: output: pyproject pyproject_dir: python/cuvs_bench @@ -461,7 +453,7 @@ dependencies: common: - output_types: [conda, pyproject, requirements] packages: - - hnswlib=0.6.2 + - hnswlib=0.7.0 - nlohmann_json>=3.11.2 - glog>=0.6.0 - h5py>=3.8.0 @@ -476,3 +468,4 @@ dependencies: - pandas - pyyaml - pandas + - click diff --git a/python/cuvs_bench/cuvs_bench/config/algos/__init__.py b/python/cuvs_bench/cuvs_bench/config/algos/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/cuvs_bench/cuvs_bench/config/constraints.py b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py similarity index 98% rename from python/cuvs_bench/cuvs_bench/config/constraints.py rename to python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py index ff451c0565..de05bd752b 100644 --- a/python/cuvs_bench/cuvs_bench/config/constraints.py +++ b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml index e7b049d0c0..4b0e0289b2 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml @@ -1,7 +1,7 @@ name: cuvs_cagra constraints: - build: cuvs_bench.constraints.raft_cagra_build_constraints - search: cuvs_bench.constraints.raft_cagra_search_constraints + build: cuvs_bench.config.algos.constraints.cuvs_cagra_build + search: cuvs_bench.config.algos.constraints.cuvs_cagra_search groups: base: build: diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml index 70e344dfd6..f1a7f272cb 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml @@ -1,6 +1,6 @@ name: cuvs_cagra_hnswlib constraints: - search: cuvs_bench.constraints.hnswlib_search + search: cuvs_bench.config.algos.constraints.hnswlib_search groups: base: build: diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml index aa95d6716f..d68e7973ab 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml @@ -1,7 +1,7 @@ name: cuvs_ivf_pq constraints: - build: cuvs_bench.constraints.cuvs_ivf_pq_build - search: cuvs_bench.constraints.cuvs_ivf_pq_search + build: cuvs_bench.config.algos.constraints.cuvs_ivf_pq_build + search: cuvs_bench.config.algos.constraints.cuvs_ivf_pq_search groups: base: build: diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml index 1bd78b736f..782f3aed12 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml +++ 
b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml @@ -1,7 +1,7 @@ name: faiss_gpu_ivf_pq constraints: - build: cuvs_bench.constraints.faiss_gpu_ivf_pq_build - search: cuvs_bench.constraints.faiss_gpu_ivf_pq_search + build: cuvs_bench.config.algos.constraints.faiss_gpu_ivf_pq_build + search: cuvs_bench.config.algos.constraints.faiss_gpu_ivf_pq_search groups: base: build: diff --git a/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml index dbd73155d5..93d8cff2d1 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml @@ -1,6 +1,6 @@ name: hnswlib constraints: - search: cuvs_bench.constraints.hnswlib_search + search: cuvs_bench.config.algos.constraints.hnswlib_search groups: base: build: diff --git a/python/cuvs_bench/cuvs_bench/config/bigann-100M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/bigann-100M.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/bigann-100M.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/bigann-100M.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/datasets.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/datasets.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/deep-100M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/deep-100M.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/deep-100M.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/deep-100M.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/deep-1B.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/deep-1B.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/deep-1B.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/deep-1B.yaml diff --git 
a/python/cuvs_bench/cuvs_bench/config/deep-image-96-inner.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/deep-image-96-inner.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/deep-image-96-inner.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/deep-image-96-inner.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/fashion-mnist-784-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/fashion-mnist-784-euclidean.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/fashion-mnist-784-euclidean.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/fashion-mnist-784-euclidean.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/gist-960-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/gist-960-euclidean.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/gist-960-euclidean.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/gist-960-euclidean.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/glove-100-angular.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-angular.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/glove-100-angular.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/glove-100-angular.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/glove-100-inner.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-inner.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/glove-100-inner.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/glove-100-inner.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/glove-50-angular.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-angular.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/glove-50-angular.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/glove-50-angular.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/glove-50-inner.yaml 
b/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-inner.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/glove-50-inner.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/glove-50-inner.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/lastfm-65-angular.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/lastfm-65-angular.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/lastfm-65-angular.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/lastfm-65-angular.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/mnist-784-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/mnist-784-euclidean.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/mnist-784-euclidean.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/mnist-784-euclidean.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/nytimes-256-angular.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-angular.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/nytimes-256-angular.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-angular.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/nytimes-256-inner.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-inner.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/nytimes-256-inner.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-inner.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/sift-128-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/sift-128-euclidean.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/sift-128-euclidean.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/sift-128-euclidean.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/wiki_all_10M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_10M.yaml similarity index 100% rename from 
python/cuvs_bench/cuvs_bench/config/wiki_all_10M.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_10M.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/wiki_all_1M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_1M.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/wiki_all_1M.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_1M.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/wiki_all_88M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_88M.yaml similarity index 100% rename from python/cuvs_bench/cuvs_bench/config/wiki_all_88M.yaml rename to python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_88M.yaml diff --git a/python/cuvs_bench/cuvs_bench/run/__init__.py b/python/cuvs_bench/cuvs_bench/run/__init__.py new file mode 100644 index 0000000000..7cb04e6f8e --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/run/__init__.py @@ -0,0 +1,17 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .run import run_benchmark diff --git a/python/cuvs_bench/cuvs_bench/run/__main__.py b/python/cuvs_bench/cuvs_bench/run/__main__.py new file mode 100644 index 0000000000..fb269014e4 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/run/__main__.py @@ -0,0 +1,202 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import click +import os + +from pathlib import Path +from typing import Optional + +from . import run_benchmark + + +@click.command() +@click.option( + "--subset-size", + type=click.IntRange(min=1), + help="the number of subset rows of the dataset to build the index" +) +@click.option( + "-k", + "--count", + default=10, + show_default=True, + type=click.IntRange(min=1), + help="the number of nearest neighbors to search for" +) +@click.option( + "-bs", "--batch-size", + default=10000, + show_default=True, + type=click.IntRange(min=1), + help="number of query vectors to use in each query trial" +) +@click.option( + "--dataset-configuration", + help="path to YAML configuration file for datasets" +) +@click.option( + "--configuration", + help="path to YAML configuration file or directory for algorithms" + "Any run groups found in the specified file/directory will " + "automatically override groups of the same name present in the " + "default configurations, including `base`", +) +@click.option( + "--dataset", + default="glove-100-inner", + show_default=True, + help="name of dataset" +) +@click.option( + "--dataset-path", + default=lambda: os.environ.get("RAPIDS_DATASET_ROOT_DIR", + os.path.join(Path(__file__).parent, "datasets/")), + show_default=True, + help="path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, " + "otherwise a datasets subdirectory from the calling directory", +) 
+@click.option( + "--build", + is_flag=True, + help="Build the index" +) +@click.option( + "--search", + is_flag=True, + help="Perform the search" +) +@click.option( + "--algorithms", + default=None, + show_default=True, + help="run only comma separated list of named algorithms. If parameters `groups` and `algo-groups` are both undefined, " + "then group `base` is run by default", +) +@click.option( + "--groups", + default="base", + show_default=True, + help="run only comma separated groups of parameters" +) +@click.option( + "--algo-groups", + help='add comma separated . to run. Example usage: "--algo-groups=raft_cagra.large,hnswlib.large"', +) +@click.option( + "-f", + "--force", + is_flag=True, + help="re-run algorithms even if their results already exist" +) +@click.option( + "-m", + "--search-mode", + default="latency", + show_default=True, + help="run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode", +) +@click.option( + "-t", + "--search-threads", + default=None, + show_default=True, + help="specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. " + "Example: --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. " + "If only 'min' is specified, then a single test is run with 'min' threads. By default min=1, max=.", +) +@click.option( + "-r", + "--dry-run", + is_flag=True, + help="dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed " + "by the lower-level c++ binaries and then print the command to run execute the benchmarks but will not actually execute " + "the command.", +) +@click.option( + "--raft-log-level", + default="info", + show_default=True, + help="Log level, possible values are [off, error, warn, info, debug, trace]. Default: 'info'. 
" + "Note that 'debug' or more detailed logging level requires that the library is compiled with " + "-DRAFT_ACTIVE_LEVEL= where >= ", +) +def main( + subset_size: Optional[int], + count: int, + batch_size: int, + dataset_configuration: Optional[str], + configuration: Optional[str], + dataset: str, + dataset_path: str, + build: bool, + search: bool, + algorithms: Optional[str], + groups: str, + algo_groups: Optional[str], + force: bool, + search_mode: str, + search_threads: Optional[str], + dry_run: bool, + raft_log_level: str +) -> None: + """ + Main function to run the benchmark with the provided options. + + Parameters + ---------- + subset_size : Optional[int] + The number of subset rows of the dataset to build the index. + count : int + The number of nearest neighbors to search for. + batch_size : int + Number of query vectors to use in each query trial. + dataset_configuration : Optional[str] + Path to YAML configuration file for datasets. + configuration : Optional[str] + Path to YAML configuration file or directory for algorithms. + dataset : str + Name of the dataset to use. + dataset_path : str + Path to the dataset folder. + build : bool + Whether to build the indices. + search : bool + Whether to perform the search. + algorithms : Optional[str] + Comma-separated list of algorithm names to use. + groups : str + Comma-separated list of groups to consider. + algo_groups : Optional[str] + Comma-separated list of algorithm groups to consider. + force : bool + Whether to force the execution regardless of warnings. + search_mode : str + The mode of search to perform ('latency' or 'throughput'). + search_threads : Optional[str] + The number of threads to use for throughput benchmark. + dry_run : bool + Whether to perform a dry run without actual execution. + raft_log_level : str + The logging level for the RAFT library. 
+ + """ + + run_benchmark(**locals()) + + +if __name__ == '__main__': + main() diff --git a/python/cuvs_bench/cuvs_bench/run/run.py b/python/cuvs_bench/cuvs_bench/run/run.py new file mode 100644 index 0000000000..3f3a25a2af --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/run/run.py @@ -0,0 +1,538 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import importlib +import itertools +import json +import os +import subprocess +import sys +import uuid +import warnings +import yaml + +from importlib import import_module +from typing import Optional, Tuple, Union, Dict, Any + +from .runners import cuvs_bench_cpp + + +def run_benchmark( + subset_size: int, + count: int, + batch_size: int, + dataset_configuration: Optional[str], + configuration: Optional[str], + dataset: str, + dataset_path: str, + build: Optional[bool], + search: Optional[bool], + algorithms: Optional[str], + groups: str, + algo_groups: Optional[str], + force: bool, + search_mode: str, + search_threads: int, + dry_run: bool, + raft_log_level: int +) -> None: + """ + Runs a benchmarking process based on the provided configurations. + + Parameters + ---------- + count : int + The number of iterations to run. + batch_size : int + The size of each batch for processing. + dataset_configuration : Optional[str] + Path to the dataset configuration file. + configuration : Optional[str] + Path to the algorithm configuration file or directory. 
+ dataset : str + The name of the dataset to use. + dataset_path : str + The path to the dataset directory. + build : Optional[bool] + Whether to build the indices. + search : Optional[bool] + Whether to perform the search. + algorithms : Optional[str] + Comma-separated list of algorithm names to use. + groups : str + Comma-separated list of groups to consider. + algo_groups : Optional[str] + Comma-separated list of algorithm groups to consider. + force : bool + Whether to force the execution regardless of warnings. + search_mode : str + The mode of search to perform. + search_threads : int + The number of threads to use for searching. + dry_run : bool + Whether to perform a dry run without actual execution. + raft_log_level : int + The logging level for the RAFT library. + + Returns + ------- + None + """ + + scripts_path = os.path.dirname(os.path.realpath(__file__)) + call_path = os.getcwd() + gpu_present = rmm_present() + + with open(os.path.join(scripts_path, "../config", "algorithms.yaml"), "r") as f: + algos_yaml = yaml.safe_load(f) + + # If both build and search are not provided, + # run both + if not build and not search: + build = True + search = True + + # look for dataset configuration path, if not given then use the + # default location in cuvs_bench/conf + if dataset_configuration: + dataset_conf_f = dataset_configuration + else: + dataset_conf_f = os.path.join(scripts_path, "../config/datasets", "datasets.yaml") + with open(dataset_conf_f, "r") as f: + dataset_conf_all = yaml.safe_load(f) + + # load datasets configuration files + dataset_conf = None + for dset in dataset_conf_all: + if dataset == dset["name"]: + dataset_conf = dset + break + if not dataset_conf: + raise ValueError("Could not find a dataset configuration") + + conf_file = dict() + conf_file["dataset"] = dataset_conf + if subset_size: + conf_file["dataset"]["subset_size"] = subset_size + + conf_file["search_basic_param"] = {} + conf_file["search_basic_param"]["k"] = count + 
conf_file["search_basic_param"]["batch_size"] = batch_size + + algos_conf_fs = os.listdir(os.path.join(scripts_path, "../config", "algos")) + algos_conf_fs = [ + os.path.join(scripts_path, "../config", "algos", f) + for f in algos_conf_fs + if ".json" not in f and "constraint" not in f and ".py" not in f + ] + conf_filedir = os.path.join(scripts_path, "conf", "algos") + if configuration: + if os.path.isdir(configuration): + conf_filedir = configuration + algos_conf_fs = algos_conf_fs + [ + os.path.join(configuration, f) + for f in os.listdir(configuration) + if ".json" not in f + ] + elif os.path.isfile(configuration): + conf_filedir = os.path.normpath(configuration).split(os.sep) + conf_filedir = os.path.join(*conf_filedir[:-1]) + algos_conf_fs = algos_conf_fs + [configuration] + + filter_algos = True if algorithms else False + if filter_algos: + allowed_algos = algorithms.split(",") + named_groups = groups.split(",") + filter_algo_groups = True if algo_groups else False + allowed_algo_groups = None + if filter_algo_groups: + allowed_algo_groups = [ + algo_group.split(".") for algo_group in algo_groups.split(",") + ] + allowed_algo_groups = list(zip(*allowed_algo_groups)) + algos_conf = dict() + for algo_f in algos_conf_fs: + with open(algo_f, "r") as f: + try: + algo = yaml.safe_load(f) + except Exception as e: + warnings.warn( + f"Could not load YAML config {algo_f} due to " + + e.with_traceback() + ) + continue + insert_algo = True + insert_algo_group = False + if filter_algos: + if algo["name"] not in allowed_algos: + insert_algo = False + if filter_algo_groups: + if algo["name"] in allowed_algo_groups[0]: + insert_algo_group = True + + def add_algo_group(group_list): + if algo["name"] not in algos_conf: + algos_conf[algo["name"]] = {"groups": {}} + for group in algo["groups"].keys(): + if group in group_list: + algos_conf[algo["name"]]["groups"][group] = algo[ + "groups" + ][group] + if "constraints" in algo: + algos_conf[algo["name"]]["constraints"] = algo[ 
+ "constraints" + ] + + if insert_algo: + add_algo_group(named_groups) + if insert_algo_group: + add_algo_group(allowed_algo_groups[1]) + + executables_to_run = dict() + for algo in algos_conf.keys(): + validate_algorithm(algos_yaml, algo, gpu_present) + for group in algos_conf[algo]["groups"].keys(): + executable = find_executable( + algos_yaml, algo, group, count, batch_size + ) + if executable not in executables_to_run: + executables_to_run[executable] = {"index": []} + build_params = algos_conf[algo]["groups"][group]["build"] or {} + search_params = algos_conf[algo]["groups"][group]["search"] or {} + + param_names = [] + param_lists = [] + for param in build_params.keys(): + param_names.append(param) + param_lists.append(build_params[param]) + + all_build_params = itertools.product(*param_lists) + + search_param_names = [] + search_param_lists = [] + for search_param in search_params.keys(): + search_param_names.append(search_param) + search_param_lists.append(search_params[search_param]) + + for params in all_build_params: + index = {"algo": algo, "build_param": {}} + if group != "base": + index_name = f"{algo}_{group}" + else: + index_name = f"{algo}" + for i in range(len(params)): + index["build_param"][param_names[i]] = params[i] + index_name += "." 
+ f"{param_names[i]}{params[i]}" + + if "constraints" in algos_conf[algo]: + if "build" in algos_conf[algo]["constraints"]: + importable = algos_conf[algo]["constraints"]["build"] + importable = importable.split(".") + module = ".".join(importable[:-1]) + func = importable[-1] + validator = import_module(module) + build_constraints = getattr(validator, func) + if "dims" not in conf_file["dataset"]: + raise ValueError( + "`dims` needed for build constraints but not " + "specified in datasets.yaml" + ) + if not build_constraints( + index["build_param"], conf_file["dataset"]["dims"] + ): + continue + index_filename = ( + index_name + if len(index_name) < 128 + else str(hash(index_name)) + ) + index["name"] = index_name + index["file"] = os.path.join( + dataset_path, dataset, "index", index_filename + ) + index["search_params"] = [] + all_search_params = itertools.product(*search_param_lists) + for search_params in all_search_params: + search_dict = dict() + for i in range(len(search_params)): + search_dict[search_param_names[i]] = search_params[i] + if "constraints" in algos_conf[algo]: + if "search" in algos_conf[algo]["constraints"]: + importable = algos_conf[algo]["constraints"][ + "search" + ] + importable = importable.split(".") + module = ".".join(importable[:-1]) + func = importable[-1] + validator = import_module(module) + search_constraints = getattr(validator, func) + if search_constraints( + search_dict, + index["build_param"], + count, + batch_size, + ): + index["search_params"].append(search_dict) + else: + index["search_params"].append(search_dict) + executables_to_run[executable]["index"].append(index) + + if len(index["search_params"]) == 0: + print("No search parameters were added to configuration") + executable = find_executable( + algos_yaml, algo, group, count, batch_size + ) + if executable not in executables_to_run: + executables_to_run[executable] = {"index": []} + build_params = algos_conf[algo]["groups"][group]["build"] or {} + search_params = 
algos_conf[algo]["groups"][group]["search"] or {} + + param_names = [] + param_lists = [] + for param in build_params.keys(): + param_names.append(param) + param_lists.append(build_params[param]) + + all_build_params = itertools.product(*param_lists) + + search_param_names = [] + search_param_lists = [] + for search_param in search_params.keys(): + search_param_names.append(search_param) + search_param_lists.append(search_params[search_param]) + + for params in all_build_params: + index = {"algo": algo, "build_param": {}} + if group != "base": + index_name = f"{algo}_{group}" + else: + index_name = f"{algo}" + for i in range(len(params)): + index["build_param"][param_names[i]] = params[i] + index_name += "." + f"{param_names[i]}{params[i]}" + + if "constraints" in algos_conf[algo]: + if "build" in algos_conf[algo]["constraints"]: + importable = algos_conf[algo]["constraints"]["build"] + importable = importable.split(".") + module = ".".join(importable[:-1]) + func = importable[-1] + validator = import_module(module) + build_constraints = getattr(validator, func) + if "dims" not in conf_file["dataset"]: + raise ValueError( + "`dims` needed for build constraints but not " + "specified in datasets.yaml" + ) + if not build_constraints( + index["build_param"], conf_file["dataset"]["dims"] + ): + continue + index_filename = ( + index_name + if len(index_name) < 128 + else str(hash(index_name)) + ) + index["name"] = index_name + index["file"] = os.path.join( + dataset_path, dataset, "index", index_filename + ) + index["search_params"] = [] + all_search_params = itertools.product(*search_param_lists) + for search_params in all_search_params: + search_dict = dict() + for i in range(len(search_params)): + search_dict[search_param_names[i]] = search_params[i] + # if "constraints" in algos_conf[algo]: + # todo: refactor common code + if False: + if "search" in algos_conf[algo]["constraints"]: + if validate_constraints(algos_conf, + algo, + "search", + search_dict, + 
index["build_param"], + count, + batch_size): + index["search_params"].append(search_dict) + else: + index["search_params"].append(search_dict) + executables_to_run[executable]["index"].append(index) + + if len(index["search_params"]) == 0: + print("No search parameters were added to configuration") + + cuvs_bench_cpp( + conf_file, + f"{dataset}", + conf_filedir, + executables_to_run, + dataset_path, + force, + build, + search, + dry_run, + count, + batch_size, + search_threads, + search_mode, + raft_log_level, + ) + + +def rmm_present() -> bool: + """ + Check if RMM is present. + + Returns + ------- + bool + True if RMM is present, False otherwise. + """ + try: + import rmm # noqa: F401 + return True + except ImportError: + return False + + +def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: int) -> Tuple[str, str, Tuple[str, str]]: + """ + Find the executable for the given algorithm and group. + + Parameters + ---------- + algos_conf : dict + The configuration dictionary for the algorithms. + algo : str + The name of the algorithm. + group : str + The name of the group. + k : int + The number of nearest neighbors to search for. + batch_size : int + The size of each batch for processing. + + Returns + ------- + Tuple[str, str, Tuple[str, str]] + A tuple containing the executable name, the path to the executable, and the file name. + """ + executable = algos_conf[algo]["executable"] + file_name = (f"{algo},{group}", f"{algo},{group},k{k},bs{batch_size}") + + # Check for devcontainer build + scripts_path = os.path.dirname(os.path.realpath(__file__)) + build_path = "/home/coder/cuvs/cpp/build/latest/bench/ann" + print(f"build_path: {build_path}") + if os.path.exists(build_path): + print(f"-- Detected devcontainer artifacts in {build_path}. 
") + return executable, build_path, file_name + + build_path = os.getenv("CUVS_HOME") + if build_path is not None: + build_path = os.path.join(build_path, "cpp", "build", "release", executable) + if os.path.exists(build_path): + print(f"-- Using RAFT bench from repository in {build_path}. ") + return executable, build_path, file_name + + # # todo: better path detection for devcontainer + # build_path = os.getenv("CUVS_BENCH_BUILD_PATH") + # print("build_path: ", build_path) + # if build_path is not None: + # if os.path.exists(build_path): + # print(f"-- Using devcontainer location from {build_path}. ") + # return executable, build_path, file_name + + conda_path = os.getenv("CONDA_PREFIX") + if conda_path is not None: + conda_path = os.path.join(conda_path, "bin", "ann", executable) + if os.path.exists(conda_path): + print("-- Using cuVS bench found in conda environment. ") + return executable, conda_path, file_name + else: + raise FileNotFoundError(executable) + else: + raise FileNotFoundError(executable) + + +def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: + """ + Validate algorithm and whether it requires gpu. . + + Parameters + ---------- + algos_conf : dict + The configuration dictionary for the algorithms. + algo : str + The name of the algorithm. + gpu_present : bool + Whether a GPU is present. + + Returns + ------- + bool + True if the algorithm is valid for the current hardware configuration, False otherwise. + """ + algos_conf_keys = set(algos_conf.keys()) + if gpu_present: + return algo in algos_conf_keys + else: + return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False + + +def validate_constraints( + algos_conf: Dict[str, Any], + algo: str, + constraint_type: str, + param: Dict[str, Any], + dims: Any, + k: Optional[int], + batch_size: Optional[int] +) -> bool: + """ + Validate the constraints for the given algorithm and constraint type. 
+ + Parameters + ---------- + algos_conf : Dict[str, Any] + The configuration dictionary for the algorithms. + algo : str + The name of the algorithm. + constraint_type : str + The type of constraint to validate ('build' or 'search'). + param : Dict[str, Any] + The parameters to validate against the constraints. + dims : Any + The dimensions required for the constraints. + + Returns + ------- + bool + True if the constraints are valid, False otherwise. + """ + if constraint_type in algos_conf[algo]["constraints"]: + importable = algos_conf[algo]["constraints"][constraint_type] + importable = importable.split(".") + module = ".".join(importable[:-1]) + func = importable[-1] + print(f"module: {module}") + validator = importlib.import_module(module) + constraints_func = getattr(validator, func) + if constraint_type == "build" and "dims" not in conf_file["dataset"]: + raise ValueError("`dims` needed for build constraints but not specified in datasets.yaml") + return constraints_func(param, dims) + return True diff --git a/python/cuvs_bench/cuvs_bench/run/runners.py b/python/cuvs_bench/cuvs_bench/run/runners.py new file mode 100644 index 0000000000..54d32c77d2 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/run/runners.py @@ -0,0 +1,246 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import json +import subprocess +import uuid + +from typing import Dict, List, Optional, Tuple, Union + + +def cuvs_bench_cpp( + conf_file: Dict, + conf_filename: str, + conf_filedir: str, + executables_to_run: Dict[Tuple[str, str, Tuple[str, str]], Dict[str, List[Dict]]], + dataset_path: str, + force: bool, + build: bool, + search: bool, + dry_run: bool, + k: int, + batch_size: int, + search_threads: Optional[int], + mode: str = "throughput", + raft_log_level: str = "info" +) -> None: + """ + Run the CUVS benchmarking tool with the provided configuration. + + Parameters + ---------- + conf_file : Dict + The configuration file content. + conf_filename : str + The name of the configuration file. + conf_filedir : str + The directory of the configuration file. + executables_to_run : Dict[Tuple[str, str, Tuple[str, str]], Dict[str, List[Dict]]] + Dictionary of executables to run and their configurations. + dataset_path : str + The path to the dataset. + force : bool + Whether to force the execution regardless of existing results. + build : bool + Whether to build the indices. + search : bool + Whether to perform the search. + dry_run : bool + Whether to perform a dry run without actual execution. + k : int + The number of nearest neighbors to search for. + batch_size : int + The size of each batch for processing. + search_threads : Optional[int] + The number of threads to use for searching. + mode : str, optional + The mode of search to perform ('latency' or 'throughput'), by default 'throughput'. + raft_log_level : str, optional + The logging level for the RAFT library, by default 'info'. 
+ + Returns + ------- + None + """ + for executable, ann_executable_path, output_filename in executables_to_run.keys(): + # Need to write temporary configuration + temp_conf_filename = f"{conf_filename}_{output_filename[1]}_{uuid.uuid1()}.json" + with open(temp_conf_filename, "w") as f: + temp_conf = { + "dataset": conf_file["dataset"], + "search_basic_param": conf_file["search_basic_param"], + "index": executables_to_run[(executable, ann_executable_path, output_filename)]["index"] + } + json_str = json.dumps(temp_conf, indent=2) + f.write(json_str) + + legacy_result_folder = os.path.join(dataset_path, conf_file["dataset"]["name"], "result") + os.makedirs(legacy_result_folder, exist_ok=True) + + if build: + build_folder = os.path.join(legacy_result_folder, "build") + os.makedirs(build_folder, exist_ok=True) + build_file = f"{output_filename[0]}.json" + temp_build_file = f"{build_file}.lock" + cmd = [ + ann_executable_path, + "--build", + f"--data_prefix={dataset_path}", + "--benchmark_out_format=json", + "--benchmark_counters_tabular=true", + f"--benchmark_out={os.path.join(build_folder, temp_build_file)}", + f"--raft_log_level={parse_log_level(raft_log_level)}" + ] + if force: + cmd.append("--force") + cmd.append(temp_conf_filename) + + if dry_run: + print(f"Benchmark command for {output_filename[0]}:\n{' '.join(cmd)}\n") + else: + try: + subprocess.run(cmd, check=True) + merge_build_files(build_folder, build_file, temp_build_file) + except Exception as e: + print(f"Error occurred running benchmark: {e}") + finally: + os.remove(os.path.join(build_folder, temp_build_file)) + if not search: + os.remove(temp_conf_filename) + + if search: + search_folder = os.path.join(legacy_result_folder, "search") + os.makedirs(search_folder, exist_ok=True) + search_file = f"{output_filename[1]}.json" + cmd = [ + ann_executable_path, + "--search", + f"--data_prefix={dataset_path}", + "--benchmark_counters_tabular=true", + f"--override_kv=k:{k}", + 
f"--override_kv=n_queries:{batch_size}", + "--benchmark_min_warmup_time=1", + "--benchmark_out_format=json", + f"--mode={mode}", + f"--benchmark_out={os.path.join(search_folder, search_file)}", + f"--raft_log_level={parse_log_level(raft_log_level)}" + ] + if force: + cmd.append("--force") + if search_threads: + cmd.append(f"--threads={search_threads}") + cmd.append(temp_conf_filename) + + if dry_run: + print(f"Benchmark command for {output_filename[1]}:\n{' '.join(cmd)}\n") + else: + try: + subprocess.run(cmd, check=True) + except Exception as e: + print(f"Error occurred running benchmark: {e}") + finally: + os.remove(temp_conf_filename) + + +log_levels = { + "off": 0, + "error": 1, + "warn": 2, + "info": 3, + "debug": 4, + "trace": 5, +} + +def parse_log_level(level_str: str) -> int: + """ + Parse the log level from string to integer. + + Parameters + ---------- + level_str : str + The log level as a string. + + Returns + ------- + int + The corresponding integer value of the log level. + + Raises + ------ + ValueError + If the log level string is invalid. + """ + if level_str not in log_levels: + raise ValueError(f"Invalid log level: {level_str}") + return log_levels[level_str.lower()] + + +def merge_build_files(build_dir: str, build_file: str, temp_build_file: str) -> None: + """ + Merge temporary build files into the main build file. + + Parameters + ---------- + build_dir : str + The directory of the build files. + build_file : str + The main build file. + temp_build_file : str + The temporary build file to merge. + + Returns + ------- + None + + Raises + ------ + ValueError + If the temporary build file is not found. 
+ """ + build_dict = {} + + # If build file exists, read it + build_json_path = os.path.join(build_dir, build_file) + tmp_build_json_path = os.path.join(build_dir, temp_build_file) + if os.path.isfile(build_json_path): + try: + with open(build_json_path, "r") as f: + build_dict = json.load(f) + except Exception as e: + print(f"Error loading existing build file: {build_json_path} ({e})") + + temp_build_dict = {} + if os.path.isfile(tmp_build_json_path): + with open(tmp_build_json_path, "r") as f: + temp_build_dict = json.load(f) + else: + raise ValueError(f"Temp build file not found: {tmp_build_json_path}") + + tmp_benchmarks = temp_build_dict.get("benchmarks", {}) + benchmarks = build_dict.get("benchmarks", {}) + + # If the build time is absolute 0 then an error occurred + final_bench_dict = {b["name"]: b for b in benchmarks if b["real_time"] > 0} + + for tmp_bench in tmp_benchmarks: + if tmp_bench["real_time"] > 0: + final_bench_dict[tmp_bench["name"]] = tmp_bench + + temp_build_dict["benchmarks"] = list(final_bench_dict.values()) + with open(build_json_path, "w") as f: + json_str = json.dumps(temp_build_dict, indent=2) + f.write(json_str) diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index ec41af3df1..bf5372e558 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -1,18 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. [build-system] -build-backend = "rapids_build_backend.build" requires = [ - "cmake>=3.26.4,!=3.30.0", - "cuda-python", - "cython>=3.0.0", - "ninja", - "pylibraft==24.10.*,>=0.0.0a0", - "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project] -name = "cuvs-bench" +name = "cuvs_bench" dynamic = ["version"] description = "cuVS benchmarks" authors = [ @@ -36,9 +29,6 @@ classifiers = [ [project.urls] Homepage = "https://github.com/rapidsai/raft" -[tool.setuptools.packages.find] -where = ["src"] - [tool.setuptools.package-data] "*" = ["*.*", "VERSION"] @@ -66,8 +56,3 @@ skip = [ [tool.setuptools.dynamic] version = { file = "cuvs_bench/VERSION" } -[tool.rapids-build-backend] -build-backend = "scikit_build_core.build" -requires = [] -dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true" From ec6d70c37fc50d8f914e489fa1af9b9e34722fb6 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 6 Aug 2024 17:49:21 -0500 Subject: [PATCH 05/18] FIX some cuvs_bench python build dependencies --- dependencies.yaml | 15 +++++++++++++++ python/cuvs_bench/pyproject.toml | 14 +++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 6c52cb4cda..2773fef0c9 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -102,6 +102,13 @@ files: - test_python_common - test_py_cuvs - cupy + py_build_cuvs_bench: + output: pyproject + pyproject_dir: python/cuvs_bench + extras: + table: build-system + includes: + - rapids_build_setuptools py_run_cuvs_bench: output: pyproject pyproject_dir: python/cuvs_bench @@ -173,6 +180,14 @@ dependencies: - matrix: {cuda: "11.2", arch: aarch64} packages: [nvcc_linux-aarch64=11.2] + rapids_build_setuptools: + common: + - output_types: [requirements, pyproject] + packages: + - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 + - setuptools + - wheel + build_py_cuvs: common: - output_types: [conda] diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index bf5372e558..9894dfc171 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -2,6 +2,9 @@ [build-system] requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + 
"setuptools", + "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project] @@ -26,11 +29,16 @@ classifiers = [ "Programming Language :: Python :: 3.11", ] +[tool.rapids-build-backend] +build-backend = "setuptools.build_meta" +dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" + [project.urls] -Homepage = "https://github.com/rapidsai/raft" +Homepage = "https://github.com/rapidsai/cuvs" -[tool.setuptools.package-data] -"*" = ["*.*", "VERSION"] +[tool.setuptools] +license-files = ["LICENSE"] [tool.isort] line_length = 79 From 585ad53b9f8aa49690655f166e463ee0326e385f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 6 Aug 2024 18:10:12 -0500 Subject: [PATCH 06/18] FIX add missing algorithms.yaml --- .../cuvs_bench/cuvs_bench/config/__init__.py | 17 ++++++++ .../cuvs_bench/config/algorithms.yaml | 42 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 python/cuvs_bench/cuvs_bench/config/__init__.py create mode 100644 python/cuvs_bench/cuvs_bench/config/algorithms.yaml diff --git a/python/cuvs_bench/cuvs_bench/config/__init__.py b/python/cuvs_bench/cuvs_bench/config/__init__.py new file mode 100644 index 0000000000..7c04e3fd8c --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/config/__init__.py @@ -0,0 +1,17 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .algos.constraints import * diff --git a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml new file mode 100644 index 0000000000..dc1127fbc5 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml @@ -0,0 +1,42 @@ +faiss_gpu_flat: + executable: FAISS_GPU_FLAT_ANN_BENCH + requires_gpu: true +faiss_gpu_ivf_flat: + executable: FAISS_GPU_IVF_FLAT_ANN_BENCH + requires_gpu: true +faiss_gpu_ivf_pq: + executable: FAISS_GPU_IVF_PQ_ANN_BENCH + requires_gpu: true +faiss_gpu_ivf_sq: + executable: FAISS_GPU_IVF_PQ_ANN_BENCH + requires_gpu: true +faiss_cpu_flat: + executable: FAISS_CPU_FLAT_ANN_BENCH + requires_gpu: false +faiss_cpu_ivf_flat: + executable: FAISS_CPU_IVF_FLAT_ANN_BENCH + requires_gpu: false +faiss_cpu_ivf_pq: + executable: FAISS_CPU_IVF_PQ_ANN_BENCH + requires_gpu: false +cuvs_ivf_flat: + executable: CUVS_IVF_FLAT_ANN_BENCH + requires_gpu: true +cuvs_ivf_pq: + executable: CUVS_IVF_PQ_ANN_BENCH + requires_gpu: true +cuvs_cagra: + executable: CUVS_CAGRA_ANN_BENCH + requires_gpu: true +cuvs_brute_force: + executable: CUVS_BRUTE_FORCE_ANN_BENCH + requires_gpu: true +ggnn: + executable: GGNN_ANN_BENCH + requires_gpu: true +hnswlib: + executable: HNSWLIB_ANN_BENCH + requires_gpu: false +cuvs_cagra_hnswlib: + executable: CUVS_CAGRA_HNSWLIB_ANN_BENCH + requires_gpu: true From a400e4c5fc2264b18c4ffaa5e5e356857e78e142 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 21 Aug 2024 10:56:33 -0500 Subject: [PATCH 07/18] ENH refactor run code and add prompts to CLI --- python/cuvs_bench/cuvs_bench/run/__main__.py | 53 +- python/cuvs_bench/cuvs_bench/run/run.py | 731 +++++++++---------- 2 files changed, 386 insertions(+), 398 deletions(-) diff --git a/python/cuvs_bench/cuvs_bench/run/__main__.py b/python/cuvs_bench/cuvs_bench/run/__main__.py index fb269014e4..5590105b63 100644 --- 
a/python/cuvs_bench/cuvs_bench/run/__main__.py +++ b/python/cuvs_bench/cuvs_bench/run/__main__.py @@ -27,7 +27,8 @@ @click.option( "--subset-size", type=click.IntRange(min=1), - help="the number of subset rows of the dataset to build the index" + prompt='Enter the subset size', + help="The number of subset rows of the dataset to build the index" ) @click.option( "-k", @@ -35,39 +36,45 @@ default=10, show_default=True, type=click.IntRange(min=1), - help="the number of nearest neighbors to search for" + prompt='Enter the number of neighbors to search for', + help="The number of nearest neighbors to search for" ) @click.option( "-bs", "--batch-size", default=10000, show_default=True, type=click.IntRange(min=1), - help="number of query vectors to use in each query trial" + prompt='Enter the batch size', + help="Number of query vectors to use in each query trial" ) @click.option( "--dataset-configuration", - help="path to YAML configuration file for datasets" + prompt='Enter the path to YAML configuration file for datasets', + help="Path to YAML configuration file for datasets" ) @click.option( "--configuration", - help="path to YAML configuration file or directory for algorithms" + prompt='Enter the path to YAML configuration file or directory for algorithms', + help="Path to YAML configuration file or directory for algorithms. " "Any run groups found in the specified file/directory will " "automatically override groups of the same name present in the " - "default configurations, including `base`", + "default configurations, including `base`." 
) @click.option( "--dataset", default="glove-100-inner", show_default=True, - help="name of dataset" + prompt='Enter the name of dataset', + help="Name of dataset" ) @click.option( "--dataset-path", default=lambda: os.environ.get("RAPIDS_DATASET_ROOT_DIR", os.path.join(Path(__file__).parent, "datasets/")), show_default=True, - help="path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, " - "otherwise a datasets subdirectory from the calling directory", + prompt='Enter the path to dataset folder', + help="Path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, " + "otherwise a datasets subdirectory from the calling directory." ) @click.option( "--build", @@ -83,56 +90,62 @@ "--algorithms", default=None, show_default=True, - help="run only comma separated list of named algorithms. If parameters `groups` and `algo-groups` are both undefined, " - "then group `base` is run by default", + prompt='Enter the comma separated list of named algorithms to run', + help="Run only comma separated list of named algorithms. If parameters `groups` and `algo-groups` are both undefined, " + "then group `base` is run by default." ) @click.option( "--groups", default="base", show_default=True, - help="run only comma separated groups of parameters" + prompt='Enter the comma separated groups of parameters', + help="Run only comma separated groups of parameters" ) @click.option( "--algo-groups", - help='add comma separated . to run. Example usage: "--algo-groups=raft_cagra.large,hnswlib.large"', + prompt='Enter the comma separated . to run', + help='Add comma separated . to run. 
Example usage: "--algo-groups=raft_cagra.large,hnswlib.large".', ) @click.option( "-f", "--force", is_flag=True, - help="re-run algorithms even if their results already exist" + help="Re-run algorithms even if their results already exist" ) @click.option( "-m", "--search-mode", default="latency", show_default=True, - help="run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode", + prompt='Enter the search mode ("latency" or "throughput")', + help="Run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode." ) @click.option( "-t", "--search-threads", default=None, show_default=True, - help="specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. " + prompt='Enter the number of threads to use for throughput benchmark', + help="Specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. " "Example: --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. " - "If only 'min' is specified, then a single test is run with 'min' threads. By default min=1, max=.", + "If only 'min' is specified, then a single test is run with 'min' threads. By default min=1, max=." ) @click.option( "-r", "--dry-run", is_flag=True, - help="dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed " + help="Dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that’s consumed " "by the lower-level c++ binaries and then print the command to run execute the benchmarks but will not actually execute " - "the command.", + "the command." ) @click.option( "--raft-log-level", default="info", show_default=True, + prompt='Enter the log level', help="Log level, possible values are [off, error, warn, info, debug, trace]. Default: 'info'. 
" "Note that 'debug' or more detailed logging level requires that the library is compiled with " - "-DRAFT_ACTIVE_LEVEL= where >= ", + "-DRAFT_ACTIVE_LEVEL= where >= ." ) def main( subset_size: Optional[int], diff --git a/python/cuvs_bench/cuvs_bench/run/run.py b/python/cuvs_bench/cuvs_bench/run/run.py index 3f3a25a2af..db9628b80c 100644 --- a/python/cuvs_bench/cuvs_bench/run/run.py +++ b/python/cuvs_bench/cuvs_bench/run/run.py @@ -16,387 +16,263 @@ import importlib import itertools -import json import os -import subprocess -import sys -import uuid import warnings import yaml - from importlib import import_module -from typing import Optional, Tuple, Union, Dict, Any - +from typing import Optional, Dict, Any, Tuple from .runners import cuvs_bench_cpp -def run_benchmark( - subset_size: int, - count: int, - batch_size: int, - dataset_configuration: Optional[str], - configuration: Optional[str], - dataset: str, - dataset_path: str, - build: Optional[bool], - search: Optional[bool], - algorithms: Optional[str], - groups: str, - algo_groups: Optional[str], - force: bool, - search_mode: str, - search_threads: int, - dry_run: bool, - raft_log_level: int -) -> None: +def load_yaml_file(file_path: str) -> dict: """ - Runs a benchmarking process based on the provided configurations. - + Load a YAML file and return its contents as a dictionary. Parameters ---------- - count : int - The number of iterations to run. - batch_size : int - The size of each batch for processing. - dataset_configuration : Optional[str] - Path to the dataset configuration file. - configuration : Optional[str] - Path to the algorithm configuration file or directory. - dataset : str - The name of the dataset to use. - dataset_path : str - The path to the dataset directory. - build : Optional[bool] - Whether to build the indices. - search : Optional[bool] - Whether to perform the search. - algorithms : Optional[str] - Comma-separated list of algorithm names to use. 
- groups : str - Comma-separated list of groups to consider. - algo_groups : Optional[str] - Comma-separated list of algorithm groups to consider. - force : bool - Whether to force the execution regardless of warnings. - search_mode : str - The mode of search to perform. - search_threads : int - The number of threads to use for searching. - dry_run : bool - Whether to perform a dry run without actual execution. - raft_log_level : int - The logging level for the RAFT library. - + file_path : str + The path to the YAML file. Returns ------- - None + dict + The contents of the YAML file. """ + with open(file_path, "r") as f: + return yaml.safe_load(f) - scripts_path = os.path.dirname(os.path.realpath(__file__)) - call_path = os.getcwd() - gpu_present = rmm_present() - - with open(os.path.join(scripts_path, "../config", "algorithms.yaml"), "r") as f: - algos_yaml = yaml.safe_load(f) - # If both build and search are not provided, - # run both - if not build and not search: - build = True - search = True - - # look for dataset configuration path, if not given then use the - # default location in cuvs_bench/conf - if dataset_configuration: - dataset_conf_f = dataset_configuration - else: - dataset_conf_f = os.path.join(scripts_path, "../config/datasets", "datasets.yaml") - with open(dataset_conf_f, "r") as f: - dataset_conf_all = yaml.safe_load(f) - - # load datasets configuration files - dataset_conf = None +def get_dataset_configuration(dataset: str, dataset_conf_all: list) -> dict: + """ + Retrieve the configuration for a specific dataset. + Parameters + ---------- + dataset : str + The name of the dataset to retrieve the configuration for. + dataset_conf_all : list + A list of dataset configurations. + Returns + ------- + dict + The configuration for the specified dataset. + Raises + ------ + ValueError + If the dataset configuration is not found. 
+ """ for dset in dataset_conf_all: if dataset == dset["name"]: - dataset_conf = dset - break - if not dataset_conf: - raise ValueError("Could not find a dataset configuration") + return dset + raise ValueError("Could not find a dataset configuration") - conf_file = dict() - conf_file["dataset"] = dataset_conf + +def prepare_conf_file(dataset_conf: dict, subset_size: Optional[int], count: int, batch_size: int) -> dict: + """ + Prepare the main configuration file for the benchmark. + Parameters + ---------- + dataset_conf : dict + The configuration for the dataset. + subset_size : Optional[int] + The subset size of the dataset. + count : int + The number of nearest neighbors to search for. + batch_size : int + The size of each batch for processing. + Returns + ------- + dict + The prepared configuration file. + """ + conf_file = {"dataset": dataset_conf} if subset_size: conf_file["dataset"]["subset_size"] = subset_size + conf_file["search_basic_param"] = {"k": count, "batch_size": batch_size} + return conf_file - conf_file["search_basic_param"] = {} - conf_file["search_basic_param"]["k"] = count - conf_file["search_basic_param"]["batch_size"] = batch_size +def gather_algorithm_configs(scripts_path: str, configuration: Optional[str]) -> list: + """ + Gather the list of algorithm configuration files. + Parameters + ---------- + scripts_path : str + The path to the script directory. + configuration : Optional[str] + The path to the algorithm configuration directory or file. + Returns + ------- + list + A list of paths to the algorithm configuration files. 
+ """ algos_conf_fs = os.listdir(os.path.join(scripts_path, "../config", "algos")) algos_conf_fs = [ os.path.join(scripts_path, "../config", "algos", f) for f in algos_conf_fs if ".json" not in f and "constraint" not in f and ".py" not in f ] - conf_filedir = os.path.join(scripts_path, "conf", "algos") + if configuration: if os.path.isdir(configuration): - conf_filedir = configuration - algos_conf_fs = algos_conf_fs + [ + algos_conf_fs += [ os.path.join(configuration, f) for f in os.listdir(configuration) if ".json" not in f ] elif os.path.isfile(configuration): - conf_filedir = os.path.normpath(configuration).split(os.sep) - conf_filedir = os.path.join(*conf_filedir[:-1]) - algos_conf_fs = algos_conf_fs + [configuration] - - filter_algos = True if algorithms else False - if filter_algos: - allowed_algos = algorithms.split(",") - named_groups = groups.split(",") - filter_algo_groups = True if algo_groups else False - allowed_algo_groups = None - if filter_algo_groups: - allowed_algo_groups = [ - algo_group.split(".") for algo_group in algo_groups.split(",") - ] - allowed_algo_groups = list(zip(*allowed_algo_groups)) - algos_conf = dict() + algos_conf_fs.append(configuration) + return algos_conf_fs + + +def load_algorithms_conf(algos_conf_fs: list, allowed_algos: Optional[list], allowed_algo_groups: Optional[tuple]) -> dict: + """ + Load and filter the algorithm configurations. + Parameters + ---------- + algos_conf_fs : list + A list of paths to algorithm configuration files. + allowed_algos : Optional[list] + A list of allowed algorithm names to filter by. + allowed_algo_groups : Optional[tuple] + A tuple of allowed algorithm groups to filter by. + Returns + ------- + dict + A dictionary containing the loaded and filtered algorithm configurations. 
+ """ + algos_conf = {} for algo_f in algos_conf_fs: - with open(algo_f, "r") as f: - try: - algo = yaml.safe_load(f) - except Exception as e: - warnings.warn( - f"Could not load YAML config {algo_f} due to " - + e.with_traceback() - ) - continue - insert_algo = True - insert_algo_group = False - if filter_algos: - if algo["name"] not in allowed_algos: - insert_algo = False - if filter_algo_groups: - if algo["name"] in allowed_algo_groups[0]: - insert_algo_group = True - - def add_algo_group(group_list): - if algo["name"] not in algos_conf: - algos_conf[algo["name"]] = {"groups": {}} - for group in algo["groups"].keys(): - if group in group_list: - algos_conf[algo["name"]]["groups"][group] = algo[ - "groups" - ][group] - if "constraints" in algo: - algos_conf[algo["name"]]["constraints"] = algo[ - "constraints" - ] - - if insert_algo: - add_algo_group(named_groups) - if insert_algo_group: - add_algo_group(allowed_algo_groups[1]) - - executables_to_run = dict() - for algo in algos_conf.keys(): - validate_algorithm(algos_yaml, algo, gpu_present) - for group in algos_conf[algo]["groups"].keys(): - executable = find_executable( - algos_yaml, algo, group, count, batch_size - ) - if executable not in executables_to_run: - executables_to_run[executable] = {"index": []} - build_params = algos_conf[algo]["groups"][group]["build"] or {} - search_params = algos_conf[algo]["groups"][group]["search"] or {} - - param_names = [] - param_lists = [] - for param in build_params.keys(): - param_names.append(param) - param_lists.append(build_params[param]) - - all_build_params = itertools.product(*param_lists) - - search_param_names = [] - search_param_lists = [] - for search_param in search_params.keys(): - search_param_names.append(search_param) - search_param_lists.append(search_params[search_param]) - - for params in all_build_params: - index = {"algo": algo, "build_param": {}} - if group != "base": - index_name = f"{algo}_{group}" - else: - index_name = f"{algo}" - for i in 
range(len(params)): - index["build_param"][param_names[i]] = params[i] - index_name += "." + f"{param_names[i]}{params[i]}" - - if "constraints" in algos_conf[algo]: - if "build" in algos_conf[algo]["constraints"]: - importable = algos_conf[algo]["constraints"]["build"] - importable = importable.split(".") - module = ".".join(importable[:-1]) - func = importable[-1] - validator = import_module(module) - build_constraints = getattr(validator, func) - if "dims" not in conf_file["dataset"]: - raise ValueError( - "`dims` needed for build constraints but not " - "specified in datasets.yaml" - ) - if not build_constraints( - index["build_param"], conf_file["dataset"]["dims"] - ): - continue - index_filename = ( - index_name - if len(index_name) < 128 - else str(hash(index_name)) - ) - index["name"] = index_name - index["file"] = os.path.join( - dataset_path, dataset, "index", index_filename - ) - index["search_params"] = [] - all_search_params = itertools.product(*search_param_lists) - for search_params in all_search_params: - search_dict = dict() - for i in range(len(search_params)): - search_dict[search_param_names[i]] = search_params[i] - if "constraints" in algos_conf[algo]: - if "search" in algos_conf[algo]["constraints"]: - importable = algos_conf[algo]["constraints"][ - "search" - ] - importable = importable.split(".") - module = ".".join(importable[:-1]) - func = importable[-1] - validator = import_module(module) - search_constraints = getattr(validator, func) - if search_constraints( - search_dict, - index["build_param"], - count, - batch_size, - ): - index["search_params"].append(search_dict) - else: - index["search_params"].append(search_dict) - executables_to_run[executable]["index"].append(index) - - if len(index["search_params"]) == 0: - print("No search parameters were added to configuration") - executable = find_executable( - algos_yaml, algo, group, count, batch_size + try: + algo = load_yaml_file(algo_f) + except Exception as e: + warnings.warn(f"Could 
not load YAML config {algo_f} due to {e}") + continue + if allowed_algos and algo["name"] not in allowed_algos: + continue + algos_conf[algo["name"]] = {"groups": algo.get("groups", {}), "constraints": algo.get("constraints", {})} + if allowed_algo_groups and algo["name"] in allowed_algo_groups[0]: + algos_conf[algo["name"]]["groups"].update( + {group: algo["groups"][group] for group in allowed_algo_groups[1] if group in algo["groups"]} ) + return algos_conf + + +def prepare_executables(algos_conf: dict, algos_yaml: dict, gpu_present: bool, conf_file: dict, dataset_path: str, dataset: str, count: int, batch_size: int) -> dict: + """ + Prepare the list of executables to run based on the configurations. + Parameters + ---------- + algos_conf : dict + The loaded algorithm configurations. + algos_yaml : dict + The global algorithms configuration. + gpu_present : bool + Whether a GPU is present. + conf_file : dict + The main configuration file. + dataset_path : str + The path to the dataset directory. + dataset : str + The name of the dataset. + count : int + The number of nearest neighbors to search for. + batch_size : int + The size of each batch for processing. + Returns + ------- + dict + A dictionary of executables to run with their associated configurations. 
+ """ + executables_to_run = {} + for algo, algo_conf in algos_conf.items(): + validate_algorithm(algos_yaml, algo, gpu_present) + for group, group_conf in algo_conf["groups"].items(): + executable = find_executable(algos_yaml, algo, group, count, batch_size) if executable not in executables_to_run: executables_to_run[executable] = {"index": []} - build_params = algos_conf[algo]["groups"][group]["build"] or {} - search_params = algos_conf[algo]["groups"][group]["search"] or {} - - param_names = [] - param_lists = [] - for param in build_params.keys(): - param_names.append(param) - param_lists.append(build_params[param]) - - all_build_params = itertools.product(*param_lists) - - search_param_names = [] - search_param_lists = [] - for search_param in search_params.keys(): - search_param_names.append(search_param) - search_param_lists.append(search_params[search_param]) - - for params in all_build_params: - index = {"algo": algo, "build_param": {}} - if group != "base": - index_name = f"{algo}_{group}" - else: - index_name = f"{algo}" - for i in range(len(params)): - index["build_param"][param_names[i]] = params[i] - index_name += "." 
+ f"{param_names[i]}{params[i]}" - - if "constraints" in algos_conf[algo]: - if "build" in algos_conf[algo]["constraints"]: - importable = algos_conf[algo]["constraints"]["build"] - importable = importable.split(".") - module = ".".join(importable[:-1]) - func = importable[-1] - validator = import_module(module) - build_constraints = getattr(validator, func) - if "dims" not in conf_file["dataset"]: - raise ValueError( - "`dims` needed for build constraints but not " - "specified in datasets.yaml" - ) - if not build_constraints( - index["build_param"], conf_file["dataset"]["dims"] - ): - continue - index_filename = ( - index_name - if len(index_name) < 128 - else str(hash(index_name)) - ) - index["name"] = index_name - index["file"] = os.path.join( - dataset_path, dataset, "index", index_filename - ) - index["search_params"] = [] - all_search_params = itertools.product(*search_param_lists) - for search_params in all_search_params: - search_dict = dict() - for i in range(len(search_params)): - search_dict[search_param_names[i]] = search_params[i] - # if "constraints" in algos_conf[algo]: - # todo: refactor common code - if False: - if "search" in algos_conf[algo]["constraints"]: - if validate_constraints(algos_conf, - algo, - "search", - search_dict, - index["build_param"], - count, - batch_size): - index["search_params"].append(search_dict) - else: - index["search_params"].append(search_dict) - executables_to_run[executable]["index"].append(index) - - if len(index["search_params"]) == 0: - print("No search parameters were added to configuration") + indexes = prepare_indexes(group_conf, algo, group, conf_file, dataset_path, dataset, count, batch_size) + executables_to_run[executable]["index"].extend(indexes) + return executables_to_run - cuvs_bench_cpp( - conf_file, - f"{dataset}", - conf_filedir, - executables_to_run, - dataset_path, - force, - build, - search, - dry_run, - count, - batch_size, - search_threads, - search_mode, - raft_log_level, - ) + +def 
prepare_indexes(group_conf: dict, algo: str, group: str, conf_file: dict, dataset_path: str, dataset: str, count: int, batch_size: int) -> list: + """ + Prepare the index configurations for the given algorithm and group. + Parameters + ---------- + group_conf : dict + The configuration for the algorithm group. + algo : str + The name of the algorithm. + group : str + The name of the group. + conf_file : dict + The main configuration file. + dataset_path : str + The path to the dataset directory. + dataset : str + The name of the dataset. + count : int + The number of nearest neighbors to search for. + batch_size : int + The size of each batch for processing. + Returns + ------- + list + A list of index configurations. + """ + indexes = [] + build_params = group_conf.get("build", {}) + search_params = group_conf.get("search", {}) + all_build_params = itertools.product(*build_params.values()) + search_param_names, search_param_lists = zip(*search_params.items()) if search_params else ([], []) + for params in all_build_params: + index = {"algo": algo, "build_param": dict(zip(build_params.keys(), params))} + index_name = f"{algo}_{group}" if group != "base" else f"{algo}" + index_filename = index_name if len(index_name) < 128 else str(hash(index_name)) + index["name"] = index_name + index["file"] = os.path.join(dataset_path, dataset, "index", index_filename) + index["search_params"] = validate_search_params( + itertools.product(*search_param_lists), search_param_names, algo, group_conf, conf_file, count, batch_size + ) + if index["search_params"]: + indexes.append(index) + return indexes + + +def validate_search_params(all_search_params, search_param_names, algo, group_conf, conf_file, count, batch_size) -> list: + """ + Validate and prepare the search parameters for the given algorithm and group. + Parameters + ---------- + all_search_params : itertools.product + The Cartesian product of search parameter values. 
+ search_param_names : list + The names of the search parameters. + algo : str + The name of the algorithm. + group_conf : dict + The configuration for the algorithm group. + conf_file : dict + The main configuration file. + count : int + The number of nearest neighbors to search for. + batch_size : int + The size of each batch for processing. + Returns + ------- + list + A list of validated search parameters. + """ + search_params_list = [] + for search_params in all_search_params: + search_dict = dict(zip(search_param_names, search_params)) + if validate_constraints(group_conf, algo, "search", search_dict, conf_file["dataset"].get("dims"), count, batch_size): + search_params_list.append(search_dict) + return search_params_list def rmm_present() -> bool: """ - Check if RMM is present. - + Check if RMM (RAPIDS Memory Manager) is present. Returns ------- bool @@ -412,7 +288,6 @@ def rmm_present() -> bool: def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: int) -> Tuple[str, str, Tuple[str, str]]: """ Find the executable for the given algorithm and group. - Parameters ---------- algos_conf : dict @@ -425,7 +300,6 @@ def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: The number of nearest neighbors to search for. batch_size : int The size of each batch for processing. - Returns ------- Tuple[str, str, Tuple[str, str]] @@ -433,46 +307,50 @@ def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: """ executable = algos_conf[algo]["executable"] file_name = (f"{algo},{group}", f"{algo},{group},k{k},bs{batch_size}") - - # Check for devcontainer build - scripts_path = os.path.dirname(os.path.realpath(__file__)) - build_path = "/home/coder/cuvs/cpp/build/latest/bench/ann" - print(f"build_path: {build_path}") - if os.path.exists(build_path): - print(f"-- Detected devcontainer artifacts in {build_path}. 
") + build_path = get_build_path(executable) + if build_path: return executable, build_path, file_name + raise FileNotFoundError(executable) + + +def get_build_path(executable: str) -> Optional[str]: + """ + Get the build path for the given executable. + Parameters + ---------- + executable : str + The name of the executable. + Returns + ------- + Optional[str] + The build path for the executable, if found. + """ + + devcontainer_path = "/home/coder/cuvs/cpp/build/latest/bench/ann" + if os.path.exists(devcontainer_path): + print(f"-- Detected devcontainer artifacts in {devcontainer_path}.") + return devcontainer_path build_path = os.getenv("CUVS_HOME") - if build_path is not None: + if build_path: build_path = os.path.join(build_path, "cpp", "build", "release", executable) if os.path.exists(build_path): - print(f"-- Using RAFT bench from repository in {build_path}. ") - return executable, build_path, file_name - - # # todo: better path detection for devcontainer - # build_path = os.getenv("CUVS_BENCH_BUILD_PATH") - # print("build_path: ", build_path) - # if build_path is not None: - # if os.path.exists(build_path): - # print(f"-- Using devcontainer location from {build_path}. ") - # return executable, build_path, file_name + print(f"-- Using RAFT bench from repository in {build_path}.") + return build_path conda_path = os.getenv("CONDA_PREFIX") - if conda_path is not None: - conda_path = os.path.join(conda_path, "bin", "ann", executable) - if os.path.exists(conda_path): - print("-- Using cuVS bench found in conda environment. 
") - return executable, conda_path, file_name - else: - raise FileNotFoundError(executable) - else: - raise FileNotFoundError(executable) + if conda_path: + conda_executable = os.path.join(conda_path, "bin", "ann", executable) + if os.path.exists(conda_executable): + print("-- Using cuVS bench found in conda environment.") + return conda_executable + + return None def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: """ - Validate algorithm and whether it requires gpu. . - + Validate the algorithm based on the available hardware (GPU presence). Parameters ---------- algos_conf : dict @@ -481,7 +359,6 @@ def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: The name of the algorithm. gpu_present : bool Whether a GPU is present. - Returns ------- bool @@ -490,8 +367,7 @@ def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: algos_conf_keys = set(algos_conf.keys()) if gpu_present: return algo in algos_conf_keys - else: - return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False + return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False def validate_constraints( @@ -505,7 +381,6 @@ def validate_constraints( ) -> bool: """ Validate the constraints for the given algorithm and constraint type. - Parameters ---------- algos_conf : Dict[str, Any] @@ -518,21 +393,121 @@ def validate_constraints( The parameters to validate against the constraints. dims : Any The dimensions required for the constraints. - + k : Optional[int] + The number of nearest neighbors to search for. + batch_size : Optional[int] + The size of each batch for processing. Returns ------- bool True if the constraints are valid, False otherwise. + Raises + ------ + ValueError + If `dims` are needed for build constraints but not specified in the dataset configuration. 
""" if constraint_type in algos_conf[algo]["constraints"]: importable = algos_conf[algo]["constraints"][constraint_type] - importable = importable.split(".") - module = ".".join(importable[:-1]) - func = importable[-1] - print(f"module: {module}") - validator = importlib.import_module(module) + module, func = ".".join(importable.split(".")[:-1]), importable.split(".")[-1] + validator = import_module(module) constraints_func = getattr(validator, func) if constraint_type == "build" and "dims" not in conf_file["dataset"]: raise ValueError("`dims` needed for build constraints but not specified in datasets.yaml") return constraints_func(param, dims) return True + + +def run_benchmark( + subset_size: int, + count: int, + batch_size: int, + dataset_configuration: Optional[str], + configuration: Optional[str], + dataset: str, + dataset_path: str, + build: Optional[bool], + search: Optional[bool], + algorithms: Optional[str], + groups: str, + algo_groups: Optional[str], + force: bool, + search_mode: str, + search_threads: int, + dry_run: bool, + raft_log_level: int +) -> None: + """ + Runs a benchmarking process based on the provided configurations. + Parameters + ---------- + subset_size : int + The subset size of the dataset. + count : int + The number of nearest neighbors to search for. + batch_size : int + The size of each batch for processing. + dataset_configuration : Optional[str] + Path to the dataset configuration file. + configuration : Optional[str] + Path to the algorithm configuration directory or file. + dataset : str + The name of the dataset to use. + dataset_path : str + The path to the dataset directory. + build : Optional[bool] + Whether to build the indices. + search : Optional[bool] + Whether to perform the search. + algorithms : Optional[str] + Comma-separated list of algorithm names to use. + groups : str + Comma-separated list of groups to consider. + algo_groups : Optional[str] + Comma-separated list of algorithm groups to consider. 
+ force : bool + Whether to force the execution regardless of warnings. + search_mode : str + The mode of search to perform. + search_threads : int + The number of threads to use for searching. + dry_run : bool + Whether to perform a dry run without actual execution. + raft_log_level : int + The logging level for the RAFT library. + Returns + ------- + None + """ + scripts_path = os.path.dirname(os.path.realpath(__file__)) + gpu_present = rmm_present() + + if not build and not search: + build, search = True, True + + dataset_conf_all = load_yaml_file(dataset_configuration or os.path.join(scripts_path, "../config/datasets", "datasets.yaml")) + dataset_conf = get_dataset_configuration(dataset, dataset_conf_all) + conf_file = prepare_conf_file(dataset_conf, subset_size, count, batch_size) + algos_conf_fs = gather_algorithm_configs(scripts_path, configuration) + + allowed_algos = algorithms.split(",") if algorithms else None + allowed_algo_groups = [algo_group.split(".") for algo_group in algo_groups.split(",")] if algo_groups else None + algos_conf = load_algorithms_conf(algos_conf_fs, allowed_algos, list(zip(*allowed_algo_groups)) if allowed_algo_groups else None) + + executables_to_run = prepare_executables(algos_conf, load_yaml_file(os.path.join(scripts_path, "../config", "algorithms.yaml")), gpu_present, conf_file, dataset_path, dataset, count, batch_size) + + cuvs_bench_cpp( + conf_file, + dataset, + os.path.dirname(configuration) if configuration and os.path.isfile(configuration) else os.path.join(scripts_path, "conf", "algos"), + executables_to_run, + dataset_path, + force, + build, + search, + dry_run, + count, + batch_size, + search_threads, + search_mode, + raft_log_level, + ) From 0e36e2184c95144c38059df1f51daf475e9a8a05 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 21 Aug 2024 10:57:23 -0500 Subject: [PATCH 08/18] FEA Add first version of pytests --- .../cuvs_bench/cuvs_bench/tests/test_run.py | 205 ++++++++++++++++++ 1 file changed, 205 
insertions(+) create mode 100644 python/cuvs_bench/cuvs_bench/tests/test_run.py diff --git a/python/cuvs_bench/cuvs_bench/tests/test_run.py b/python/cuvs_bench/cuvs_bench/tests/test_run.py new file mode 100644 index 0000000000..41ed5316b5 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/tests/test_run.py @@ -0,0 +1,205 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import os +import pytest + +from unittest.mock import patch, mock_open, MagicMock +from benchmark import ( + load_yaml_file, + get_dataset_configuration, + prepare_conf_file, + gather_algorithm_configs, + load_algorithms_conf, + prepare_executables, + prepare_indexes, + validate_search_params, + rmm_present, + find_executable, + validate_algorithm, + validate_constraints, +) + + +def test_load_yaml_file(): + yaml_content = """ + key: value + """ + with patch("builtins.open", mock_open(read_data=yaml_content)): + result = load_yaml_file("dummy_path.yaml") + assert result == {"key": "value"} + + +def test_get_dataset_configuration(): + dataset_conf_all = [{"name": "dataset1"}, {"name": "dataset2"}] + result = get_dataset_configuration("dataset1", dataset_conf_all) + assert result == {"name": "dataset1"} + with pytest.raises(ValueError): + get_dataset_configuration("non_existent_dataset", dataset_conf_all) + + +def test_prepare_conf_file(): + dataset_conf = {"name": "dataset1"} + result = prepare_conf_file(dataset_conf, 1000, 10, 128) + expected_result = 
{ + "dataset": {"name": "dataset1", "subset_size": 1000}, + "search_basic_param": {"k": 10, "batch_size": 128}, + } + assert result == expected_result + result_no_subset = prepare_conf_file(dataset_conf, None, 10, 128) + assert result_no_subset["dataset"].get("subset_size") is None + + +def test_gather_algorithm_configs(tmpdir): + scripts_path = tmpdir.mkdir("scripts") + algos_path = scripts_path.mkdir("algos") + algos_path.join("algo1.yaml").write("key: value") + algos_path.join("algo2.yaml").write("key: value") + result = gather_algorithm_configs(str(scripts_path), None) + assert len(result) == 2 + + + custom_conf_dir = tmpdir.mkdir("custom_conf") + custom_conf_dir.join("custom_algo.yaml").write("key: value") + result = gather_algorithm_configs(str(scripts_path), str(custom_conf_dir)) + assert len(result) == 3 + + + custom_conf_file = custom_conf_dir.join("custom_algo_file.yaml") + custom_conf_file.write("key: value") + result = gather_algorithm_configs(str(scripts_path), str(custom_conf_file)) + assert len(result) == 4 + + +def test_load_algorithms_conf(): + algos_conf_fs = ["path/to/algo1.yaml", "path/to/algo2.yaml"] + yaml_content = """ + name: algo1 + groups: + group1: {} + """ + with patch("builtins.open", mock_open(read_data=yaml_content)): + result = load_algorithms_conf(algos_conf_fs, None, None) + assert "algo1" in result + + + with patch("builtins.open", mock_open(read_data=yaml_content)): + result = load_algorithms_conf(algos_conf_fs, ["algo1"], None) + assert "algo1" in result + result = load_algorithms_conf(algos_conf_fs, ["algo2"], None) + assert "algo1" not in result + + +@patch("benchmark.find_executable", return_value=("executable", "path", "filename")) +@patch("benchmark.validate_algorithm", return_value=True) +@patch("benchmark.prepare_indexes", return_value=[{"index_key": "index_value"}]) +def test_prepare_executables(mock_prepare_indexes, mock_validate_algorithm, mock_find_executable): + algos_conf = { + "algo1": { + "groups": { + "group1": { 
+ "build": {}, + "search": {} + } + } + } + } + algos_yaml = {"algo1": {}} + gpu_present = True + conf_file = {} + dataset_path = "dataset_path" + dataset = "dataset" + count = 10 + batch_size = 128 + result = prepare_executables(algos_conf, algos_yaml, gpu_present, conf_file, dataset_path, dataset, count, batch_size) + assert "executable" in result + assert len(result["executable"]["index"]) == 1 + + +def test_prepare_indexes(): + group_conf = { + "build": {"param1": [1, 2]}, + "search": {"param2": [3, 4]} + } + conf_file = {"dataset": {"dims": 128}} + result = prepare_indexes(group_conf, "algo", "group", conf_file, "dataset_path", "dataset", 10, 128) + assert len(result) == 2 + assert "param1" in result[0]["build_param"] + + +def test_validate_search_params(): + all_search_params = itertools.product([1, 2], [3, 4]) + search_param_names = ["param1", "param2"] + group_conf = {} + conf_file = {"dataset": {"dims": 128}} + result = validate_search_params(all_search_params, search_param_names, "algo", group_conf, conf_file, 10, 128) + assert len(result) == 4 + + +def test_rmm_present(): + with patch.dict("sys.modules", {"rmm": MagicMock()}): + assert rmm_present() is True + with patch.dict("sys.modules", {"rmm": None}): + assert rmm_present() is False + + +@patch("benchmark.get_build_path", return_value="build_path") +def test_find_executable(mock_get_build_path): + algos_conf = {"algo1": {"executable": "executable1"}} + result = find_executable(algos_conf, "algo1", "group1", 10, 128) + assert result == ("executable1", "build_path", ("algo1,group1", "algo1,group1,k10,bs128")) + mock_get_build_path.return_value = None + with pytest.raises(FileNotFoundError): + find_executable(algos_conf, "algo1", "group1", 10, 128) + + +def test_validate_algorithm(): + algos_conf = {"algo1": {"requires_gpu": False}} + result = validate_algorithm(algos_conf, "algo1", gpu_present=True) + assert result is True + result = validate_algorithm(algos_conf, "algo1", gpu_present=False) + assert 
result is True + algos_conf["algo1"]["requires_gpu"] = True + result = validate_algorithm(algos_conf, "algo1", gpu_present=False) + assert result is False + + +@patch("benchmark.import_module") +def test_validate_constraints(mock_import_module): + mock_validator = MagicMock() + mock_import_module.return_value = mock_validator + mock_validator.constraint_func.return_value = True + algos_conf = { + "algo1": { + "constraints": { + "build": "module.constraint_func" + } + } + } + result = validate_constraints(algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None) + assert result is True + + + algos_conf = {"algo1": {"constraints": {}}} + result = validate_constraints(algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None) + assert result is True + + + mock_validator.constraint_func.return_value = False + algos_conf["algo1"]["constraints"]["build"] = "module.constraint_func" + result = validate_constraints(algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None) + assert result is False From 5e36a13be1d7129b42fb50dc5a2d691e54e78430 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 17 Sep 2024 13:03:53 -0500 Subject: [PATCH 09/18] Merge dev branch and branch-24.10 --- .github/workflows/build.yaml | 14 +- .github/workflows/pr.yaml | 26 +- .github/workflows/test.yaml | 10 +- .gitignore | 5 + .pre-commit-config.yaml | 2 +- ci/build_wheel.sh | 18 +- ci/build_wheel_cuvs.sh | 11 +- ci/release/update-version.sh | 1 + .../all_cuda-118_arch-aarch64.yaml | 4 +- .../all_cuda-118_arch-x86_64.yaml | 4 +- .../all_cuda-125_arch-aarch64.yaml | 4 +- .../all_cuda-125_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-118_arch-aarch64.yaml | 4 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-125_arch-aarch64.yaml | 4 +- .../bench_ann_cuda-125_arch-x86_64.yaml | 4 +- conda/recipes/cuvs/meta.yaml | 8 +- conda/recipes/libcuvs/meta.yaml | 33 +- cpp/CMakeLists.txt | 22 +- cpp/bench/ann/CMakeLists.txt | 18 + 
.../ann/src/cuvs/cuvs_brute_force_knn.cu | 333 +++++ cpp/include/cuvs/cluster/kmeans.hpp | 1 + cpp/include/cuvs/core/c_api.h | 21 + cpp/include/cuvs/distance/distance.hpp | 87 ++ cpp/include/cuvs/neighbors/brute_force.hpp | 139 +- cpp/include/cuvs/neighbors/cagra.h | 35 +- cpp/include/cuvs/neighbors/cagra.hpp | 12 + cpp/include/cuvs/neighbors/hnsw.h | 207 +++ cpp/include/cuvs/neighbors/hnsw.hpp | 77 +- cpp/include/cuvs/neighbors/ivf_flat.hpp | 72 ++ cpp/include/cuvs/stats/silhouette_score.hpp | 121 ++ .../cuvs/stats/trustworthiness_score.hpp | 51 + cpp/src/cluster/detail/kmeans_balanced.cuh | 115 +- cpp/src/cluster/kmeans_balanced.cuh | 47 +- cpp/src/core/c_api.cpp | 29 + cpp/src/distance/detail/distance.cuh | 84 +- .../distance/detail/distance_ops/canberra.cuh | 26 +- .../detail/distance_ops/correlation.cuh | 32 +- .../distance/detail/distance_ops/cosine.cuh | 27 +- .../distance/detail/distance_ops/hamming.cuh | 6 +- .../detail/distance_ops/hellinger.cuh | 6 +- .../detail/distance_ops/jensen_shannon.cuh | 14 +- .../detail/distance_ops/kl_divergence.cuh | 26 +- cpp/src/distance/detail/distance_ops/l1.cuh | 9 +- .../distance/detail/distance_ops/l2_exp.cuh | 28 +- .../distance/detail/distance_ops/l2_unexp.cuh | 15 +- .../distance/detail/distance_ops/l_inf.cuh | 4 +- .../distance/detail/distance_ops/lp_unexp.cuh | 10 +- .../detail/distance_ops/russel_rao.cuh | 9 +- .../distance/detail/distance_ops/template.cuh | 4 +- .../distance/detail/masked_distance_base.cuh | 2 +- .../detail/pairwise_distance_base.cuh | 26 +- .../detail/pairwise_distance_cutlass_base.cuh | 30 +- .../distance/detail/pairwise_distance_gemm.h | 103 +- .../detail/pairwise_matrix/dispatch-ext.cuh | 174 +-- .../detail/pairwise_matrix/dispatch-inl.cuh | 12 +- .../detail/pairwise_matrix/dispatch.cuh | 2 - .../pairwise_matrix/dispatch_00_generate.py | 10 +- ...patch_canberra_double_double_double_int.cu | 4 +- ...dispatch_canberra_float_float_float_int.cu | 4 +- .../dispatch_canberra_half_float_float_int.cu 
| 50 + ...ch_correlation_double_double_double_int.cu | 4 +- ...patch_correlation_float_float_float_int.cu | 4 +- ...spatch_correlation_half_float_float_int.cu | 50 + ...ispatch_cosine_double_double_double_int.cu | 4 +- .../dispatch_cosine_float_float_float_int.cu | 4 +- .../dispatch_cosine_half_float_float_int.cu | 51 + ...ing_unexpanded_double_double_double_int.cu | 4 +- ...amming_unexpanded_float_float_float_int.cu | 4 +- ...hamming_unexpanded_half_float_float_int.cu | 50 + ...inger_expanded_double_double_double_int.cu | 4 +- ...ellinger_expanded_float_float_float_int.cu | 4 +- ...hellinger_expanded_half_float_float_int.cu | 50 + ...jensen_shannon_double_double_double_int.cu | 4 +- ...ch_jensen_shannon_float_float_float_int.cu | 4 +- ...tch_jensen_shannon_half_float_float_int.cu | 55 + ..._kl_divergence_double_double_double_int.cu | 4 +- ...tch_kl_divergence_float_float_float_int.cu | 4 +- ...atch_kl_divergence_half_float_float_int.cu | 50 + .../dispatch_l1_double_double_double_int.cu | 4 +- .../dispatch_l1_float_float_float_int.cu | 4 +- .../dispatch_l1_half_float_float_int.cu | 50 + ...ch_l2_expanded_double_double_double_int.cu | 4 +- ...patch_l2_expanded_float_float_float_int.cu | 4 +- ...spatch_l2_expanded_half_float_float_int.cu | 51 + ..._l2_unexpanded_double_double_double_int.cu | 4 +- ...tch_l2_unexpanded_float_float_float_int.cu | 4 +- ...atch_l2_unexpanded_half_float_float_int.cu | 50 + ...dispatch_l_inf_double_double_double_int.cu | 4 +- .../dispatch_l_inf_float_float_float_int.cu | 4 +- .../dispatch_l_inf_half_float_float_int.cu | 50 + ..._lp_unexpanded_double_double_double_int.cu | 4 +- ...tch_lp_unexpanded_float_float_float_int.cu | 4 +- ...atch_lp_unexpanded_half_float_float_int.cu | 50 + .../detail/pairwise_matrix/dispatch_rbf.cu | 12 +- ...tch_russel_rao_double_double_double_int.cu | 4 +- ...spatch_russel_rao_float_float_float_int.cu | 4 +- ...ispatch_russel_rao_half_float_float_int.cu | 50 + .../detail/pairwise_matrix/params.cuh | 4 +- 
cpp/src/distance/distance-ext.cuh | 1122 ++++------------- cpp/src/distance/distance-inl.cuh | 48 +- cpp/src/distance/distance.cu | 1073 +++------------- cpp/src/distance/distance.cuh | 2 - cpp/src/distance/pairwise_distance.cu | 36 + cpp/src/neighbors/brute_force.cu | 105 +- cpp/src/neighbors/cagra_c.cpp | 25 + cpp/src/neighbors/cagra_optimize.cu | 10 +- .../cagra/search_multi_cta_kernel-ext.cuh | 4 +- .../detail/cagra/search_multi_cta_kernel.cuh | 2 +- .../cagra/search_single_cta_kernel-ext.cuh | 4 +- .../detail/cagra/search_single_cta_kernel.cuh | 2 +- .../neighbors/detail/faiss_distance_utils.h | 42 +- cpp/src/neighbors/detail/fused_l2_knn.cuh | 179 +-- .../neighbors/detail/haversine_distance.cuh | 52 +- cpp/src/neighbors/detail/knn_brute_force.cuh | 247 ++-- cpp/src/neighbors/detail/knn_utils.cuh | 37 +- cpp/src/neighbors/hnsw_c.cpp | 166 +++ cpp/src/neighbors/ivf_common.cuh | 1 + cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 45 +- .../ivf_flat/ivf_flat_build_float_int64_t.cu | 69 - .../ivf_flat/ivf_flat_build_int8_t_int64_t.cu | 69 - .../ivf_flat_build_uint8_t_int64_t.cu | 69 - .../ivf_flat/ivf_flat_extend_float_int64_t.cu | 71 -- .../ivf_flat_extend_int8_t_int64_t.cu | 71 -- .../ivf_flat_extend_uint8_t_int64_t.cu | 71 -- .../ivf_flat/ivf_flat_interleaved_scan.cuh | 263 +++- .../neighbors/ivf_flat/ivf_flat_search.cuh | 32 +- cpp/src/neighbors/ivf_flat_c.cpp | 2 +- cpp/src/neighbors/ivf_flat_index.cpp | 1 + cpp/src/neighbors/ivf_pq_c.cpp | 2 +- .../stats/detail/batched/silhouette_score.cuh | 285 +++++ cpp/src/stats/detail/silhouette_score.cuh | 328 +++++ .../stats/detail/trustworthiness_score.cuh | 220 ++++ cpp/src/stats/silhouette_score.cu | 138 ++ cpp/src/stats/trustworthiness_score.cu | 48 + cpp/test/CMakeLists.txt | 37 +- cpp/test/core/c_api.c | 27 + cpp/test/distance/dist_canberra.cu | 25 +- cpp/test/distance/dist_correlation.cu | 49 +- cpp/test/distance/dist_cos.cu | 69 +- cpp/test/distance/dist_hamming.cu | 24 +- cpp/test/distance/dist_hellinger.cu | 
24 +- cpp/test/distance/dist_inner_product.cu | 25 +- cpp/test/distance/dist_jensen_shannon.cu | 24 +- cpp/test/distance/dist_kl_divergence.cu | 24 +- cpp/test/distance/dist_l1.cu | 25 +- cpp/test/distance/dist_l2_exp.cu | 72 +- cpp/test/distance/dist_l2_sqrt_exp.cu | 27 +- cpp/test/distance/dist_l2_unexp.cu | 24 +- cpp/test/distance/dist_l_inf.cu | 25 +- cpp/test/distance/dist_lp_unexp.cu | 26 +- cpp/test/distance/dist_russell_rao.cu | 24 +- cpp/test/distance/distance_base.cuh | 347 ++--- cpp/test/neighbors/ann_brute_force.cuh | 200 +++ .../neighbors/ann_brute_force/test_float.cu | 28 + .../neighbors/ann_brute_force/test_half.cu | 30 + cpp/test/neighbors/ann_hnsw_c.cu | 131 ++ cpp/test/neighbors/ann_ivf_flat.cuh | 56 + cpp/test/neighbors/brute_force.cu | 231 +++- cpp/test/neighbors/brute_force_prefiltered.cu | 202 ++- cpp/test/neighbors/naive_knn.cuh | 14 +- cpp/test/stats/silhouette_score.cu | 235 ++++ cpp/test/stats/trustworthiness.cu | 356 ++++++ dependencies.yaml | 143 ++- docs/source/c_api/neighbors_cagra_c.rst | 6 + docs/source/c_api/neighbors_hnsw_c.rst | 43 + docs/source/cpp_api.rst | 1 + docs/source/cpp_api/neighbors_cagra.rst | 13 +- docs/source/cpp_api/neighbors_hnsw.rst | 52 + docs/source/cpp_api/stats.rst | 35 + examples/c/CMakeLists.txt | 4 + examples/c/src/L2_c_example.c | 123 ++ .../VectorSearch_QuestionRetrieval.ipynb | 2 +- notebooks/ivf_flat_example.ipynb | 26 +- notebooks/tutorial_ivf_pq.ipynb | 44 +- pyproject.toml | 2 +- python/cuvs/CMakeLists.txt | 21 + .../cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx | 4 +- python/cuvs/cuvs/test/test_ivf_flat.py | 3 +- python/cuvs/pyproject.toml | 15 +- python/cuvs_bench/LICENSE | 202 +-- python/cuvs_bench/cuvs_bench/VERSION | 2 +- python/cuvs_bench/cuvs_bench/run/__main__.py | 9 +- python/cuvs_bench/cuvs_bench/run/run.py | 199 +-- python/cuvs_bench/pyproject.toml | 27 +- 185 files changed, 7457 insertions(+), 3824 deletions(-) create mode 100644 cpp/bench/ann/src/cuvs/cuvs_brute_force_knn.cu create mode 
100644 cpp/include/cuvs/neighbors/hnsw.h create mode 100644 cpp/include/cuvs/stats/silhouette_score.hpp create mode 100644 cpp/include/cuvs/stats/trustworthiness_score.hpp create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu create mode 100644 cpp/src/neighbors/hnsw_c.cpp delete mode 100644 cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cu delete mode 100644 cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cu delete mode 100644 cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cu delete mode 100644 cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cu delete mode 100644 cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cu delete mode 100644 
cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cu mode change 100644 => 100755 cpp/src/neighbors/ivf_flat_c.cpp mode change 100644 => 100755 cpp/src/neighbors/ivf_pq_c.cpp create mode 100644 cpp/src/stats/detail/batched/silhouette_score.cuh create mode 100644 cpp/src/stats/detail/silhouette_score.cuh create mode 100644 cpp/src/stats/detail/trustworthiness_score.cuh create mode 100644 cpp/src/stats/silhouette_score.cu create mode 100644 cpp/src/stats/trustworthiness_score.cu create mode 100644 cpp/test/neighbors/ann_brute_force.cuh create mode 100644 cpp/test/neighbors/ann_brute_force/test_float.cu create mode 100644 cpp/test/neighbors/ann_brute_force/test_half.cu create mode 100644 cpp/test/neighbors/ann_hnsw_c.cu create mode 100644 cpp/test/stats/silhouette_score.cu create mode 100644 cpp/test/stats/trustworthiness.cu create mode 100644 docs/source/c_api/neighbors_hnsw_c.rst create mode 100644 docs/source/cpp_api/neighbors_hnsw.rst create mode 100644 docs/source/cpp_api/stats.rst create mode 100644 examples/c/src/L2_c_example.c mode change 100644 => 120000 python/cuvs_bench/LICENSE mode change 100644 => 120000 python/cuvs_bench/cuvs_bench/VERSION diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index db20bdbc12..b73a8eea98 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: rust-build: needs: cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ 
-50,7 +50,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -59,7 +59,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -70,7 +70,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -82,7 +82,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -92,7 +92,7 @@ jobs: wheel-publish-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 8ea2fa503c..d34f74062f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -24,49 +24,49 @@ jobs: - wheel-tests-cuvs - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: 
rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.12 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 with: build_type: pull-request enable_check_symbols: true - symbol_exclusions: (void (thrust::|cub::)|_ZN\d+raft_cutlass) + symbol_exclusions: (void (thrust::|cub::)|raft_cutlass) conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: 
rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -76,7 +76,7 @@ jobs: rust-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -86,20 +86,20 @@ jobs: wheel-build-cuvs: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 with: build_type: pull-request script: ci/build_wheel_cuvs.sh wheel-tests-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 with: build_type: pull-request script: ci/test_wheel_cuvs.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@python-3.12 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c0d07297b8..f2daecbec6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,17 +16,17 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} enable_check_symbols: true - symbol_exclusions: (void (thrust::|cub::)|_ZN\d+raft_cutlass) + symbol_exclusions: (void (thrust::|cub::)|raft_cutlass) conda-cpp-tests: secrets: inherit - 
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.gitignore b/.gitignore index 4b6f46320f..fcbe0fa3a2 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,11 @@ bench/ann/data temporary_*.json rust/target/ rust/Cargo.lock +rmm_log.txt + +## example notebooks +notebooks/simplewiki-2020-11-01-nq-distilbert-base-v1.pt +notebooks/data/ ## scikit-build _skbuild diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 592693adb6..3e3623f24f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -97,7 +97,7 @@ repos: hooks: - id: check-json - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v0.3.1 + rev: v0.4.0 hooks: - id: verify-copyright files: | diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 5d48bab220..d1030276f9 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -16,10 +16,26 @@ rapids-generate-version > ./VERSION cd "${package_dir}" +case "${RAPIDS_CUDA_VERSION}" in + 12.*) + EXCLUDE_ARGS=( + --exclude "libcublas.so.12" + --exclude "libcublasLt.so.12" + --exclude "libcurand.so.10" + --exclude "libcusolver.so.11" + --exclude "libcusparse.so.12" + --exclude "libnvJitLink.so.12" + ) + ;; + 11.*) + EXCLUDE_ARGS=() + ;; +esac + 
# Hardcode the output dir python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check mkdir -p final_dist -python -m auditwheel repair -w final_dist dist/* +python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index 0fe28c2f7f..e03da9f19d 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -3,7 +3,16 @@ set -euo pipefail +case "${RAPIDS_CUDA_VERSION}" in + 12.*) + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" + ;; + 11.*) + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" + ;; +esac + # Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF -DFIND_CUVS_CPP=OFF" +export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUVS_CPP=OFF${EXTRA_CMAKE_ARGS}" ci/build_wheel.sh cuvs python/cuvs diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 6d7d022c2d..feb0a400ce 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -45,6 +45,7 @@ DEPENDENCIES=( dask-cuda cuvs pylibraft + librmm rmm rapids-dask-dependency ) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 54866d0e53..cfcb562258 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -35,10 +35,11 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 +- librmm==24.10.*,>=0.0.0a0 - make - nccl>=2.9.9 - ninja -- numpy>=1.23,<2.0a0 +- numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-aarch64=11.8 - openblas @@ -49,7 +50,6 @@ dependencies: - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 - rust - scikit-build-core>=0.10.0 - scikit-learn diff --git 
a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b29fe1b50e..dc519d1b58 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -35,10 +35,11 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 +- librmm==24.10.*,>=0.0.0a0 - make - nccl>=2.9.9 - ninja -- numpy>=1.23,<2.0a0 +- numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 - openblas @@ -49,7 +50,6 @@ dependencies: - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 - rust - scikit-build-core>=0.10.0 - scikit-learn diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index c6deb93f8b..b32650e449 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -32,10 +32,11 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- librmm==24.10.*,>=0.0.0a0 - make - nccl>=2.9.9 - ninja -- numpy>=1.23,<2.0a0 +- numpy>=1.23,<3.0a0 - numpydoc - openblas - pre-commit @@ -45,7 +46,6 @@ dependencies: - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 - rust - scikit-build-core>=0.10.0 - scikit-learn diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 538fdf08be..d40fc3b99b 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -32,10 +32,11 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- librmm==24.10.*,>=0.0.0a0 - make - nccl>=2.9.9 - ninja -- numpy>=1.23,<2.0a0 +- numpy>=1.23,<3.0a0 - numpydoc - openblas - pre-commit @@ -45,7 +46,6 @@ dependencies: - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 - rust - scikit-build-core>=0.10.0 - 
scikit-learn diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index eee696308f..c6e8b05a2c 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -24,7 +24,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -33,6 +33,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 +- librmm==24.10.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -42,6 +43,5 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml -- rmm==24.10.*,>=0.0.0a0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 92a246d189..d6c023ae97 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -24,7 +24,7 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -33,6 +33,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 +- librmm==24.10.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -42,6 +43,5 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml -- rmm==24.10.*,>=0.0.0a0 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 77db9fc093..4d0ca94966 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -25,11 +25,12 @@ dependencies: - gcc_linux-aarch64=11.* - 
glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev +- librmm==24.10.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -38,6 +39,5 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml -- rmm==24.10.*,>=0.0.0a0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-125_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 7379c2ca11..7dd67ab5e0 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -25,11 +25,12 @@ dependencies: - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- hnswlib=0.7.0 +- hnswlib=0.6.2 - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev +- librmm==24.10.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -38,6 +39,5 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml -- rmm==24.10.*,>=0.0.0a0 - sysroot_linux-64==2.17 name: bench_ann_cuda-125_arch-x86_64 diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index df6cfeec68..e7e2daf0c6 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -1,7 +1,7 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. # Usage: -# conda build . -c conda-forge -c numba -c rapidsai -c pytorch +# conda build . -c rapidsai -c conda-forge -c nvidia {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} @@ -54,10 +54,8 @@ requirements: - pylibraft {{ minor_version }} - libcuvs {{ version }} - python x.x - - rmm ={{ minor_version }} - rapids-build-backend>=0.3.0,<0.4.0.dev0 - scikit-build-core >=0.10.0 - - setuptools run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} @@ -68,7 +66,8 @@ requirements: - pylibraft {{ minor_version }} - libcuvs {{ version }} - python x.x - - rmm ={{ minor_version }} + - cuda-python + - numpy >=1.23,<3.0a0 tests: requirements: @@ -79,5 +78,4 @@ tests: about: home: https://rapids.ai/ license: Apache-2.0 - # license_file: LICENSE summary: cuvs python library diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index 4ffdc91e40..e154ccf410 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -1,7 +1,7 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. # Usage: -# conda build . -c conda-forge -c nvidia -c rapidsai +# conda build . -c rapidsai -c conda-forge -c nvidia {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} @@ -63,7 +63,8 @@ outputs: - ninja - {{ stdlib("c") }} host: - - libraft ={{ minor_version }} + - librmm ={{ minor_version }} + - libraft-headers ={{ minor_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -84,7 +85,6 @@ outputs: - libcusparse-dev {% endif %} run: - - libraft ={{ minor_version }} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major != "11" %} - cuda-cudart @@ -129,7 +129,8 @@ outputs: - ninja - {{ stdlib("c") }} host: - - libraft ={{ minor_version }} + - librmm ={{ minor_version }} + - libraft-headers ={{ minor_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -150,7 +151,6 @@ outputs: - libcusparse-dev {% endif %} run: - - libraft ={{ minor_version }} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major != "11" %} - cuda-cudart @@ -195,7 +195,8 @@ outputs: - ninja - {{ stdlib("c") }} host: - - libraft ={{ minor_version }} + - librmm ={{ minor_version }} + - libraft-headers ={{ minor_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version ={{ cuda_version }} - openblas # required by some CPU algos in benchmarks @@ -228,7 +229,6 @@ outputs: - libcusolver - libcusparse {% endif %} - - libraft ={{ minor_version }} - {{ pin_subpackage('libcuvs', exact=True) }} about: home: https://rapids.ai/ @@ -248,6 +248,9 @@ outputs: - {{ compiler('cuda') }} - cuda-cudart-dev - libcublas-dev + - libcurand-dev + - libcusolver-dev + - libcusparse-dev {% endif %} requirements: build: @@ -263,17 +266,27 @@ outputs: - ninja - {{ stdlib("c") }} host: - - libraft ={{ minor_version }} + - librmm ={{ minor_version }} + - libraft-headers ={{ minor_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version 
={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} - libcublas {{ cuda11_libcublas_host_version }} - libcublas-dev {{ cuda11_libcublas_host_version }} + - libcurand {{ cuda11_libcurand_host_version }} + - libcurand-dev {{ cuda11_libcurand_host_version }} + - libcusolver {{ cuda11_libcusolver_host_version }} + - libcusolver-dev {{ cuda11_libcusolver_host_version }} + - libcusparse {{ cuda11_libcusparse_host_version }} + - libcusparse-dev {{ cuda11_libcusparse_host_version }} {% else %} - cuda-cudart-dev - cuda-profiler-api - libcublas-dev + - libcurand-dev + - libcusolver-dev + - libcusparse-dev {% endif %} run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} @@ -282,8 +295,10 @@ outputs: {% else %} - cuda-cudart - libcublas + - libcurand + - libcusolver + - libcusparse {% endif %} - - libraft ={{ minor_version }} - {{ pin_subpackage('libcuvs', exact=True) }} about: home: https://rapids.ai/ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 02c2efa0bf..e9439d60a4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -53,8 +53,8 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) option(BUILD_SHARED_LIBS "Build cuvs shared libraries" ON) option(BUILD_TESTS "Build cuvs unit-tests" ON) -option(BUILD_C_LIBRARY "Build raft C API library" OFF) -option(BUILD_C_TESTS "Build raft C API tests" OFF) +option(BUILD_C_LIBRARY "Build cuVS C API library" OFF) +option(BUILD_C_TESTS "Build cuVS C API tests" OFF) option(BUILD_ANN_BENCH "Build cuVS ann benchmarks" ON) option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) @@ -216,30 +216,43 @@ add_library( src/cluster/kmeans_transform_float.cu src/cluster/single_linkage_float.cu src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu 
src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu + 
src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_rbf.cu src/distance/detail/fused_distance_nn.cu @@ -411,8 +424,12 @@ add_library( src/selection/select_k_float_int64_t.cu src/selection/select_k_float_uint32_t.cu src/selection/select_k_half_uint32_t.cu + src/stats/silhouette_score.cu + src/stats/trustworthiness_score.cu ) +target_compile_definitions(cuvs PRIVATE "CUVS_EXPLICIT_INSTANTIATE_ONLY") + target_compile_options( cuvs INTERFACE $<$:--expt-extended-lambda --expt-relaxed-constexpr> @@ -534,6 +551,7 @@ if(BUILD_C_LIBRARY) src/neighbors/ivf_flat_c.cpp src/neighbors/ivf_pq_c.cpp src/neighbors/cagra_c.cpp + src/neighbors/hnsw_c.cpp src/neighbors/refine/refine_c.cpp src/distance/pairwise_distance_c.cpp ) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 80c1f35306..6fe23483e0 100644 --- 
a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -35,6 +35,7 @@ option(CUVS_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" OFF) option(CUVS_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF ) +option(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benchmark" ON) # ################################################################################################## # * Process options ---------------------------------------------------------- @@ -53,6 +54,9 @@ if(BUILD_CPU_ONLY) set(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OFF) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OFF) set(CUVS_ANN_BENCH_USE_GGNN OFF) + set(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE OFF) +else() + set(CUVS_FAISS_ENABLE_GPU ON) endif() set(CUVS_ANN_BENCH_USE_CUVS OFF) @@ -61,6 +65,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OR CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT OR CUVS_ANN_BENCH_USE_CUVS_CAGRA OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB + OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE ) set(CUVS_ANN_BENCH_USE_CUVS ON) endif() @@ -169,6 +174,8 @@ function(ConfigureAnnBench) ) endif() + target_compile_definitions(${BENCH_NAME} PRIVATE "CUVS_EXPLICIT_INSTANTIATE_ONLY") + target_include_directories( ${BENCH_NAME} PUBLIC "$" @@ -223,6 +230,17 @@ if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE) ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH src/cuvs/cuvs_benchmark.cu LINKS cuvs) endif() +if(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE) + ConfigureAnnBench( + NAME + CUVS_KNN_BRUTE_FORCE + PATH + $<$:src/cuvs/cuvs_brute_force_knn.cu> + LINKS + cuvs + ) +endif() + if(CUVS_ANN_BENCH_USE_CUVS_CAGRA) ConfigureAnnBench( NAME diff --git a/cpp/bench/ann/src/cuvs/cuvs_brute_force_knn.cu b/cpp/bench/ann/src/cuvs/cuvs_brute_force_knn.cu new file mode 100644 index 0000000000..4c38b3420d --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_brute_force_knn.cu @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuvs::neighbors::brute_force { + +struct print_metric { + cuvs::distance::DistanceType value; +}; + +struct RandomKNNInputs { + int num_queries; + int num_db_vecs; + int dim; + int k; + cuvs::distance::DistanceType metric; + bool row_major; +}; + +inline auto operator<<(std::ostream& os, const print_metric& p) -> std::ostream& +{ + switch (p.value) { + case cuvs::distance::DistanceType::L2Expanded: os << "L2Expanded"; break; + case cuvs::distance::DistanceType::L2SqrtExpanded: os << "L2SqrtExpanded"; break; + case cuvs::distance::DistanceType::CosineExpanded: os << "CosineExpanded"; break; + case cuvs::distance::DistanceType::L1: os << "L1"; break; + case cuvs::distance::DistanceType::L2Unexpanded: os << "L2Unexpanded"; break; + case cuvs::distance::DistanceType::L2SqrtUnexpanded: os << "L2SqrtUnexpanded"; break; + case cuvs::distance::DistanceType::InnerProduct: os << "InnerProduct"; break; + case cuvs::distance::DistanceType::Linf: os << "Linf"; break; + case cuvs::distance::DistanceType::Canberra: os << "Canberra"; break; + case cuvs::distance::DistanceType::LpUnexpanded: os << "LpUnexpanded"; break; + case cuvs::distance::DistanceType::CorrelationExpanded: os << "CorrelationExpanded"; break; + case cuvs::distance::DistanceType::JaccardExpanded: os << 
"JaccardExpanded"; break; + case cuvs::distance::DistanceType::HellingerExpanded: os << "HellingerExpanded"; break; + case cuvs::distance::DistanceType::Haversine: os << "Haversine"; break; + case cuvs::distance::DistanceType::BrayCurtis: os << "BrayCurtis"; break; + case cuvs::distance::DistanceType::JensenShannon: os << "JensenShannon"; break; + case cuvs::distance::DistanceType::HammingUnexpanded: os << "HammingUnexpanded"; break; + case cuvs::distance::DistanceType::KLDivergence: os << "KLDivergence"; break; + case cuvs::distance::DistanceType::RusselRaoExpanded: os << "RusselRaoExpanded"; break; + case cuvs::distance::DistanceType::DiceExpanded: os << "DiceExpanded"; break; + case cuvs::distance::DistanceType::Precomputed: os << "Precomputed"; break; + default: RAFT_FAIL("unreachable code"); + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const RandomKNNInputs& input) +{ + return os << "num_queries:" << input.num_queries << " num_vecs:" << input.num_db_vecs + << " dim:" << input.dim << " k:" << input.k << " metric:" << print_metric{input.metric} + << " row_major:" << input.row_major; +} + +template +class BruteForceKNNBenchmark { + public: + BruteForceKNNBenchmark(const RandomKNNInputs& params, const std::string& type_str) + : stream_(raft::resource::get_cuda_stream(handle_)), + params_(params), + type_str_(type_str), + database(params_.num_db_vecs * params_.dim, stream_), + search_queries(params_.num_queries * params_.dim, stream_), + cuvs_indices_(params_.num_queries * params_.k, stream_), + cuvs_distances_(params_.num_queries * params_.k, stream_) + { + raft::matrix::fill( + handle_, + raft::make_device_matrix_view(database.data(), params_.num_db_vecs, params_.dim), + T{0.0}); + raft::matrix::fill( + handle_, + raft::make_device_matrix_view(search_queries.data(), params_.num_queries, params_.dim), + T{0.0}); + raft::matrix::fill( + handle_, + raft::make_device_matrix_view(cuvs_distances_.data(), params_.num_queries, params_.k), + 
DistT{0.0}); + } + + void runBenchmark() + { + DistT metric_arg = 3.0; + rmm::device_uvector workspace(0, stream_); + + std::chrono::duration build_dur; + std::chrono::duration search_dur; + + auto indices = raft::make_device_matrix_view( + cuvs_indices_.data(), params_.num_queries, params_.k); + auto distances = raft::make_device_matrix_view( + cuvs_distances_.data(), params_.num_queries, params_.k); + raft::resource::sync_stream(handle_, stream_); + + if (params_.row_major) { + { + auto idx_warm = + cuvs::neighbors::brute_force::build(handle_, + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim), + params_.metric, + metric_arg); + cuvs::neighbors::brute_force::search( + handle_, + idx_warm, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + std::nullopt); + flush_l2_cache(); + raft::resource::sync_stream(handle_, stream_); + } + + auto start = std::chrono::high_resolution_clock::now(); + auto idx = + cuvs::neighbors::brute_force::build(handle_, + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim), + params_.metric, + metric_arg); + raft::resource::sync_stream(handle_, stream_); + auto end = std::chrono::high_resolution_clock::now(); + build_dur = end - start; + + start = std::chrono::high_resolution_clock::now(); + cuvs::neighbors::brute_force::search( + handle_, + idx, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + std::nullopt); + raft::resource::sync_stream(handle_, stream_); + end = std::chrono::high_resolution_clock::now(); + search_dur = end - start; + + } else { + { + auto idx_warm = + cuvs::neighbors::brute_force::build(handle_, + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim), + params_.metric, + metric_arg); + cuvs::neighbors::brute_force::search( + handle_, + idx_warm, + raft::make_device_matrix_view( + 
search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + std::nullopt); + flush_l2_cache(); + raft::resource::sync_stream(handle_, stream_); + } + + auto start = std::chrono::high_resolution_clock::now(); + auto idx = cuvs::neighbors::brute_force::build( + handle_, + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim), + params_.metric, + metric_arg); + raft::resource::sync_stream(handle_, stream_); + auto end = std::chrono::high_resolution_clock::now(); + build_dur = end - start; + + start = std::chrono::high_resolution_clock::now(); + cuvs::neighbors::brute_force::search( + handle_, + idx, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + std::nullopt); + raft::resource::sync_stream(handle_, stream_); + end = std::chrono::high_resolution_clock::now(); + search_dur = end - start; + } + + double total_dur = build_dur.count() + search_dur.count(); + double throughput = static_cast(params_.num_queries) / (total_dur / 1000.0); + ; + printResult(params_, build_dur.count(), search_dur.count(), total_dur, throughput); + } + + void setUp() + { + unsigned long long int seed = 1234ULL; + raft::random::RngState r(seed); + + // JensenShannon distance requires positive values + T min_val = params_.metric == cuvs::distance::DistanceType::JensenShannon ? 
T(0.0) : T(-1.0); + uniform(handle_, r, database.data(), params_.num_db_vecs * params_.dim, min_val, T(1.0)); + uniform(handle_, r, search_queries.data(), params_.num_queries * params_.dim, min_val, T(1.0)); + } + + private: + void flush_l2_cache() + { + int l2_cache_size = 0; + int device_id = 0; + RAFT_CUDA_TRY(cudaGetDevice(&device_id)); + RAFT_CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_size, cudaDevAttrL2CacheSize, device_id)); + scratch_buf_ = rmm::device_buffer(l2_cache_size * 3, stream_); + RAFT_CUDA_TRY(cudaMemsetAsync(scratch_buf_.data(), 0, scratch_buf_.size(), stream_)); + }; + + void printResult(const RandomKNNInputs& params, + double build_time, + double search_time, + double total_time, + double throughput) + { + std::cout << std::left << std::setw(15) << type_str_ << std::setw(10) << params.num_queries + << std::setw(10) << params.num_db_vecs << std::setw(10) << params.dim << std::setw(10) + << params.k << std::setw(20) << print_metric{params.metric} << std::setw(15) + << (params.row_major ? 
"row" : "col") << std::right << std::setw(20) << std::fixed + << std::setprecision(3) << build_time << std::right << std::setw(20) << std::fixed + << std::setprecision(3) << search_time << std::right << std::setw(20) << std::fixed + << std::setprecision(3) << total_time << std::right << std::setw(20) << std::fixed + << std::setprecision(3) << throughput << "\n"; + } + raft::resources handle_; + cudaStream_t stream_ = 0; + RandomKNNInputs params_; + rmm::device_uvector database; + rmm::device_uvector search_queries; + rmm::device_uvector cuvs_indices_; + rmm::device_uvector cuvs_distances_; + rmm::device_buffer scratch_buf_; + std::string type_str_; +}; + +static std::vector getInputs() +{ + std::vector param_vec; + struct TestParams { + int num_queries; + int num_db_vecs; + int dim; + int k; + cuvs::distance::DistanceType metric; + bool row_major; + }; + + const std::vector params_group = raft::util::itertools::product( + {int(10), int(100), int(1024)}, + {int(1000000)}, + {int(32), int(256), int(1024)}, + {int(128), int(1024)}, + {cuvs::distance::DistanceType::InnerProduct, cuvs::distance::DistanceType::L2SqrtExpanded}, + {true, false}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(RandomKNNInputs({params.num_queries, + params.num_db_vecs, + params.dim, + params.k, + params.metric, + params.row_major})); + } + return param_vec; +} + +void printHeader() +{ + std::cout << std::left << std::setw(15) << "Type" << std::setw(10) << "Queries" << std::setw(10) + << "Vectors" << std::setw(10) << "Dim" << std::setw(10) << "K" << std::setw(20) + << "Metric" << std::setw(15) << "Layout" << std::right << std::setw(20) + << "Build Time (ms)" << std::right << std::setw(20) << "Search Time (ms)" << std::right + << std::setw(20) << "Total Time (ms)" << std::right << std::setw(20) + << "Throughput (q/s)" + << "\n"; + std::cout << std::string(165, '-') << "\n"; +} + +void runBenchmarkForType() +{ + auto selected_inputs 
= getInputs(); + for (const auto& input : selected_inputs) { + { + BruteForceKNNBenchmark benchmark(input, "float"); + benchmark.setUp(); + benchmark.runBenchmark(); + } + { + BruteForceKNNBenchmark benchmark(input, "half"); + benchmark.setUp(); + benchmark.runBenchmark(); + } + } +} + +} // namespace cuvs::neighbors::brute_force + +int main() +{ + cuvs::neighbors::brute_force::printHeader(); + cuvs::neighbors::brute_force::runBenchmarkForType(); + return 0; +} diff --git a/cpp/include/cuvs/cluster/kmeans.hpp b/cpp/include/cuvs/cluster/kmeans.hpp index c6fe4d0465..75205fa4f1 100644 --- a/cpp/include/cuvs/cluster/kmeans.hpp +++ b/cpp/include/cuvs/cluster/kmeans.hpp @@ -120,6 +120,7 @@ struct params : base_params { * Simple object to specify hyper-parameters to the balanced k-means algorithm. * * The following metrics are currently supported in k-means balanced: + * - CosineExpanded * - InnerProduct * - L2Expanded * - L2SqrtExpanded diff --git a/cpp/include/cuvs/core/c_api.h b/cpp/include/cuvs/core/c_api.h index d931d6c13d..4db7fd12c0 100644 --- a/cpp/include/cuvs/core/c_api.h +++ b/cpp/include/cuvs/core/c_api.h @@ -127,6 +127,27 @@ cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t bytes); */ cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes); +/** + * @brief Switches the working memory resource to use the RMM pool memory resource, which will + * bypass unnecessary synchronizations by allocating a chunk of device memory up front and carving + * that up for temporary memory allocations within algorithms. Be aware that this function will + * change the memory resource for the whole process and the new memory resource will be used until + * explicitly changed. 
+ * + * @param[in] initial_pool_size_percent The initial pool size as a percentage of the total + * available memory + * @param[in] max_pool_size_percent The maximum pool size as a percentage of the total + * available memory + * @return cuvsError_t + */ +cuvsError_t cuvsRMMPoolMemoryResourceEnable(int initial_pool_size_percent, + int max_pool_size_percent); +/** + * @brief Resets the memory resource to use the default memory resource (cuda_memory_resource) + * @return cuvsError_t + */ +cuvsError_t cuvsRMMMemoryResourceReset(); + /** @} */ #ifdef __cplusplus diff --git a/cpp/include/cuvs/distance/distance.hpp b/cpp/include/cuvs/distance/distance.hpp index 5786b0a32b..def72641ec 100644 --- a/cpp/include/cuvs/distance/distance.hpp +++ b/cpp/include/cuvs/distance/distance.hpp @@ -19,6 +19,7 @@ #include "distance.h" #include +#include #include #include @@ -156,6 +157,49 @@ void pairwise_distance( raft::device_matrix_view dist, cuvs::distance::DistanceType metric, double metric_arg = 2.0f); +/** + * @brief Compute pairwise distances for two matrices + * + * Note: Only contiguous row- or column-major layouts supported currently. + * + * Usage example: + * @code{.cpp} + * #include + * #include + * #include + * + * raft::resources handle; + * int n_samples = 5000; + * int n_features = 50; + * + * auto input = raft::make_device_matrix(handle, n_samples, n_features); + * + * // ... fill input with data ... 
+ * + * auto output = raft::make_device_matrix(handle, n_samples, n_samples); + * + * auto metric = cuvs::distance::DistanceType::L2SqrtExpanded; + * cuvs::distance::pairwise_distance(handle, + * raft::make_const(input.view()), + * raft::make_const(input.view()), + * output.view(), + * metric); + * @endcode + * + * @param[in] handle raft handle for managing expensive resources + * @param[in] x first set of points (size n*k) + * @param[in] y second set of points (size m*k) + * @param[out] dist output distance matrix (size n*m) + * @param[in] metric distance to evaluate + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +void pairwise_distance( + raft::resources const& handle, + raft::device_matrix_view const x, + raft::device_matrix_view const y, + raft::device_matrix_view dist, + cuvs::distance::DistanceType metric, + float metric_arg = 2.0f); /** * @brief Compute pairwise distances for two matrices @@ -243,6 +287,49 @@ void pairwise_distance( raft::device_matrix_view dist, cuvs::distance::DistanceType metric, double metric_arg = 2.0f); +/** + * @brief Compute pairwise distances for two matrices + * + * Note: Only contiguous row- or column-major layouts supported currently. + * + * Usage example: + * @code{.cpp} + * #include + * #include + * #include + * + * raft::resources handle; + * int n_samples = 5000; + * int n_features = 50; + * + * auto input = raft::make_device_matrix(handle, n_samples, n_features); + * + * // ... fill input with data ... 
+ * + * auto output = raft::make_device_matrix(handle, n_samples, n_samples); + * + * auto metric = cuvs::distance::DistanceType::L2SqrtExpanded; + * cuvs::distance::pairwise_distance(handle, + * raft::make_const(input.view()), + * raft::make_const(input.view()), + * output.view(), + * metric); + * @endcode + * + * @param[in] handle raft handle for managing expensive resources + * @param[in] x first set of points (size n*k) + * @param[in] y second set of points (size m*k) + * @param[out] dist output distance matrix (size n*m) + * @param[in] metric distance to evaluate + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +void pairwise_distance( + raft::resources const& handle, + raft::device_matrix_view const x, + raft::device_matrix_view const y, + raft::device_matrix_view dist, + cuvs::distance::DistanceType metric, + float metric_arg = 2.0f); /** @} */ // end group pairwise_distance_runtime diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index db70a7fa65..5408eb1a00 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -23,6 +23,8 @@ #include #include +#include + namespace cuvs::neighbors::brute_force { /** @@ -36,7 +38,7 @@ namespace cuvs::neighbors::brute_force { * * @tparam T data element type */ -template +template struct index : cuvs::neighbors::index { public: index(const index&) = delete; @@ -54,9 +56,9 @@ struct index : cuvs::neighbors::index { */ index(raft::resources const& res, raft::host_matrix_view dataset_view, - std::optional>&& norms, + std::optional>&& norms, cuvs::distance::DistanceType metric, - T metric_arg = 0.0); + DistT metric_arg = 0.0); /** Construct a brute force index from dataset * @@ -67,9 +69,9 @@ struct index : cuvs::neighbors::index { */ index(raft::resources const& res, raft::device_matrix_view dataset_view, - std::optional>&& norms, + std::optional>&& norms, cuvs::distance::DistanceType metric, - T 
metric_arg = 0.0); + DistT metric_arg = 0.0); /** Construct a brute force index from dataset * @@ -78,9 +80,9 @@ struct index : cuvs::neighbors::index { */ index(raft::resources const& res, raft::device_matrix_view dataset_view, - std::optional> norms_view, + std::optional> norms_view, cuvs::distance::DistanceType metric, - T metric_arg = 0.0); + DistT metric_arg = 0.0); /** Construct a brute force index from dataset * @@ -91,9 +93,9 @@ struct index : cuvs::neighbors::index { */ index(raft::resources const& res, raft::device_matrix_view dataset_view, - std::optional>&& norms, + std::optional>&& norms, cuvs::distance::DistanceType metric, - T metric_arg = 0.0); + DistT metric_arg = 0.0); /** Construct a brute force index from dataset * @@ -102,9 +104,9 @@ struct index : cuvs::neighbors::index { */ index(raft::resources const& res, raft::device_matrix_view dataset_view, - std::optional> norms_view, + std::optional> norms_view, cuvs::distance::DistanceType metric, - T metric_arg = 0.0); + DistT metric_arg = 0.0); /** * Replace the dataset with a new dataset. @@ -124,7 +126,7 @@ struct index : cuvs::neighbors::index { cuvs::distance::DistanceType metric() const noexcept { return metric_; } /** Metric argument */ - T metric_arg() const noexcept { return metric_arg_; } + DistT metric_arg() const noexcept { return metric_arg_; } /** Total length of the index (number of vectors). 
*/ size_t size() const noexcept { return dataset_view_.extent(0); } @@ -139,7 +141,7 @@ struct index : cuvs::neighbors::index { } /** Dataset norms */ - raft::device_vector_view norms() const + raft::device_vector_view norms() const { return norms_view_.value(); } @@ -150,10 +152,10 @@ struct index : cuvs::neighbors::index { private: cuvs::distance::DistanceType metric_; raft::device_matrix dataset_; - std::optional> norms_; - std::optional> norms_view_; + std::optional> norms_; + std::optional> norms_view_; raft::device_matrix_view dataset_view_; - T metric_arg_; + DistT metric_arg_; }; /** * @} @@ -183,8 +185,28 @@ struct index : cuvs::neighbors::index { auto build(raft::resources const& handle, raft::device_matrix_view dataset, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; - + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // create and fill the index from a [N, D] dataset + * auto index = brute_force::build(handle, dataset, metric); + * @endcode + * + * @param[in] handle + * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim] + * @param[in] metric cuvs::distance::DistanceType + * @param[in] metric_arg metric argument + * + * @return the constructed ivf-flat index + */ +auto build(raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; /** * @brief Build the index from the dataset for efficient search. 
* @@ -205,7 +227,28 @@ auto build(raft::resources const& handle, auto build(raft::resources const& handle, raft::device_matrix_view dataset, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // create and fill the index from a [N, D] dataset + * auto index = brute_force::build(handle, dataset, metric); + * @endcode + * + * @param[in] handle + * @param[in] dataset a device pointer to a col-major matrix [n_rows, dim] + * @param[in] metric cuvs::distance::DistanceType + * @param[in] metric_arg metric argument + * + * @return the constructed bruteforce index + */ +auto build(raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; /** * @} */ @@ -244,12 +287,46 @@ auto build(raft::resources const& handle, * `index->size()` bits to indicate whether queries[0] should compute the distance with dataset. */ void search(raft::resources const& handle, - const cuvs::neighbors::brute_force::index& index, + const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, std::optional> sample_filter); +/** + * @brief Search ANN using the constructed index. + * + * See the [brute_force::build](#brute_force::build) documentation for a usage example. + * + * Note, this function requires a temporary buffer to store intermediate results between cuda kernel + * calls, which may lead to undesirable allocations and slowdown. 
To alleviate the problem, you can + * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or + * eliminate entirely allocations happening within `search`: + * @code{.cpp} + * ... + * // Use the same allocator across multiple searches to reduce the number of + * // cuda memory allocations + * brute_force::search(handle, index, queries1, out_inds1, out_dists1); + * brute_force::search(handle, index, queries2, out_inds2, out_dists2); + * brute_force::search(handle, index, queries3, out_inds3, out_dists3); + * ... + * @endcode + * + * @param[in] handle + * @param[in] index bruteforce constructed index + * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] + * @param[in] sample_filter an optional device bitmap filter function that greenlights samples for a + * given query + */ +void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + std::optional> sample_filter); /** * @brief Search ANN using the constructed index. * @@ -265,11 +342,31 @@ void search(raft::resources const& handle, * given query */ void search(raft::resources const& handle, - const cuvs::neighbors::brute_force::index& index, + const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, std::optional> sample_filter); +/** + * @brief Search ANN using the constructed index. + * + * See the [brute_force::build](#brute_force::build) documentation for a usage example.
+ * + * @param[in] handle + * @param[in] index bruteforce constructed index + * @param[in] queries a device pointer to a col-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] + * @param[in] sample_filter an optional device bitmap filter function that greenlights samples for a + * given query + */ +void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + std::optional> sample_filter); /** * @} */ diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index 87541f7f08..241f5d8b01 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -337,7 +337,10 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res, * It is also important to note that the CAGRA Index must have been built * with the same type of `queries`, such that `index.dtype.code == * queries.dl_tensor.dtype.code` Types for input are: - * 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * 1. `queries`: + * a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` + * c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` * 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` * @@ -394,7 +397,7 @@ cuvsError_t cuvsCagraSearch(cuvsResources_t res, * * Experimental, both the API and the serialization format are subject to change.
* - * @code{.cpp} + * @code{.c} * #include * * // Create cuvsResources_t @@ -416,6 +419,34 @@ cuvsError_t cuvsCagraSerialize(cuvsResources_t res, cuvsCagraIndex_t index, bool include_dataset); +/** + * Save the CAGRA index to file in hnswlib format. + * NOTE: The saved index can only be read by the hnswlib wrapper in cuVS, + * as the serialization format is not compatible with the original hnswlib. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsCagraBuild` + * cuvsCagraSerializeToHnswlib(res, "/path/to/index", index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the file name for saving the index + * @param[in] index CAGRA index + * + */ +cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res, + const char* filename, + cuvsCagraIndex_t index); + /** * Load index from file. * diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index f74eac7111..5f77eb8a3c 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -1345,6 +1345,8 @@ void deserialize(raft::resources const& handle, /** * Write the CAGRA built index as a base layer HNSW index to an output stream + * NOTE: The saved index can only be read by the hnswlib wrapper in cuVS, + * as the serialization format is not compatible with the original hnswlib. * * Experimental, both the API and the serialization format are subject to change. * @@ -1371,6 +1373,8 @@ void serialize_to_hnswlib(raft::resources const& handle, /** * Save a CAGRA build index in hnswlib base-layer-only serialized format + * NOTE: The saved index can only be read by the hnswlib wrapper in cuVS, + * as the serialization format is not compatible with the original hnswlib.
* * Experimental, both the API and the serialization format are subject to change. * @@ -1398,6 +1402,8 @@ void serialize_to_hnswlib(raft::resources const& handle, /** * Write the CAGRA built index as a base layer HNSW index to an output stream + * NOTE: The saved index can only be read by the hnswlib wrapper in cuVS, + * as the serialization format is not compatible with the original hnswlib. * * Experimental, both the API and the serialization format are subject to change. * @@ -1424,6 +1430,8 @@ void serialize_to_hnswlib(raft::resources const& handle, /** * Save a CAGRA build index in hnswlib base-layer-only serialized format + * NOTE: The saved index can only be read by the hnswlib wrapper in cuVS, + * as the serialization format is not compatible with the original hnswlib. * * Experimental, both the API and the serialization format are subject to change. * @@ -1451,6 +1459,8 @@ void serialize_to_hnswlib(raft::resources const& handle, /** * Write the CAGRA built index as a base layer HNSW index to an output stream + * NOTE: The saved index can only be read by the hnswlib wrapper in cuVS, + * as the serialization format is not compatible with the original hnswlib. * * Experimental, both the API and the serialization format are subject to change. * @@ -1477,6 +1487,8 @@ void serialize_to_hnswlib(raft::resources const& handle, /** * Save a CAGRA build index in hnswlib base-layer-only serialized format + * NOTE: The saved index can only be read by the hnswlib wrapper in cuVS, + * as the serialization format is not compatible with the original hnswlib. * * Experimental, both the API and the serialization format are subject to change. * diff --git a/cpp/include/cuvs/neighbors/hnsw.h b/cpp/include/cuvs/neighbors/hnsw.h new file mode 100644 index 0000000000..5e94de60a1 --- /dev/null +++ b/cpp/include/cuvs/neighbors/hnsw.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup hnsw_c_search_params C API for hnswlib wrapper search params + * @{ + */ + +struct cuvsHnswSearchParams { + int32_t ef; + int32_t numThreads; +}; + +typedef struct cuvsHnswSearchParams* cuvsHnswSearchParams_t; + +/** + * @brief Allocate HNSW search params, and populate with default values + * + * @param[in] params cuvsHnswSearchParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params); + +/** + * @brief De-allocate HNSW search params + * + * @param[in] params cuvsHnswSearchParams_t to de-allocate + * @return cuvsError_t + */ +cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params); + +/** + * @} + */ + +/** + * @defgroup hnsw_c_index C API for hnswlib wrapper index + * @{ + */ + +/** + * @brief Struct to hold address of cuvs::neighbors::hnsw::index and its active trained dtype + * + */ +typedef struct { + uintptr_t addr; + DLDataType dtype; + +} cuvsHnswIndex; + +typedef cuvsHnswIndex* cuvsHnswIndex_t; + +/** + * @brief Allocate HNSW index + * + * @param[in] index cuvsHnswIndex_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index); + +/** + * @brief De-allocate HNSW index + * + * @param[in] index cuvsHnswIndex_t to de-allocate + */ +cuvsError_t
cuvsHnswIndexDestroy(cuvsHnswIndex_t index); + +/** + * @} + */ + +/** + * @defgroup hnsw_c_index_search C API for CUDA ANN Graph-based nearest neighbor search + * @{ + */ +/** + * @brief Search a HNSW index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCPU`, `kDLCUDAHost`, or `kDLCUDAManaged`. + * It is also important to note that the HNSW Index must have been built + * with the same type of `queries`, such that `index.dtype.code == + * queries.dl_tensor.dtype.code` + * Supported types for input are: + * 1. `queries`: `kDLDataType.code == kDLFloat` or `kDLDataType.code == kDLInt` and + * `kDLDataType.bits = 32` + * 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64` + * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, + * as the format is not compatible with the original hnswlib. + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor dataset; + * DLManagedTensor queries; + * DLManagedTensor neighbors; + * + * // Create default search params + * cuvsHnswSearchParams_t params; + * cuvsError_t params_create_status = cuvsHnswSearchParamsCreate(&params); + * + * // Search the `index` built using `cuvsHnswBuild` + * cuvsError_t search_status = cuvsHnswSearch(res, params, index, &queries, &neighbors, + * &distances); + * + * // de-allocate `params` and `res` + * cuvsError_t params_destroy_status = cuvsHnswSearchParamsDestroy(params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsHnswSearchParams_t used to search Hnsw index + * @param[in] index cuvsHnswIndex which has been returned by `cuvsHnswBuild` + * @param[in] queries
DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances DLManagedTensor* output `k` distances for queries + */ +cuvsError_t cuvsHnswSearch(cuvsResources_t res, + cuvsHnswSearchParams_t params, + cuvsHnswIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances); + +/** + * @} + */ + +/** + * @defgroup hnsw_c_serialize HNSW C-API serialize functions + * @{ + */ + +/** + * Load hnswlib index from file which was serialized from a HNSW index. + * NOTE: The loaded hnswlib index is immutable, and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.c} + * #include + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsCagraBuild` + * cuvsCagraSerializeToHnswlib(res, "/path/to/index", index); + * + * // Load the serialized CAGRA index from file as an hnswlib index + * // The index should have the same dtype as the one used to build the CAGRA index + * cuvsHnswIndex_t hnsw_index; + * cuvsHnswIndexCreate(&hnsw_index); + * hnsw_index->dtype = index->dtype; + * cuvsHnswDeserialize(res, "/path/to/index", dim, metric, hnsw_index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the name of the file that stores the index + * @param[in] dim the dimension of the vectors in the index + * @param[in] metric the distance metric used to build the index + * @param[out] index HNSW index loaded from disk + */ +cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, + const char* filename, + int dim, + cuvsDistanceType metric, + cuvsHnswIndex_t index); +/** + * @} + */ + +#ifdef __cplusplus +} +#endif diff --git
a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp index 86f3215649..007adef0d4 100644 --- a/cpp/include/cuvs/neighbors/hnsw.hpp +++ b/cpp/include/cuvs/neighbors/hnsw.hpp @@ -34,7 +34,7 @@ namespace cuvs::neighbors::hnsw { /** - * @defgroup hnsw Build CAGRA index and search with hnswlib + * @defgroup hnsw_cpp_search_params Build CAGRA index and search with hnswlib * @{ */ @@ -44,6 +44,13 @@ struct search_params : cuvs::neighbors::search_params { // automatically maximizes parallelism }; +/**@}*/ + +/** + * @defgroup hnsw_cpp_index hnswlib index wrapper + * @{ + */ + template struct index : cuvs::neighbors::index { public: @@ -58,6 +65,8 @@ struct index : cuvs::neighbors::index { */ index(int dim, cuvs::distance::DistanceType metric) : dim_{dim}, metric_{metric} {} + virtual ~index() {} + /** @brief Get underlying index */ @@ -77,11 +86,19 @@ struct index : cuvs::neighbors::index { cuvs::distance::DistanceType metric_; }; +/**@}*/ + +/** + * @defgroup hnsw_cpp_index_load Load CAGRA index as hnswlib index + * @{ + */ + /** - * @brief Construct an hnswlib base-layer-only index from a CAGRA index - * NOTE: 1. This method uses the filesystem to write the CAGRA index in `/tmp/.bin` - * before reading it as an hnswlib index, then deleting the temporary file. - * 2. This function is only offered as a compiled symbol in `libraft.so` + * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index + * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/.bin` + * before reading it as an hnswlib index, then deleting the temporary file. The returned index + * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not + * compatible with the original hnswlib. 
* * @param[in] res raft resources * @param[in] cagra_index cagra index @@ -103,10 +120,11 @@ std::unique_ptr> from_cagra( raft::resources const& res, const cuvs::neighbors::cagra::index& cagra_index); /** - * @brief Construct an hnswlib base-layer-only index from a CAGRA index - * NOTE: 1. This method uses the filesystem to write the CAGRA index in `/tmp/.bin` - * before reading it as an hnswlib index, then deleting the temporary file. - * 2. This function is only offered as a compiled symbol in `libraft.so` + * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index + * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/.bin` + * before reading it as an hnswlib index, then deleting the temporary file. The returned index + * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not + * compatible with the original hnswlib. * * @param[in] res raft resources * @param[in] cagra_index cagra index @@ -128,10 +146,11 @@ std::unique_ptr> from_cagra( raft::resources const& res, const cuvs::neighbors::cagra::index& cagra_index); /** - * @brief Construct an hnswlib base-layer-only index from a CAGRA index - * NOTE: 1. This method uses the filesystem to write the CAGRA index in `/tmp/.bin` - * before reading it as an hnswlib index, then deleting the temporary file. - * 2. This function is only offered as a compiled symbol in `libraft.so` + * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index + * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/.bin` + * before reading it as an hnswlib index, then deleting the temporary file. The returned index + * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not + * compatible with the original hnswlib. 
* * @param[in] res raft resources * @param[in] cagra_index cagra index @@ -152,8 +171,17 @@ std::unique_ptr> from_cagra( std::unique_ptr> from_cagra( raft::resources const& res, const cuvs::neighbors::cagra::index& cagra_index); +/**@}*/ + +/** + * @defgroup hnsw_cpp_index_search Search hnswlib index + * @{ + */ + /** * @brief Search hnswlib base-layer-only index constructed from a CAGRA index + * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, + * as the format is not compatible with the original hnswlib. * * @param[in] res raft resources * @param[in] params configure the search @@ -195,6 +223,8 @@ void search(raft::resources const& res, /** * @brief Search hnswlib base-layer-only index constructed from a CAGRA index + * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, + * as the format is not compatible with the original hnswlib. * * @param[in] res raft resources * @param[in] params configure the search @@ -236,6 +266,8 @@ void search(raft::resources const& res, /** * @brief Search hnswlib base-layer-only index constructed from a CAGRA index + * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, + * as the format is not compatible with the original hnswlib. * * @param[in] res raft resources * @param[in] params configure the search @@ -275,8 +307,17 @@ void search(raft::resources const& res, raft::host_matrix_view neighbors, raft::host_matrix_view distances); +/**@}*/ + +/** + * @defgroup hnsw_cpp_index_deserialize Deserialize CAGRA index as hnswlib index + * @{ + */ + /** - * @brief De-serialize a CAGRA index saved to a file as an hnsw index + * @brief De-serialize a CAGRA index saved to a file as an hnswlib index + * NOTE: The loaded hnswlib index is immutable and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. 
* * @param[in] res raft resources * @param[in] filename path to the file containing the serialized CAGRA index @@ -310,7 +351,9 @@ void deserialize(raft::resources const& res, index** index); /** - * @brief De-serialize a CAGRA index saved to a file as an hnsw index + * @brief De-serialize a CAGRA index saved to a file as an hnswlib index + * NOTE: The loaded hnswlib index is immutable and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. * * @param[in] res raft resources * @param[in] filename path to the file containing the serialized CAGRA index @@ -344,7 +387,9 @@ void deserialize(raft::resources const& res, index** index); /** - * @brief De-serialize a CAGRA index saved to a file as an hnsw index + * @brief De-serialize a CAGRA index saved to a file as an hnswlib index + * NOTE: The loaded hnswlib index is immutable and can only be read by the + * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. * * @param[in] res raft resources * @param[in] filename path to the file containing the serialized CAGRA index diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 918fef5aff..44502f942f 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -304,6 +304,12 @@ struct index : cuvs::neighbors::index { /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Usage example: * @code{.cpp} * using namespace cuvs::neighbors; @@ -327,6 +333,12 @@ auto build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. 
* + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Usage example: * @code{.cpp} * using namespace cuvs::neighbors; @@ -351,6 +363,12 @@ void build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Usage example: * @code{.cpp} * using namespace cuvs::neighbors; @@ -374,6 +392,12 @@ auto build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Usage example: * @code{.cpp} * using namespace cuvs::neighbors; @@ -398,6 +422,12 @@ void build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Usage example: * @code{.cpp} * using namespace cuvs::neighbors; @@ -421,6 +451,12 @@ auto build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Usage example: * @code{.cpp} * using namespace cuvs::neighbors; @@ -445,6 +481,12 @@ void build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. 
* + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Note, if index_params.add_data_on_build is set to true, the user can set a * stream pool in the input raft::resource with at least one stream to enable kernel and copy * overlapping. @@ -475,6 +517,12 @@ auto build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Note, if index_params.add_data_on_build is set to true, the user can set a * stream pool in the input raft::resource with at least one stream to enable kernel and copy * overlapping. @@ -506,6 +554,12 @@ void build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Note, if index_params.add_data_on_build is set to true, the user can set a * stream pool in the input raft::resource with at least one stream to enable kernel and copy * overlapping. @@ -536,6 +590,12 @@ auto build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Note, if index_params.add_data_on_build is set to true, the user can set a * stream pool in the input raft::resource with at least one stream to enable kernel and copy * overlapping. @@ -567,6 +627,12 @@ void build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. 
* + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Note, if index_params.add_data_on_build is set to true, the user can set a * stream pool in the input raft::resource with at least one stream to enable kernel and copy * overlapping. @@ -597,6 +663,12 @@ auto build(raft::resources const& handle, /** * @brief Build the index from the dataset for efficient search. * + * NB: Currently, the following distance metrics are supported: + * - L2Expanded + * - L2Unexpanded + * - InnerProduct + * - CosineExpanded + * * Note, if index_params.add_data_on_build is set to true, the user can set a * stream pool in the input raft::resource with at least one stream to enable kernel and copy * overlapping. diff --git a/cpp/include/cuvs/stats/silhouette_score.hpp b/cpp/include/cuvs/stats/silhouette_score.hpp new file mode 100644 index 0000000000..1771cc21cd --- /dev/null +++ b/cpp/include/cuvs/stats/silhouette_score.hpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include + +namespace cuvs { +namespace stats { + +/** + * @defgroup stats_silhouette_score Silhouette Score + * @{ + */ +/** + * @brief main function that returns the average silhouette score for a given set of data and its + * clusterings + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X_in: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] metric: Distance metric to use. Euclidean (L2) is used by default + * @return: The silhouette score. + */ +float silhouette_score( + raft::resources const& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + int64_t n_unique_labels, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief function that returns the average silhouette score for a given set of data and its + * clusterings + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] batch_size: number of samples per batch + * @param[in] metric: the numerical value that maps to the type of distance metric to be used in + * the calculations + * @return: The silhouette score. 
+ */ +float silhouette_score_batched( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + int64_t n_unique_labels, + int64_t batch_size, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief main function that returns the average silhouette score for a given set of data and its + * clusterings + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X_in: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] metric: the numerical value that maps to the type of distance metric to be used in + * the calculations + * @return: The silhouette score. 
+ */ +double silhouette_score( + raft::resources const& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + int64_t n_unique_labels, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief function that returns the average silhouette score for a given set of data and its + * clusterings + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] batch_size: number of samples per batch + * @param[in] metric: the numerical value that maps to the type of distance metric to be used in + * the calculations + * @return: The silhouette score. + */ +double silhouette_score_batched( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + int64_t n_unique_labels, + int64_t batch_size, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +} // namespace stats +} // namespace cuvs diff --git a/cpp/include/cuvs/stats/trustworthiness_score.hpp b/cpp/include/cuvs/stats/trustworthiness_score.hpp new file mode 100644 index 0000000000..08a26fad51 --- /dev/null +++ b/cpp/include/cuvs/stats/trustworthiness_score.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include +#include + +namespace cuvs { +namespace stats { +/** + * @defgroup stats_trustworthiness Trustworthiness + * @{ + */ + +/** + * @brief Compute the trustworthiness score + * @param[in] handle the raft handle + * @param[in] X: Data in original dimension + * @param[in] X_embedded: Data in target dimension (embedding) + * @param[in] n_neighbors Number of neighbors considered by trustworthiness score + * @param[in] metric Distance metric to use. Euclidean (L2) is used by default + * @param[in] batch_size Batch size + * @return Trustworthiness score + * @note The constness of the data in X_embedded is currently cast away and the data is slightly + * modified. 
+ */ +double trustworthiness_score( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_matrix_view X_embedded, + int n_neighbors, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2SqrtUnexpanded, + int batch_size = 512); + +/** @} */ // end group stats_trustworthiness +} // namespace stats +} // namespace cuvs diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh index a09b17532b..34bb22e85b 100644 --- a/cpp/src/cluster/detail/kmeans_balanced.cuh +++ b/cpp/src/cluster/detail/kmeans_balanced.cuh @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -141,6 +142,53 @@ inline std::enable_if_t> predict_core( raft::compose_op, raft::key_op>()); break; } + case cuvs::distance::DistanceType::CosineExpanded: { + auto workspace = raft::make_device_mdarray( + handle, mr, raft::make_extents((sizeof(int)) * n_rows)); + + auto minClusterAndDistance = raft::make_device_mdarray, IdxT>( + handle, mr, raft::make_extents(n_rows)); + raft::KeyValuePair initial_value(0, std::numeric_limits::max()); + thrust::fill(raft::resource::get_thrust_policy(handle), + minClusterAndDistance.data_handle(), + minClusterAndDistance.data_handle() + minClusterAndDistance.size(), + initial_value); + + auto centroidsNorm = + raft::make_device_mdarray(handle, mr, raft::make_extents(n_clusters)); + raft::linalg::rowNorm(centroidsNorm.data_handle(), + centers, + dim, + n_clusters, + raft::linalg::L2Norm, + true, + stream, + raft::sqrt_op{}); + + cuvs::distance::fusedDistanceNNMinReduce, IdxT>( + minClusterAndDistance.data_handle(), + dataset, + centers, + dataset_norm, + centroidsNorm.data_handle(), + n_rows, + n_clusters, + dim, + (void*)workspace.data_handle(), + false, + false, + true, + params.metric, + 0.0f, + stream); + // Copy keys to output labels + thrust::transform(raft::resource::get_thrust_policy(handle), + minClusterAndDistance.data_handle(), + 
minClusterAndDistance.data_handle() + n_rows, + labels, + raft::compose_op, raft::key_op>()); + break; + } case cuvs::distance::DistanceType::InnerProduct: { // TODO: pass buffer rmm::device_uvector distances(n_rows * n_clusters, stream, mr); @@ -320,13 +368,14 @@ void calc_centers_and_sizes(const raft::resources& handle, } /** Computes the L2 norm of the dataset, converting to MathT if necessary */ -template +template void compute_norm(const raft::resources& handle, MathT* dataset_norm, const T* dataset, IdxT dim, IdxT n_rows, MappingOpT mapping_op, + FinOpT norm_fin_op, std::optional mr = std::nullopt) { raft::common::nvtx::range fun_scope("compute_norm"); @@ -347,7 +396,7 @@ void compute_norm(const raft::resources& handle, } raft::linalg::rowNorm( - dataset_norm, dataset_ptr, dim, n_rows, raft::linalg::L2Norm, true, stream); + dataset_norm, dataset_ptr, dim, n_rows, raft::linalg::L2Norm, true, stream, norm_fin_op); } /** @@ -394,7 +443,8 @@ void predict(const raft::resources& handle, std::is_same_v ? 0 : max_minibatch_size * dim, stream, mem_res); bool need_compute_norm = dataset_norm == nullptr && (params.metric == cuvs::distance::DistanceType::L2Expanded || - params.metric == cuvs::distance::DistanceType::L2SqrtExpanded); + params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || + params.metric == cuvs::distance::DistanceType::CosineExpanded); rmm::device_uvector cur_dataset_norm( need_compute_norm ? max_minibatch_size : 0, stream, mem_res); const MathT* dataset_norm_ptr = nullptr; @@ -411,8 +461,24 @@ void predict(const raft::resources& handle, // Compute the norm now if it hasn't been pre-computed. 
if (need_compute_norm) { - compute_norm( - handle, cur_dataset_norm.data(), cur_dataset_ptr, dim, minibatch_size, mapping_op, mem_res); + if (params.metric == cuvs::distance::DistanceType::CosineExpanded) + compute_norm(handle, + cur_dataset_norm.data(), + cur_dataset_ptr, + dim, + minibatch_size, + mapping_op, + raft::sqrt_op{}, + mr); + else + compute_norm(handle, + cur_dataset_norm.data(), + cur_dataset_ptr, + dim, + minibatch_size, + mapping_op, + raft::identity_op{}, + mr); dataset_norm_ptr = cur_dataset_norm.data(); } else if (dataset_norm != nullptr) { dataset_norm_ptr = dataset_norm + offset; @@ -904,7 +970,8 @@ auto build_fine_clusters(const raft::resources& handle, cub::TransformInputIterator mapping_itr(dataset_mptr, mapping_op); raft::matrix::gather(mapping_itr, dim, n_rows, mc_trainset_ids, k, mc_trainset, stream); if (params.metric == cuvs::distance::DistanceType::L2Expanded || - params.metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || + params.metric == cuvs::distance::DistanceType::CosineExpanded) { thrust::gather(raft::resource::get_thrust_policy(handle), mc_trainset_ids, mc_trainset_ids + k, @@ -963,7 +1030,8 @@ void build_hierarchical(const raft::resources& handle, IdxT n_rows, MathT* cluster_centers, IdxT n_clusters, - MappingOpT mapping_op) + MappingOpT mapping_op, + const MathT* dataset_norm = nullptr) { auto stream = raft::resource::get_cuda_stream(handle); using LabelT = uint32_t; @@ -980,21 +1048,32 @@ void build_hierarchical(const raft::resources& handle, auto [max_minibatch_size, mem_per_row] = calc_minibatch_size(n_clusters, n_rows, dim, params.metric, std::is_same_v); - // Precompute the L2 norm of the dataset if relevant. - const MathT* dataset_norm = nullptr; + // Precompute the L2 norm of the dataset if relevant and not yet computed. 
rmm::device_uvector dataset_norm_buf(0, stream, device_memory); - if (params.metric == cuvs::distance::DistanceType::L2Expanded || - params.metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + if (dataset_norm == nullptr && (params.metric == cuvs::distance::DistanceType::L2Expanded || + params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || + params.metric == cuvs::distance::DistanceType::CosineExpanded)) { dataset_norm_buf.resize(n_rows, stream); for (IdxT offset = 0; offset < n_rows; offset += max_minibatch_size) { IdxT minibatch_size = std::min(max_minibatch_size, n_rows - offset); - compute_norm(handle, - dataset_norm_buf.data() + offset, - dataset + dim * offset, - dim, - minibatch_size, - mapping_op, - device_memory); + if (params.metric == cuvs::distance::DistanceType::CosineExpanded) + compute_norm(handle, + dataset_norm_buf.data() + offset, + dataset + dim * offset, + dim, + minibatch_size, + mapping_op, + raft::sqrt_op{}, + device_memory); + else + compute_norm(handle, + dataset_norm_buf.data() + offset, + dataset + dim * offset, + dim, + minibatch_size, + mapping_op, + raft::identity_op{}, + device_memory); } dataset_norm = (const MathT*)dataset_norm_buf.data(); } diff --git a/cpp/src/cluster/kmeans_balanced.cuh b/cpp/src/cluster/kmeans_balanced.cuh index 040d17b369..306989891e 100644 --- a/cpp/src/cluster/kmeans_balanced.cuh +++ b/cpp/src/cluster/kmeans_balanced.cuh @@ -71,13 +71,15 @@ namespace cuvs::cluster::kmeans_balanced { * @param[out] centroids The generated centroids [dim = n_clusters x n_features] * @param[in] mapping_op (optional) Functor to convert from the input datatype to the arithmetic * datatype. If DataT == MathT, this must be the identity. 
+ * @param[in] X_norm (optional) Dataset's row norms [dim = n_samples] */ template void fit(const raft::resources& handle, cuvs::cluster::kmeans::balanced_params const& params, raft::device_matrix_view X, raft::device_matrix_view centroids, - MappingOpT mapping_op = raft::identity_op()) + MappingOpT mapping_op = raft::identity_op(), + std::optional> X_norm = std::nullopt) { RAFT_EXPECTS(X.extent(1) == centroids.extent(1), "Number of features in dataset and centroids are different"); @@ -88,14 +90,16 @@ void fit(const raft::resources& handle, "The number of centroids must be strictly positive and cannot exceed the number of " "points in the training dataset."); - cuvs::cluster::kmeans::detail::build_hierarchical(handle, - params, - X.extent(1), - X.data_handle(), - X.extent(0), - centroids.data_handle(), - centroids.extent(0), - mapping_op); + cuvs::cluster::kmeans::detail::build_hierarchical( + handle, + params, + X.extent(1), + X.data_handle(), + X.extent(0), + centroids.data_handle(), + centroids.extent(0), + mapping_op, + X_norm.has_value() ? X_norm.value().data_handle() : nullptr); } /** @@ -125,6 +129,7 @@ void fit(const raft::resources& handle, * @param[out] labels The output labels [dim = n_samples] * @param[in] mapping_op (optional) Functor to convert from the input datatype to the arithmetic * datatype. If DataT == MathT, this must be the identity. 
+ * @param[in] X_norm (optional) Dataset's row norms [dim = n_samples] */ template X, raft::device_matrix_view centroids, raft::device_vector_view labels, - MappingOpT mapping_op = raft::identity_op()) + MappingOpT mapping_op = raft::identity_op(), + std::optional> X_norm = std::nullopt) { RAFT_EXPECTS(X.extent(0) == labels.extent(0), "Number of rows in dataset and labels are different"); @@ -149,15 +155,18 @@ void predict(const raft::resources& handle, static_cast(std::numeric_limits::max()), "The chosen label type cannot represent all cluster labels"); - cuvs::cluster::kmeans::detail::predict(handle, - params, - centroids.data_handle(), - centroids.extent(0), - X.extent(1), - X.data_handle(), - X.extent(0), - labels.data_handle(), - mapping_op); + cuvs::cluster::kmeans::detail::predict( + handle, + params, + centroids.data_handle(), + centroids.extent(0), + X.extent(1), + X.data_handle(), + X.extent(0), + labels.data_handle(), + mapping_op, + raft::resource::get_workspace_resource(handle), + X_norm.has_value() ? 
X_norm.value().data_handle() : nullptr); } /** diff --git a/cpp/src/core/c_api.cpp b/cpp/src/core/c_api.cpp index 96504a2fee..a75e5a1ddf 100644 --- a/cpp/src/core/c_api.cpp +++ b/cpp/src/core/c_api.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include extern "C" cuvsError_t cuvsResourcesCreate(cuvsResources_t* res) @@ -82,6 +83,34 @@ extern "C" cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes) }); } +thread_local std::unique_ptr> pool_mr; + +extern "C" cuvsError_t cuvsRMMPoolMemoryResourceEnable(int initial_pool_size_percent, + int max_pool_size_percent) +{ + return cuvs::core::translate_exceptions([=] { + // Upstream memory resource needs to be a cuda_memory_resource + auto cuda_mr = rmm::mr::get_current_device_resource(); + auto* cuda_mr_casted = dynamic_cast(cuda_mr); + if (cuda_mr_casted == nullptr) { + throw std::runtime_error("Current memory resource is not a cuda_memory_resource"); + } + auto initial_size = rmm::percent_of_free_device_memory(initial_pool_size_percent); + auto max_size = rmm::percent_of_free_device_memory(max_pool_size_percent); + pool_mr = std::make_unique>( + cuda_mr_casted, initial_size, max_size); + rmm::mr::set_current_device_resource(pool_mr.get()); + }); +} + +extern "C" cuvsError_t cuvsRMMMemoryResourceReset() +{ + return cuvs::core::translate_exceptions([=] { + rmm::mr::set_current_device_resource(nullptr); + pool_mr.reset(); + }); +} + thread_local std::string last_error_text = ""; extern "C" const char* cuvsGetLastErrorText() diff --git a/cpp/src/distance/detail/distance.cuh b/cpp/src/distance/detail/distance.cuh index c8dde4ea4f..d6fd046469 100644 --- a/cpp/src/distance/detail/distance.cuh +++ b/cpp/src/distance/detail/distance.cuh @@ -27,6 +27,7 @@ #include #include #include +#include // to_float #include @@ -104,8 +105,8 @@ void distance_impl(raft::resources const& handle, { ops::canberra_distance_op distance_op{}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* 
x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); pairwise_matrix_dispatch( @@ -217,8 +218,8 @@ void distance_impl(raft::resources const& handle, cudaStream_t stream = raft::resource::get_cuda_stream(handle); - DataT* x_norm = workspace; - DataT* y_norm = workspace; + OutT* x_norm = reinterpret_cast(workspace); + OutT* y_norm = reinterpret_cast(workspace); // TODO: Column major case looks to have lower accuracy for X == Y, // perhaps the use of stridedSummationKernel could be causing this, // need to investigate and fix. @@ -255,8 +256,8 @@ void distance_impl(raft::resources const& handle, { ops::hamming_distance_op distance_op{k}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -319,8 +320,8 @@ void distance_impl(raft::resources const& handle, // Then calculate Hellinger distance ops::hellinger_distance_op distance_op{}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; pairwise_matrix_dispatch( distance_op, m, n, k, x, y, x_norm, y_norm, out, fin_op, stream, is_row_major); @@ -349,8 +350,8 @@ void distance_impl(raft::resources const& handle, { ops::jensen_shannon_distance_op distance_op{}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -376,14 +377,24 @@ void distance_impl(raft::resources const& handle, cudaStream_t stream = raft::resource::get_cuda_stream(handle); auto unaryOp_lambda = [] __device__(DataT input) { - const bool x_zero = (input == 0); - return (!x_zero) * raft::log(input + x_zero); + auto input_ = raft::to_float(input); + const bool x_zero = (input_ == 0); + if constexpr (std::is_same_v) { + return 
__float2half((!x_zero) * raft::log(input_ + x_zero)); + } else { + return (!x_zero) * raft::log(input_ + x_zero); + } }; auto unaryOp_lambda_reverse = [] __device__(DataT input) { // reverse previous log (x) back to x using (e ^ log(x)) - const bool x_zero = (input == 0); - return (!x_zero) * raft::exp(input); + auto input_ = raft::to_float(input); + const bool x_zero = (input_ == 0); + if constexpr (std::is_same_v) { + return __float2half((!x_zero) * raft::exp(input_)); + } else { + return (!x_zero) * raft::exp(input_); + } }; if (x != y) { @@ -391,8 +402,8 @@ void distance_impl(raft::resources const& handle, (DataT*)y, y, n * k, unaryOp_lambda, stream); } - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; // This op takes some shortcuts when x equals y. So its behavior changes based // on this. @@ -425,8 +436,8 @@ void distance_impl(raft::resources const& handle, { ops::l1_distance_op distance_op{}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); pairwise_matrix_dispatch( @@ -460,8 +471,13 @@ void distance_impl_l2_expanded( // NOTE: different name ASSERT(!(worksize < (m + n) * sizeof(AccT)), "workspace size error"); ASSERT(workspace != nullptr, "workspace is null"); - DataT* x_norm = workspace; - DataT* y_norm = workspace; + // TODO: May we have a better method to avoid misalignment? + uintptr_t offset = alignof(OutT) - (reinterpret_cast(workspace) % alignof(OutT)); + if (offset == alignof(OutT)) { offset = 0; } + OutT* x_norm = reinterpret_cast(reinterpret_cast(workspace) + offset); + + offset = (reinterpret_cast(x_norm) % alignof(OutT)); + OutT* y_norm = x_norm; // TODO: Column major case looks to have lower accuracy for X == Y, // perhaps the use of stridedSummationKernel could be causing this, // need to investigate and fix. 
@@ -548,8 +564,8 @@ void distance_impl(raft::resources const& handle, ops::l2_unexp_distance_op l2_op(perform_sqrt); // The unexpanded L2 does not require the norms of a and b to be calculated. - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -576,8 +592,8 @@ void distance_impl(raft::resources const& handle, ops::l2_unexp_distance_op l2_op(perform_sqrt); // The unexpanded L2 does not require the norms of a and b to be calculated. - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -602,8 +618,8 @@ void distance_impl(raft::resources const& handle, { ops::l_inf_distance_op distance_op{}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -628,8 +644,8 @@ void distance_impl(raft::resources const& handle, { ops::lp_unexp_distance_op distance_op{metric_arg}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -654,8 +670,8 @@ void distance_impl(raft::resources const& handle, { ops::russel_rao_distance_op distance_op{k}; - const DataT* x_norm = nullptr; - const DataT* y_norm = nullptr; + const OutT* x_norm = nullptr; + const OutT* y_norm = nullptr; cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -705,8 +721,8 @@ void distance(raft::resources const& handle, void* workspace, size_t worksize, FinalLambda fin_op, - bool isRowMajor = true, - InType metric_arg = 2.0f) + bool isRowMajor = true, + OutType metric_arg = 2.0f) { // raft distance support inputs as float/double and output as 
uint8_t/float/double. static_assert(!((sizeof(OutType) > 1) && (sizeof(AccType) != sizeof(OutType))), @@ -762,8 +778,8 @@ void distance(raft::resources const& handle, Index_ k, void* workspace, size_t worksize, - bool isRowMajor = true, - InType metric_arg = 2.0f) + bool isRowMajor = true, + OutType metric_arg = 2.0f) { auto fin_op = raft::identity_op(); diff --git a/cpp/src/distance/detail/distance_ops/canberra.cuh b/cpp/src/distance/detail/distance_ops/canberra.cuh index 8bbdc9945b..bf01caf986 100644 --- a/cpp/src/distance/detail/distance_ops/canberra.cuh +++ b/cpp/src/distance/detail/distance_ops/canberra.cuh @@ -19,6 +19,8 @@ #include // raft::abs #include // DI +#include + namespace cuvs::distance::detail::ops { /** @@ -50,17 +52,27 @@ struct canberra_distance_op { DI void core(AccT& acc, DataT& x, DataT& y) const { - const auto diff = raft::abs(x - y); - const auto add = raft::abs(x) + raft::abs(y); - // deal with potential for 0 in denominator by - // forcing 0/1 instead - acc += ((add != 0) * diff / (add + (add == 0))); + if constexpr ((std::is_same_v && std::is_same_v)) { + AccT _x = __half2float(x); + AccT _y = __half2float(y); + const auto diff = raft::abs(_x - _y); + const auto add = raft::abs(_x) + raft::abs(_y); + // deal with potential for 0 in denominator by + // forcing 0/1 instead + acc += ((add != 0) * diff / (add + (add == 0))); + } else { + const auto diff = raft::abs(x - y); + const auto add = raft::abs(x) + raft::abs(y); + // deal with potential for 0 in denominator by + // forcing 0/1 instead + acc += ((add != 0) * diff / (add + (add == 0))); + } }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/correlation.cuh b/cpp/src/distance/detail/distance_ops/correlation.cuh index f033f3dfad..810d3f90c4 100644 --- a/cpp/src/distance/detail/distance_ops/correlation.cuh 
+++ b/cpp/src/distance/detail/distance_ops/correlation.cuh @@ -18,6 +18,8 @@ #include // DI +#include + namespace cuvs::distance::detail::ops { /** @brief The correlation distance @@ -34,20 +36,20 @@ struct correlation_distance_op { using AccT = AccType; using IdxT = IdxType; - const DataT* x2n; - const DataT* y2n; + const AccT* x2n; + const AccT* y2n; IdxT m; IdxT n; IdxT k; correlation_distance_op( - bool is_row_major, const DataT* x2n_, const DataT* y2n_, IdxT m_, IdxT n_, IdxT k_) noexcept + bool is_row_major, const AccT* x2n_, const AccT* y2n_, IdxT m_, IdxT n_, IdxT k_) noexcept : x2n(x2n_), y2n(y2n_), m(m_), n(n_), k(k_) { // The distance op is typically created before the row-major/col-major // swapping has been done. So we do it here. if (!is_row_major) { - std::swap(x2n, y2n); + std::swap(x2n, y2n); std::swap(m, n); } } @@ -63,15 +65,18 @@ struct correlation_distance_op { template static constexpr size_t shared_mem_size() { - return Policy::SmemSize + (2 * (Policy::Mblk + Policy::Nblk) * sizeof(DataT)); + return Policy::SmemSize + (2 * (Policy::Mblk + Policy::Nblk) * sizeof(AccT)); } - DI void core(AccT& acc, DataT& x, DataT& y) const { acc += x * y; }; + DI void core(AccT& acc, DataT& x, DataT& y) const + { + acc += raft::to_float(x) * raft::to_float(y); + }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { @@ -80,23 +85,22 @@ struct correlation_distance_op { // changes, this will be where we find the bugs. 
extern __shared__ char smem[]; - DataT regx2n[Policy::AccRowsPerTh], regy2n[Policy::AccColsPerTh]; + AccT regx2n[Policy::AccRowsPerTh], regy2n[Policy::AccColsPerTh]; - DataT* sx2Norm = - (DataT*)(&smem[Policy::SmemSize + (Policy::Mblk + Policy::Nblk) * sizeof(DataT)]); - DataT* sy2Norm = (&sx2Norm[Policy::Mblk]); + AccT* sx2Norm = (AccT*)(&smem[Policy::SmemSize + (Policy::Mblk + Policy::Nblk) * sizeof(AccT)]); + AccT* sy2Norm = (&sx2Norm[Policy::Mblk]); // Load x & y norms required by this threadblock in shmem buffer if (gridStrideX == blockIdx.x * Policy::Nblk) { for (int i = threadIdx.x; i < Policy::Mblk; i += Policy::Nthreads) { auto idx = gridStrideY + i; - sx2Norm[i] = idx < m ? x2n[idx] : 0; + sx2Norm[i] = idx < m ? raft::to_float(x2n[idx]) : 0; } } for (int i = threadIdx.x; i < Policy::Nblk; i += Policy::Nthreads) { auto idx = gridStrideX + i; - sy2Norm[i] = idx < n ? y2n[idx] : 0; + sy2Norm[i] = idx < n ? raft::to_float(y2n[idx]) : 0; } __syncthreads(); diff --git a/cpp/src/distance/detail/distance_ops/cosine.cuh b/cpp/src/distance/detail/distance_ops/cosine.cuh index d487316513..b0a8b867c1 100644 --- a/cpp/src/distance/detail/distance_ops/cosine.cuh +++ b/cpp/src/distance/detail/distance_ops/cosine.cuh @@ -18,17 +18,19 @@ #include // DI +#include + namespace cuvs::distance::detail::ops { // Epilogue operator for CUTLASS based kernel template struct cosine_cutlass_op { __device__ cosine_cutlass_op() noexcept {} - __device__ AccT operator()(DataT& aNorm, const DataT& bNorm, DataT& accVal) const noexcept + __device__ AccT operator()(AccT& aNorm, const AccT& bNorm, AccT& accVal) const noexcept { return static_cast(1.0) - static_cast(accVal / (aNorm * bNorm)); } - __device__ AccT operator()(DataT aData) const noexcept { return aData; } + __device__ AccT operator()(DataT aData) const noexcept { return raft::to_float(aData); } }; /** @@ -55,15 +57,22 @@ struct cosine_distance_op { template static constexpr size_t shared_mem_size() { - return Policy::SmemSize + 
((Policy::Mblk + Policy::Nblk) * sizeof(DataT)); + return Policy::SmemSize + ((Policy::Mblk + Policy::Nblk) * sizeof(AccT)); } - DI void core(AccT& acc, DataT& x, DataT& y) const { acc += x * y; }; + DI void core(AccT& acc, DataT& x, DataT& y) const + { + if constexpr ((std::is_same_v && std::is_same_v)) { + acc += __half2float(x) * __half2float(y); + } else { + acc += x * y; + } + }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { @@ -71,7 +80,11 @@ struct cosine_distance_op { for (int i = 0; i < Policy::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < Policy::AccColsPerTh; ++j) { - acc[i][j] = 1.0 - (acc[i][j] / (regxn[i] * regyn[j])); + if constexpr ((std::is_same_v && std::is_same_v)) { + acc[i][j] = 1.0 - (acc[i][j] / (__half2float(regxn[i]) * __half2float(regyn[j]))); + } else { + acc[i][j] = 1.0 - (acc[i][j] / (regxn[i] * regyn[j])); + } } } } diff --git a/cpp/src/distance/detail/distance_ops/hamming.cuh b/cpp/src/distance/detail/distance_ops/hamming.cuh index 7c6553f387..8548df7520 100644 --- a/cpp/src/distance/detail/distance_ops/hamming.cuh +++ b/cpp/src/distance/detail/distance_ops/hamming.cuh @@ -54,12 +54,12 @@ struct hamming_distance_op { template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { - const DataT one_over_k = DataT(1.0) / k; + const AccT one_over_k = AccT(1.0) / k; #pragma unroll for (int i = 0; i < Policy::AccRowsPerTh; ++i) { #pragma unroll diff --git a/cpp/src/distance/detail/distance_ops/hellinger.cuh b/cpp/src/distance/detail/distance_ops/hellinger.cuh index ad5ca31564..5d9dd22591 100644 --- a/cpp/src/distance/detail/distance_ops/hellinger.cuh +++ b/cpp/src/distance/detail/distance_ops/hellinger.cuh @@ -50,14 +50,14 @@ struct hellinger_distance_op { DI void core(AccT& 
acc, DataT& x, DataT& y) const { // This is sqrt(x) * sqrt(y). - const auto product = x * y; + const AccT product = raft::to_float(x) * raft::to_float(y); acc += product; }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/jensen_shannon.cuh b/cpp/src/distance/detail/distance_ops/jensen_shannon.cuh index 2166394941..8f8324ed0e 100644 --- a/cpp/src/distance/detail/distance_ops/jensen_shannon.cuh +++ b/cpp/src/distance/detail/distance_ops/jensen_shannon.cuh @@ -52,19 +52,21 @@ struct jensen_shannon_distance_op { DI void core(AccT& acc, DataT& x, DataT& y) const { - const DataT m = 0.5f * (x + y); + AccT x_ = raft::to_float(x); + AccT y_ = raft::to_float(y); + const AccT m = 0.5f * (x_ + y_); const bool m_zero = (m == 0); const auto logM = (!m_zero) * raft::log(m + m_zero); - const bool x_zero = (x == 0); - const bool y_zero = (y == 0); - acc += (-x * (logM - raft::log(x + x_zero))) + (-y * (logM - raft::log(y + y_zero))); + const bool x_zero = (x_ == 0); + const bool y_zero = (y_ == 0); + acc += (-x_ * (logM - raft::log(x_ + x_zero))) + (-y_ * (logM - raft::log(y_ + y_zero))); }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/kl_divergence.cuh b/cpp/src/distance/detail/distance_ops/kl_divergence.cuh index 929c3a5596..7f183b159e 100644 --- a/cpp/src/distance/detail/distance_ops/kl_divergence.cuh +++ b/cpp/src/distance/detail/distance_ops/kl_divergence.cuh @@ -59,31 +59,33 @@ struct kl_divergence_op { { // TODO: make sure that these branches get hoisted out of main loop.. Could // be quite expensive otherwise. 
+ AccT x_ = raft::to_float(x); + AccT y_ = raft::to_float(y); if (x_equal_y) { if (is_row_major) { - const bool x_zero = (x == 0); - const bool y_zero = (y == 0); - acc += x * (raft::log(x + x_zero) - (!y_zero) * raft::log(y + y_zero)); + const bool x_zero = (x_ == 0); + const bool y_zero = (y_ == 0); + acc += x_ * (raft::log(x_ + x_zero) - (!y_zero) * raft::log(y_ + y_zero)); } else { - const bool y_zero = (y == 0); - const bool x_zero = (x == 0); - acc += y * (raft::log(y + y_zero) - (!x_zero) * raft::log(x + x_zero)); + const bool y_zero = (y_ == 0); + const bool x_zero = (x_ == 0); + acc += y_ * (raft::log(y_ + y_zero) - (!x_zero) * raft::log(x_ + x_zero)); } } else { if (is_row_major) { - const bool x_zero = (x == 0); - acc += x * (raft::log(x + x_zero) - y); + const bool x_zero = (x_ == 0); + acc += x_ * (raft::log(x_ + x_zero) - y_); } else { - const bool y_zero = (y == 0); - acc += y * (raft::log(y + y_zero) - x); + const bool y_zero = (y_ == 0); + acc += y_ * (raft::log(y_ + y_zero) - x_); } } }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/l1.cuh b/cpp/src/distance/detail/distance_ops/l1.cuh index 76eaffaf30..278702ea33 100644 --- a/cpp/src/distance/detail/distance_ops/l1.cuh +++ b/cpp/src/distance/detail/distance_ops/l1.cuh @@ -46,12 +46,15 @@ struct l1_distance_op { return Policy::SmemSize; } - DI void core(AccT& acc, DataT& x, DataT& y) const { acc += raft::abs(x - y); }; + DI void core(AccT& acc, DataT& x, DataT& y) const + { + acc += raft::abs(raft::to_float(x) - raft::to_float(y)); + }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/l2_exp.cuh 
b/cpp/src/distance/detail/distance_ops/l2_exp.cuh index f45c412069..04817aa8b1 100644 --- a/cpp/src/distance/detail/distance_ops/l2_exp.cuh +++ b/cpp/src/distance/detail/distance_ops/l2_exp.cuh @@ -19,6 +19,8 @@ #include #include // DI +#include + namespace cuvs::distance::detail::ops { /** @@ -52,8 +54,8 @@ struct l2_exp_cutlass_op { * Self-neighboring points should have (aNorm == bNorm) == accVal and the dot product (accVal) * can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal instead. */ - outVal = outVal * !((outVal * outVal < get_clamp_precision()) * (aNorm == bNorm)); - return sqrt ? raft::sqrt(outVal * (outVal > 0)) : outVal; + outVal = outVal * AccT(!((outVal * outVal < get_clamp_precision()) * (aNorm == bNorm))); + return sqrt ? raft::sqrt(outVal * static_cast(outVal > AccT(0))) : outVal; } __device__ AccT operator()(DataT aData) const noexcept { return aData; } @@ -88,15 +90,22 @@ struct l2_exp_distance_op { template static constexpr size_t shared_mem_size() { - return Policy::SmemSize + ((Policy::Mblk + Policy::Nblk) * sizeof(DataT)); + return Policy::SmemSize + ((Policy::Mblk + Policy::Nblk) * sizeof(AccT)); } - DI void core(AccT& acc, DataT& x, DataT& y) const { acc += x * y; }; + DI void core(AccT& acc, DataT& x, DataT& y) const + { + if constexpr ((std::is_same_v && std::is_same_v)) { + acc += __half2float(x) * __half2float(y); + } else { + acc += x * y; + } + }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { @@ -104,8 +113,8 @@ struct l2_exp_distance_op { for (int i = 0; i < Policy::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < Policy::AccColsPerTh; ++j) { - DataT accVal = acc[i][j]; - DataT val = regxn[i] + regyn[j] - (DataT)2.0 * accVal; + AccT accVal = acc[i][j]; + AccT val = regxn[i] + regyn[j] - (AccT)2.0 * accVal; /** * Self-neighboring points should have (aNorm == 
bNorm) == accVal and the dot product @@ -113,7 +122,8 @@ struct l2_exp_distance_op { * instead. */ acc[i][j] = - val * (val > 0) * !((val * val < get_clamp_precision()) * (regxn[i] == regyn[j])); + val * static_cast((val > AccT(0))) * + static_cast(!((val * val < get_clamp_precision()) * (regxn[i] == regyn[j]))); } } if (sqrt) { diff --git a/cpp/src/distance/detail/distance_ops/l2_unexp.cuh b/cpp/src/distance/detail/distance_ops/l2_unexp.cuh index aa6cc27f3e..f12820d8ec 100644 --- a/cpp/src/distance/detail/distance_ops/l2_unexp.cuh +++ b/cpp/src/distance/detail/distance_ops/l2_unexp.cuh @@ -18,6 +18,8 @@ #include // DI +#include + namespace cuvs::distance::detail::ops { /** @@ -53,14 +55,19 @@ struct l2_unexp_distance_op { DI void core(AccT& acc, DataT& x, DataT& y) const { - const auto diff = x - y; - acc += diff * diff; + if constexpr ((std::is_same_v && std::is_same_v)) { + const auto diff = __half2float(x) - __half2float(y); + acc += diff * diff; + } else { + const auto diff = x - y; + acc += diff * diff; + } }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/l_inf.cuh b/cpp/src/distance/detail/distance_ops/l_inf.cuh index d8f9384d7e..d8559a7d1b 100644 --- a/cpp/src/distance/detail/distance_ops/l_inf.cuh +++ b/cpp/src/distance/detail/distance_ops/l_inf.cuh @@ -55,8 +55,8 @@ struct l_inf_distance_op { template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/lp_unexp.cuh b/cpp/src/distance/detail/distance_ops/lp_unexp.cuh index 6136f9f3eb..2adf0befa0 100644 --- a/cpp/src/distance/detail/distance_ops/lp_unexp.cuh +++ b/cpp/src/distance/detail/distance_ops/lp_unexp.cuh @@ -53,18 +53,18 @@ struct 
lp_unexp_distance_op { DI void core(AccT& acc, DataT& x, DataT& y) const { - const auto diff = raft::abs(x - y); - acc += raft::pow(diff, p); + const AccT diff = raft::abs(raft::to_float(x) - raft::to_float(y)); + acc += raft::pow(diff, raft::to_float(p)); }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { - const auto one_over_p = 1.0f / p; + const AccT one_over_p = 1.0f / static_cast(raft::to_float(p)); #pragma unroll for (int i = 0; i < Policy::AccRowsPerTh; ++i) { #pragma unroll diff --git a/cpp/src/distance/detail/distance_ops/russel_rao.cuh b/cpp/src/distance/detail/distance_ops/russel_rao.cuh index 5dffdcdb89..4988c73536 100644 --- a/cpp/src/distance/detail/distance_ops/russel_rao.cuh +++ b/cpp/src/distance/detail/distance_ops/russel_rao.cuh @@ -52,12 +52,15 @@ struct russel_rao_distance_op { return Policy::SmemSize; } - DI void core(AccT& acc, DataT& x, DataT& y) const { acc += x * y; }; + DI void core(AccT& acc, DataT& x, DataT& y) const + { + acc += raft::to_float(x) * raft::to_float(y); + }; template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/distance_ops/template.cuh b/cpp/src/distance/detail/distance_ops/template.cuh index bdb9332379..cb26e210db 100644 --- a/cpp/src/distance/detail/distance_ops/template.cuh +++ b/cpp/src/distance/detail/distance_ops/template.cuh @@ -52,8 +52,8 @@ struct template_distance_op { template DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT* regxn, - DataT* regyn, + AccT* regxn, + AccT* regyn, IdxT gridStrideX, IdxT gridStrideY) const { diff --git a/cpp/src/distance/detail/masked_distance_base.cuh b/cpp/src/distance/detail/masked_distance_base.cuh index 2c41ee3be0..ec7270baa2 100644 --- 
a/cpp/src/distance/detail/masked_distance_base.cuh +++ b/cpp/src/distance/detail/masked_distance_base.cuh @@ -266,7 +266,7 @@ struct MaskedDistances : public BaseClass { for (int i = 0; i < P::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < P::AccColsPerTh; ++j) { - acc[i][j] = BaseClass::Zero; + acc[i][j] = BaseClass::Zero(); } } } diff --git a/cpp/src/distance/detail/pairwise_distance_base.cuh b/cpp/src/distance/detail/pairwise_distance_base.cuh index 990f845fd4..72d75ec12b 100644 --- a/cpp/src/distance/detail/pairwise_distance_base.cuh +++ b/cpp/src/distance/detail/pairwise_distance_base.cuh @@ -72,8 +72,8 @@ struct PairwiseDistances : public BaseClass { private: typedef Policy P; - const DataT* xn; - const DataT* yn; + const OutT* xn; + const OutT* yn; const DataT* const yBase; OutT* dOutput; char* smem; @@ -99,8 +99,8 @@ struct PairwiseDistances : public BaseClass { IdxT _lda, IdxT _ldb, IdxT _ldd, - const DataT* _xn, - const DataT* _yn, + const OutT* _xn, + const OutT* _yn, OutT* _dOutput, char* _smem, OpT _distance_op, @@ -154,7 +154,7 @@ struct PairwiseDistances : public BaseClass { // Epilog: if (distance_op.use_norms) { - DataT regxn[P::AccRowsPerTh], regyn[P::AccColsPerTh]; + OutT regxn[P::AccRowsPerTh], regyn[P::AccColsPerTh]; load_norms(tile_idx_m, tile_idx_n, regxn, regyn); // Overlap ldg with epilog computation ldgNextGridStride(tile_idx_m, tile_idx_n); @@ -200,7 +200,7 @@ struct PairwiseDistances : public BaseClass { for (int i = 0; i < P::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < P::AccColsPerTh; ++j) { - acc[i][j] = BaseClass::Zero; + acc[i][j] = BaseClass::Zero(); } } } @@ -242,23 +242,23 @@ struct PairwiseDistances : public BaseClass { DI void load_norms(IdxT tile_idx_m, IdxT tile_idx_n, - DataT (&regxn)[P::AccRowsPerTh], - DataT (&regyn)[P::AccColsPerTh]) + OutT (&regxn)[P::AccRowsPerTh], + OutT (&regyn)[P::AccColsPerTh]) { - DataT* sxNorm = (DataT*)(&smem[P::SmemSize]); - DataT* syNorm = (&sxNorm[P::Mblk]); + OutT* sxNorm =
(OutT*)(&smem[P::SmemSize]); + OutT* syNorm = (&sxNorm[P::Mblk]); // Load x & y norms required by this threadblock in shmem buffer if (tile_idx_n == blockIdx.x * P::Nblk) { for (int i = threadIdx.x; i < P::Mblk; i += P::Nthreads) { auto idx = tile_idx_m + i; - sxNorm[i] = idx < this->m ? xn[idx] : 0; + sxNorm[i] = idx < this->m ? xn[idx] : OutT(0); } } for (int i = threadIdx.x; i < P::Nblk; i += P::Nthreads) { auto idx = tile_idx_n + i; - syNorm[i] = idx < this->n ? yn[idx] : 0; + syNorm[i] = idx < this->n ? yn[idx] : OutT(0); } __syncthreads(); @@ -285,7 +285,7 @@ struct PairwiseDistances : public BaseClass { auto colId = startx + j * P::AccThCols; if (rowId < this->m && colId < this->n) { // Promote to 64 bit index for final write, as output array can be > 2^31 - dOutput[std::size_t(rowId) * this->n + colId] = fin_op(acc[i][j], 0); + dOutput[std::size_t(rowId) * this->n + colId] = fin_op(acc[i][j], AccT(0)); } } } diff --git a/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh b/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh index d41c5d30cd..d4d86d7f4d 100644 --- a/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh +++ b/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh @@ -58,8 +58,8 @@ template std::enable_if_t::value> cutlassDistanceKernel(const DataT* x, const DataT* y, - const DataT* xn, - const DataT* yn, + const OutT* xn, + const OutT* yn, IdxT m, IdxT n, IdxT k, @@ -77,12 +77,12 @@ std::enable_if_t::value> cutlassDistanceKernel(const Da auto dist_op = distance_op.get_cutlass_op(); using DistanceFn = decltype(dist_op); using EpilogueOutputOp = - epilogue::thread::PairwiseDistanceEpilogueElementwise; constexpr int batch_count = 1; @@ -143,13 +143,13 @@ std::enable_if_t::value> cutlassDistanceKernel(const Da epilog_op_param, a, b, - xn, // C matrix eq vector param, which here is A norm - nullptr, // tensor_Z, - (DataT*)yn + offsetN, // this is broadcast vec, which is required to be non-const param - dOutput + offsetN, // 
Output distance matrix - (int64_t)0, // batch stride A - (int64_t)0, // batch stride B - (int64_t)0, // batch stride Norm A + xn, // C matrix eq vector param, which here is A norm + nullptr, // tensor_Z, + (OutT*)yn + offsetN, // this is broadcast vec, which is required to be non-const param + dOutput + offsetN, // Output distance matrix + (int64_t)0, // batch stride A + (int64_t)0, // batch stride B + (int64_t)0, // batch stride Norm A (int64_t)0, (int64_t)0, // batch stride Norm B (int64_t)0, // batch stride Output diff --git a/cpp/src/distance/detail/pairwise_distance_gemm.h b/cpp/src/distance/detail/pairwise_distance_gemm.h index 6ac13f27b6..a746138125 100644 --- a/cpp/src/distance/detail/pairwise_distance_gemm.h +++ b/cpp/src/distance/detail/pairwise_distance_gemm.h @@ -19,11 +19,15 @@ #include "./pairwise_distance_epilogue.h" #include +#include +#include #include #include #include #include +#include + ///////////////////////////////////////////////////////////////////////////////////////////////// namespace cuvs { @@ -235,8 +239,105 @@ struct PairwiseDistanceGemm; }; +template < + /// Alignment of A matrix operand + int kAlignmentA, + /// Alignment of B matrix operand + int kAlignmentB, + /// Element type for C and D matrix operands + typename ElementC_, + /// Element type for internal accumulation + typename ElementAccumulator, + /// Epilogue output operator - must satisfy concept of 'EpilogueWithBroadcastOp' + typename EpilogueOutputOp, + /// Number of stages used in the pipelined mainloop + int Stages, + /// data layout row/column major of inputs + bool isRowMajor> +struct PairwiseDistanceGemm { + // using Transform = cutlass::ComplexTransform::kNone; + // Threadblock-level tile size (concept: GemmShape) + using ThreadblockShape = + cutlass::gemm::GemmShape<128, 128, 32>; // <- threadblock tile M = 128, N = 128, K = 32 + /// Warp-level tile size (concept: GemmShape) + // This code section describes tile size a warp will compute + using WarpShape =
cutlass::gemm::GemmShape<64, 64, 32>; // <- warp tile M = 64, N = 64, K = 32 + /// Instruction-level tile size (concept: GemmShape) + // This code section describes the size of MMA op + using InstructionShape = cutlass::gemm::GemmShape<16, 8, 16>; + + // Operation performed by GEMM + using Operator = cutlass::arch::OpMultiplyAdd; + // This code section describes whether you want to use tensor cores or regular SIMT cores on GPU + // SM + using OperatorClass = cutlass::arch::OpClassTensorOp; + + // This code section describes CUDA SM architecture number + using ArchTag = cutlass::arch::Sm80; + + // This code section describes how threadblocks are scheduled on GPU + /// Threadblock-level swizzling operator + using ThreadblockSwizzle = cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle; + + /// data layout for final output matrix. + // we keep this same layout even for column major inputs + using LayoutOutput = cutlass::layout::RowMajor; + + typedef typename std::conditional::type NormXLayout; + + typedef typename std:: + conditional::type LayoutA_; + + typedef typename std:: + conditional::type LayoutB_; + + using GemmBase = typename cutlass::gemm::device::GemmUniversal::GemmKernel; + + // Replace epilogue + using Epilogue = typename cuvs::epilogue::threadblock::PairwiseDistanceEpilogue< + typename GemmBase::Epilogue::Shape, + typename GemmBase::Epilogue::WarpMmaOperator, + GemmBase::Epilogue::kPartitionsK, + ElementC_, + typename EpilogueOutputOp::ElementT, + ElementC_, + EpilogueOutputOp, + NormXLayout, + GemmBase::Epilogue::kElementsPerAccess>::Epilogue; + + // Compose the GEMM kernel + using GemmKernel = cutlass::gemm::kernel:: + GemmWithFusedEpilogue; +}; + ///////////////////////////////////////////////////////////////////////////////////////////////// } // namespace kernel } // namespace gemm -} // namespace cuvs \ No newline at end of file +} // namespace cuvs diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh
b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh index bc8189c709..3107f0fa44 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -38,8 +38,8 @@ void pairwise_matrix_dispatch(OpT distance_op, IdxT k, const DataT* x, const DataT* y, - const DataT* x_norm, - const DataT* y_norm, + const OutT* x_norm, + const OutT* y_norm, OutT* out, FinOpT fin_op, cudaStream_t stream, @@ -47,9 +47,9 @@ void pairwise_matrix_dispatch(OpT distance_op, }; // namespace cuvs::distance::detail -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY +#endif // CUVS_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ +#define instantiate_cuvs_distance_detail_pairwise_matrix_dispatch( \ OpT, DataT, AccT, OutT, FinOpT, IdxT) \ extern template void cuvs::distance::detail:: \ pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ @@ -59,136 +59,70 @@ void pairwise_matrix_dispatch(OpT distance_op, IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ bool is_row_major) +#define instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default(OpT, IdxT) \ + instantiate_cuvs_distance_detail_pairwise_matrix_dispatch( \ + OpT, float, float, float, raft::identity_op, IdxT); \ + instantiate_cuvs_distance_detail_pairwise_matrix_dispatch( \ + OpT, double, double, double, raft::identity_op, IdxT); \ + instantiate_cuvs_distance_detail_pairwise_matrix_dispatch( \ + OpT, half, float, float, raft::identity_op, IdxT); + +#define instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo(OpT, IdxT, FinOpT) \ + instantiate_cuvs_distance_detail_pairwise_matrix_dispatch( \ + OpT, float, float, float, FinOpT, IdxT); \ + instantiate_cuvs_distance_detail_pairwise_matrix_dispatch( \ + OpT, double, double, double, FinOpT, IdxT); \ + 
instantiate_cuvs_distance_detail_pairwise_matrix_dispatch( \ + OpT, half, float, float, FinOpT, IdxT); + /* * Hierarchy of instantiations: * * This file defines extern template instantiations of the distance kernels. The - * instantiation of the public API is handled in raft/distance/distance-ext.cuh. + * instantiation of the public API is handled in cuvs/distance/distance-ext.cuh. * * After adding an instance here, make sure to also add the instance there. */ // The following two instances are used in the RBF kernel object. Note the use of int64_t for the // index type. -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l2_unexp_distance_op, - float, - float, - float, - cuvs::distance::kernels::detail::rbf_fin_op, - int64_t); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l2_unexp_distance_op, - double, - double, - double, - cuvs::distance::kernels::detail::rbf_fin_op, - int64_t); - -// Rest of instances -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::canberra_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::canberra_distance_op, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::correlation_distance_op, - float, - float, - float, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::correlation_distance_op, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::cosine_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::cosine_distance_op, double, double, double, raft::identity_op, int); 
-instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::hamming_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::hamming_distance_op, double, double, double, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::hellinger_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::hellinger_distance_op, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::jensen_shannon_distance_op, - float, - float, - float, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::jensen_shannon_distance_op, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::kl_divergence_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::kl_divergence_op, double, double, double, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l1_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l1_distance_op, double, double, double, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l2_exp_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l2_exp_distance_op, double, double, double, raft::identity_op, int); 
-instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l2_unexp_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::canberra_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::correlation_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::cosine_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::hamming_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::hellinger_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::jensen_shannon_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::kl_divergence_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::l1_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::l2_exp_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::l2_unexp_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::l_inf_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::lp_unexp_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::russel_rao_distance_op, int); 
+instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo( cuvs::distance::detail::ops::l2_unexp_distance_op, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l_inf_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::l_inf_distance_op, double, double, double, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::lp_unexp_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::lp_unexp_distance_op, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::russel_rao_distance_op, float, float, float, raft::identity_op, int); -instantiate_raft_distance_detail_pairwise_matrix_dispatch( - cuvs::distance::detail::ops::russel_rao_distance_op, - double, - double, - double, - raft::identity_op, - int); + int64_t, + cuvs::distance::kernels::detail::rbf_fin_op); -#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch +#undef instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo +#undef instantiate_cuvs_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch-inl.cuh b/cpp/src/distance/detail/pairwise_matrix/dispatch-inl.cuh index fc3849febe..96d7c265e7 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch-inl.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch-inl.cuh @@ -72,8 +72,8 @@ void pairwise_matrix_dispatch(OpT distance_op, IdxT k, const DataT* x, const DataT* y, - const DataT* x_norm, - const DataT* y_norm, + const OutT* x_norm, + const OutT* y_norm, OutT* out, FinOpT fin_op, cudaStream_t stream, @@ -113,7 +113,13 @@ void 
pairwise_matrix_dispatch(OpT distance_op, void* kernel_ptr = reinterpret_cast(sm60_wrapper.kernel_ptr); auto runtime_arch = arch::kernel_virtual_arch(kernel_ptr); - if (cutlass_range.contains(runtime_arch)) { + // TODO: the cutlass doesn't support the odd `k` on half DataT. + bool if_unsupported_on_half = (sizeof(DataT) == 2) && ((k % 2) != 0); + + if (if_unsupported_on_half) { + auto any_range = arch::SM_range(arch::SM_min(), arch::SM_future()); + pairwise_matrix_sm60_dispatch(distance_op, params, any_range, stream); + } else if (cutlass_range.contains(runtime_arch) && !if_unsupported_on_half) { // If device is SM_80 or later, use CUTLASS-based kernel. pairwise_matrix_sm80_dispatch(distance_op, params, cutlass_range, stream); } else { diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch.cuh b/cpp/src/distance/detail/pairwise_matrix/dispatch.cuh index 06b039c3a1..0521a5713a 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch.cuh @@ -19,6 +19,4 @@ #include "dispatch-inl.cuh" #endif -#ifdef RAFT_COMPILED #include "dispatch-ext.cuh" -#endif diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py index e013db1e18..1bd51aef91 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py @@ -56,8 +56,8 @@ IdxT k, \\ const DataT* x, \\ const DataT* y, \\ - const DataT* x_norm, \\ - const DataT* y_norm, \\ + const OutT* x_norm, \\ + const OutT* y_norm, \\ OutT* out, \\ FinOpT fin_op, \\ cudaStream_t stream, \\ @@ -77,6 +77,12 @@ OutT="double", IdxT="int", ), + dict( + DataT="half", + AccT="float", + OutT="float", + IdxT="int", + ), ] op_instances = [ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu 
b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu index f82df6cc03..c2c44dc53c 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu index a20ca5f472..00099dcae4 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu new file mode 100644 index 0000000000..0b70f23413 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::canberra_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu index 7bb7e4a96e..d9b6feb9c4 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu index 34fcc4be42..dfb6f62a8b 100644 --- 
a/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu new file mode 100644 index 0000000000..b2c959b556 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::correlation_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu index cb23743c1f..d7046d4e25 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu @@ -38,8 +38,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu index ad71ff2957..215805ddea 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu +++ 
b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu @@ -38,8 +38,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu new file mode 100644 index 0000000000..5f9928958b --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include "dispatch_sm80.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::cosine_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu index e81d544112..d558604d8d 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu index ddbdab6021..632523194a 100644 --- 
a/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu new file mode 100644 index 0000000000..76360ebd87 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::hamming_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu index d2acecaf0e..707d9c08df 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu index 034d76679e..7dceab56c8 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu 
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu new file mode 100644 index 0000000000..35e7adf062 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::hellinger_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu index 030faeecd2..e3e0744797 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu index f7551a5665..6eff405503 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu +++ 
b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu new file mode 100644 index 0000000000..24302c6e6c --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::jensen_shannon_distance_op, + half, + float, + float, + raft::identity_op, + int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu index 6640d39492..4f45adf274 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu index 60cafa4742..b2cac754fe 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu +++ 
b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu new file mode 100644 index 0000000000..9347a026c1 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::kl_divergence_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu index 8f6e3a35d2..82d9d1abe0 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu index 73868a486b..ad5f06048a 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const 
DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu new file mode 100644 index 0000000000..99043958f0 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::l1_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu index 8ac80b77d2..b2911f16b4 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu @@ -38,8 +38,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu index abebb91219..93a4166439 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu +++ 
b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu @@ -38,8 +38,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu new file mode 100644 index 0000000000..e30499ae7c --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include "dispatch_sm80.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::l2_exp_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu index ffa6bf02ba..eecab9ec40 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu index acef42a4e0..9f58f5f859 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu +++ 
b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu new file mode 100644 index 0000000000..73e9352eed --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::l2_unexp_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu index c2bbbf06b5..812dda4500 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu index 163b9f37ba..f95dd7a870 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu @@ 
-37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu new file mode 100644 index 0000000000..ba9c976c46 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::l_inf_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu index d13532ac60..a88875f3a2 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu index 65e0163d78..b8b3775d1c 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu +++ 
b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu new file mode 100644 index 0000000000..ef323f57d0 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::lp_unexp_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu index 23f2b34e8c..1cb0ed8aeb 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu @@ -38,8 +38,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ @@ -61,4 +61,12 @@ instantiate_raft_distance_detail_pairwise_matrix_dispatch( cuvs::distance::kernels::detail::rbf_fin_op, int64_t); +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::l2_unexp_distance_op, + half, + float, + float, + cuvs::distance::kernels::detail::rbf_fin_op, + int64_t); + #undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git 
a/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu index 1a5e5cf981..1afbd690e4 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu index a9b1f6bb43..217f61b84f 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu @@ -37,8 +37,8 @@ IdxT k, \ const DataT* x, \ const DataT* y, \ - const DataT* x_norm, \ - const DataT* y_norm, \ + const OutT* x_norm, \ + const OutT* y_norm, \ OutT* out, \ FinOpT fin_op, \ cudaStream_t stream, \ diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu new file mode 100644 index 0000000000..c65fb24f62 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::russel_rao_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/params.cuh b/cpp/src/distance/detail/pairwise_matrix/params.cuh index aa419aca0d..739c4a9f62 100644 --- a/cpp/src/distance/detail/pairwise_matrix/params.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/params.cuh @@ -27,8 +27,8 @@ struct pairwise_matrix_params { IdxT ld_out; const DataT* x; const DataT* y; - const DataT* x_norm; - const DataT* y_norm; + const OutT* x_norm; + const OutT* y_norm; OutT* out; FinOpT fin_op; bool is_row_major; diff --git a/cpp/src/distance/distance-ext.cuh 
b/cpp/src/distance/distance-ext.cuh index 148951afad..e7fa30f03b 100644 --- a/cpp/src/distance/distance-ext.cuh +++ b/cpp/src/distance/distance-ext.cuh @@ -24,6 +24,8 @@ #include // rmm::device_uvector +#include + #ifdef CUVS_EXPLICIT_INSTANTIATE_ONLY namespace cuvs { @@ -45,8 +47,8 @@ void distance(raft::resources const& handle, void* workspace, size_t worksize, FinalLambda fin_op, - bool isRowMajor = true, - DataT metric_arg = 2.0f) RAFT_EXPLICIT; + bool isRowMajor = true, + OutT metric_arg = 2.0f) RAFT_EXPLICIT; template +template void pairwise_distance(raft::resources const& handle, const Type* x, const Type* y, - Type* dist, + DistT* dist, IdxT m, IdxT n, IdxT k, rmm::device_uvector& workspace, cuvs::distance::DistanceType metric, - bool isRowMajor = true, - Type metric_arg = 2.0f) RAFT_EXPLICIT; + bool isRowMajor = true, + DistT metric_arg = DistT(2.0f)) RAFT_EXPLICIT; -template +template void pairwise_distance(raft::resources const& handle, const Type* x, const Type* y, - Type* dist, + DistT* dist, IdxT m, IdxT n, IdxT k, cuvs::distance::DistanceType metric, - bool isRowMajor = true, - Type metric_arg = 2.0f) RAFT_EXPLICIT; + bool isRowMajor = true, + DistT metric_arg = DistT(2.0f)) RAFT_EXPLICIT; template void distance(raft::resources const& handle, - raft::device_matrix_view const x, - raft::device_matrix_view const y, + raft::device_matrix_view const x, + raft::device_matrix_view const y, raft::device_matrix_view dist, - DataT metric_arg = 2.0f) RAFT_EXPLICIT; + OutT metric_arg = 2.0f) RAFT_EXPLICIT; -template +template void pairwise_distance(raft::resources const& handle, - device_matrix_view const x, - device_matrix_view const y, - device_matrix_view dist, + raft::device_matrix_view const x, + raft::device_matrix_view const y, + raft::device_matrix_view dist, cuvs::distance::DistanceType metric, - Type metric_arg = 2.0f) RAFT_EXPLICIT; + DistT metric_arg = DistT(2.0f)) RAFT_EXPLICIT; }; // namespace distance }; // namespace cuvs -#endif // 
RAFT_EXPLICIT_INSTANTIATE_ONLY +#endif // CUVS_EXPLICIT_INSTANTIATE_ONLY /* * Hierarchy of instantiations: @@ -158,909 +163,220 @@ void pairwise_distance(raft::resources const& handle, * dispatch-ext.cuh and the corresponding .cu files. */ -#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, FinalLambda, IdxT) \ - extern template void cuvs::distance::distance( \ - raft::resources const& handle, \ - const DataT* x, \ - const DataT* y, \ - OutT* dist, \ - IdxT m, \ - IdxT n, \ - IdxT k, \ - void* workspace, \ - size_t worksize, \ - FinalLambda fin_op, \ - bool isRowMajor, \ - DataT metric_arg) +#define instantiate_cuvs_distance_distance(DistT, DataT, AccT, OutT, IdxT) \ + extern template void \ + cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + void* workspace, \ + size_t worksize, \ + raft::identity_op fin_op, \ + bool isRowMajor, \ + OutT metric_arg); \ + \ + extern template void cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + void* workspace, \ + size_t worksize, \ + bool isRowMajor, \ + OutT metric_arg); \ + \ + extern template void cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + bool isRowMajor, \ + OutT metric_arg); \ + \ + extern template void \ + cuvs::distance::distance( \ + raft::resources const& handle, \ + raft::device_matrix_view const x, \ + raft::device_matrix_view const y, \ + raft::device_matrix_view dist, \ + OutT metric_arg); \ + \ + extern template void \ + cuvs::distance::distance( \ + raft::resources const& handle, \ + raft::device_matrix_view const x, \ + raft::device_matrix_view const y, \ + raft::device_matrix_view dist, \ + OutT metric_arg) + +#define instantiate_cuvs_distance_distance_by_algo(DistT) \ + 
instantiate_cuvs_distance_distance(DistT, float, float, float, int); \ + instantiate_cuvs_distance_distance(DistT, double, double, double, int); \ + instantiate_cuvs_distance_distance(DistT, half, float, float, int) + +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Canberra); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CorrelationExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); + +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HellingerExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::InnerProduct); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::JensenShannon); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::KLDivergence); + +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L1); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2Expanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2SqrtExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2SqrtUnexpanded); + +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2Unexpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Linf); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::LpUnexpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::RusselRaoExpanded); + +#undef instantiate_cuvs_distance_distance_by_algo +#undef instantiate_cuvs_distance_distance // The following two instances are used in test/distance/gram.cu. Note the use // of int64_t for the index type. 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - float, - float, - float, - cuvs::distance::kernels::detail::rbf_fin_op, - int64_t); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - cuvs::distance::kernels::detail::rbf_fin_op, - int64_t); - -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - 
cuvs::distance::DistanceType::JensenShannon, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, double, double, double, 
raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, raft::identity_op, int); - -#undef instantiate_raft_distance_distance - -// Same, but without raft::identity_op -#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT) \ - extern template void cuvs::distance::distance( \ - raft::resources const& handle, \ - const DataT* x, \ - const DataT* y, \ - OutT* dist, \ - IdxT m, \ - IdxT n, \ - IdxT k, \ - void* workspace, \ - size_t worksize, \ - bool isRowMajor, \ +#define instantiate_cuvs_distance_distance_extra(DistT, DataT, AccT, OutT, FinalLambda, IdxT) \ + extern template void cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + void* workspace, \ + size_t worksize, \ + FinalLambda fin_op, \ + bool isRowMajor, \ DataT metric_arg) -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, double, 
double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, float, float, float, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - 
cuvs::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, float, float, float, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, int); - -#undef instantiate_raft_distance_distance - -// Same, but without workspace -#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT) \ - extern template void cuvs::distance::distance( \ - raft::resources const& handle, \ - const DataT* x, \ - const DataT* y, \ - OutT* dist, \ - IdxT m, \ - IdxT n, \ - IdxT k, \ - bool isRowMajor, \ - DataT metric_arg) - -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, double, double, 
double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, float, float, float, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, 
double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, float, float, float, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, int); - -#undef instantiate_raft_distance_distance - -#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT) \ - extern template size_t cuvs::distance::getWorkspaceSize( \ - const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k) - -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CorrelationExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CosineExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HammingUnexpanded, float, 
float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - 
cuvs::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Linf, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Linf, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, int); - -#undef instantiate_raft_distance_getWorkspaceSize - -#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT, layout) \ - extern template size_t cuvs::distance::getWorkspaceSize( \ - raft::device_matrix_view const& x, \ - raft::device_matrix_view const& y) - -// We could consider not taking template parameters for this function. The -// number of instantiations seems a bit excessive.. 
-instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, double, double, double, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, double, double, double, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); 
-instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - int, - 
raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, double, double, double, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, double, double, double, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, 
double, double, double, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_f_contiguous); - -#undef instantiate_raft_distance_getWorkspaceSize - -#define instantiate_raft_distance_pairwise_distance(DataT, IdxT) \ +instantiate_cuvs_distance_distance_extra(cuvs::distance::DistanceType::L2Unexpanded, + float, + float, + float, + 
cuvs::distance::kernels::detail::rbf_fin_op, + int64_t); +instantiate_cuvs_distance_distance_extra(cuvs::distance::DistanceType::L2Unexpanded, + double, + double, + double, + cuvs::distance::kernels::detail::rbf_fin_op, + int64_t); + +#undef instantiate_cuvs_distance_distance_extra + +#define instantiate_cuvs_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT) \ + extern template size_t cuvs::distance::getWorkspaceSize( \ + const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k); \ + \ + extern template size_t \ + cuvs::distance::getWorkspaceSize( \ + raft::device_matrix_view const& x, \ + raft::device_matrix_view const& y); \ + \ + extern template size_t \ + cuvs::distance::getWorkspaceSize( \ + raft::device_matrix_view const& x, \ + raft::device_matrix_view const& y) + +#define instantiate_cuvs_distance_getWorkspaceSize_by_algo(DistT) \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, float, float, float, int); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, double, double, double, int); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, half, float, float, int); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, float, float, float, int64_t); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, double, double, double, int64_t); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, half, float, float, int64_t) + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::Canberra); +instantiate_cuvs_distance_getWorkspaceSize_by_algo( + cuvs::distance::DistanceType::CorrelationExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HellingerExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::InnerProduct); 
+instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::JensenShannon); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::KLDivergence); + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L1); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2Expanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2SqrtExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2SqrtUnexpanded); + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2Unexpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::Linf); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::LpUnexpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::RusselRaoExpanded); + +#undef instantiate_cuvs_distance_getWorkspaceSize_by_algo +#undef instantiate_cuvs_distance_getWorkspaceSize + +#define instantiate_cuvs_distance_pairwise_distance(DataT, IdxT, DistT) \ extern template void cuvs::distance::pairwise_distance(raft::resources const& handle, \ const DataT* x, \ const DataT* y, \ - DataT* dist, \ + DistT* dist, \ IdxT m, \ IdxT n, \ IdxT k, \ rmm::device_uvector& workspace, \ cuvs::distance::DistanceType metric, \ bool isRowMajor, \ - DataT metric_arg) + DistT metric_arg) -instantiate_raft_distance_pairwise_distance(float, int); -instantiate_raft_distance_pairwise_distance(double, int); +instantiate_cuvs_distance_pairwise_distance(float, int, float); +instantiate_cuvs_distance_pairwise_distance(double, int, double); +instantiate_cuvs_distance_pairwise_distance(half, int, float); -#undef instantiate_raft_distance_pairwise_distance +#undef instantiate_cuvs_distance_pairwise_distance // Same, but without workspace -#define instantiate_raft_distance_pairwise_distance(DataT, IdxT) \ +#define 
instantiate_cuvs_distance_pairwise_distance(DataT, IdxT, DistT) \ extern template void cuvs::distance::pairwise_distance(raft::resources const& handle, \ const DataT* x, \ const DataT* y, \ - DataT* dist, \ + DistT* dist, \ IdxT m, \ IdxT n, \ IdxT k, \ cuvs::distance::DistanceType metric, \ bool isRowMajor, \ - DataT metric_arg) - -instantiate_raft_distance_pairwise_distance(float, int); -instantiate_raft_distance_pairwise_distance(double, int); - -#undef instantiate_raft_distance_pairwise_distance - -// Version with mdspan -#define instantiate_raft_distance_distance(DistT, DataT, AccT, OutT, layout, IdxT) \ - extern template void cuvs::distance::distance( \ - raft::resources const& handle, \ - raft::device_matrix_view const x, \ - raft::device_matrix_view const y, \ - raft::device_matrix_view dist, \ - DataT metric_arg) - -// Again, we might want to consider reigning in the number of instantiations... -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - 
double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, raft::layout_c_contiguous, int); 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - 
cuvs::distance::DistanceType::L1, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_c_contiguous, int); 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::LpUnexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::LpUnexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); - -#undef instantiate_raft_distance_distance - -#define instantiate_raft_distance_pairwise_distance(DataT, layout, IdxT) \ - extern template void cuvs::distance::pairwise_distance( \ - raft::resources const& handle, \ - raft::device_matrix_view const x, \ - raft::device_matrix_view const y, \ - raft::device_matrix_view dist, \ - cuvs::distance::DistanceType metric, \ - DataT metric_arg) - -instantiate_raft_distance_pairwise_distance(float, raft::layout_c_contiguous, int); -instantiate_raft_distance_pairwise_distance(float, raft::layout_f_contiguous, int); -instantiate_raft_distance_pairwise_distance(double, raft::layout_c_contiguous, int); -instantiate_raft_distance_pairwise_distance(double, raft::layout_f_contiguous, int); - -#undef instantiate_raft_distance_pairwise_distance + DistT metric_arg) + +instantiate_cuvs_distance_pairwise_distance(float, int, float); +instantiate_cuvs_distance_pairwise_distance(double, int, double); +instantiate_cuvs_distance_pairwise_distance(half, int, float); + +#undef instantiate_cuvs_distance_pairwise_distance + +#define instantiate_cuvs_distance_pairwise_distance(DataT, layout, IdxT, DistT) \ + extern template void cuvs::distance::pairwise_distance( \ + raft::resources const& handle, \ + raft::device_matrix_view const x, \ + raft::device_matrix_view const y, \ + raft::device_matrix_view dist, \ + cuvs::distance::DistanceType metric, \ + DistT metric_arg) + +instantiate_cuvs_distance_pairwise_distance(float, raft::layout_c_contiguous, int, float); +instantiate_cuvs_distance_pairwise_distance(float, raft::layout_f_contiguous, int, float); +instantiate_cuvs_distance_pairwise_distance(double, raft::layout_c_contiguous, int, double); +instantiate_cuvs_distance_pairwise_distance(double, raft::layout_f_contiguous, int, double); +instantiate_cuvs_distance_pairwise_distance(half, 
raft::layout_c_contiguous, int, float); +instantiate_cuvs_distance_pairwise_distance(half, raft::layout_f_contiguous, int, float); + +#undef instantiate_cuvs_distance_pairwise_distance diff --git a/cpp/src/distance/distance-inl.cuh b/cpp/src/distance/distance-inl.cuh index 6236901c3d..e047d31448 100644 --- a/cpp/src/distance/distance-inl.cuh +++ b/cpp/src/distance/distance-inl.cuh @@ -75,8 +75,8 @@ void distance(raft::resources const& handle, void* workspace, size_t worksize, FinalLambda fin_op, - bool isRowMajor = true, - DataT metric_arg = 2.0f) + bool isRowMajor = true, + OutT metric_arg = 2.0f) { detail::distance( handle, x, y, dist, m, n, k, workspace, worksize, fin_op, isRowMajor, metric_arg); @@ -115,8 +115,8 @@ void distance(raft::resources const& handle, IdxT k, void* workspace, size_t worksize, - bool isRowMajor = true, - DataT metric_arg = 2.0f) + bool isRowMajor = true, + OutT metric_arg = 2.0f) { detail::distance( handle, x, y, dist, m, n, k, workspace, worksize, isRowMajor, metric_arg); @@ -206,8 +206,8 @@ void distance(raft::resources const& handle, IdxT m, IdxT n, IdxT k, - bool isRowMajor = true, - DataT metric_arg = 2.0f) + bool isRowMajor = true, + OutT metric_arg = 2.0f) { auto stream = raft::resource::get_cuda_stream(handle); rmm::device_uvector workspace(0, stream); @@ -222,6 +222,7 @@ void distance(raft::resources const& handle, * into compile time for the purpose of dispatch * @tparam Type input/accumulation/output data-type * @tparam IdxT indexing type + * @tparam DistT output type, equal to Type by default * @param handle raft handle for managing expensive resources * @param x first set of points * @param y second set of points @@ -235,25 +236,25 @@ void distance(raft::resources const& handle, * @param isRowMajor whether the matrices are row-major or col-major * @param metric_arg metric argument (used for Minkowski distance) */ -template +template void pairwise_distance(raft::resources const& handle, const Type* x, const Type* y, - Type* 
dist, + DistT* dist, IdxT m, IdxT n, IdxT k, rmm::device_uvector& workspace, cuvs::distance::DistanceType metric, - bool isRowMajor = true, - Type metric_arg = 2.0f) + bool isRowMajor = true, + DistT metric_arg = 2.0f) { cudaStream_t stream = raft::resource::get_cuda_stream(handle); auto dispatch = [&](auto distance_type) { - auto worksize = getWorkspaceSize(x, y, m, n, k); + auto worksize = getWorkspaceSize(x, y, m, n, k); workspace.resize(worksize, stream); - detail::distance( + detail::distance( handle, x, y, dist, m, n, k, workspace.data(), worksize, isRowMajor, metric_arg); }; @@ -315,6 +316,7 @@ void pairwise_distance(raft::resources const& handle, * into compile time for the purpose of dispatch * @tparam Type input/accumulation/output data-type * @tparam IdxT indexing type + * @tparam DistT output type, equal to Type by default * @param handle raft handle for managing expensive resources * @param x first set of points * @param y second set of points @@ -326,21 +328,21 @@ void pairwise_distance(raft::resources const& handle, * @param isRowMajor whether the matrices are row-major or col-major * @param metric_arg metric argument (used for Minkowski distance) */ -template +template void pairwise_distance(raft::resources const& handle, const Type* x, const Type* y, - Type* dist, + DistT* dist, IdxT m, IdxT n, IdxT k, cuvs::distance::DistanceType metric, - bool isRowMajor = true, - Type metric_arg = 2.0f) + bool isRowMajor = true, + DistT metric_arg = 2.0f) { auto stream = raft::resource::get_cuda_stream(handle); rmm::device_uvector workspace(0, stream); - pairwise_distance( + pairwise_distance( handle, x, y, dist, m, n, k, workspace, metric, isRowMajor, metric_arg); } @@ -397,7 +399,7 @@ void distance(raft::resources const& handle, raft::device_matrix_view const x, raft::device_matrix_view const y, raft::device_matrix_view dist, - DataT metric_arg = 2.0f) + OutT metric_arg = 2.0f) { RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal."); 
RAFT_EXPECTS(dist.extent(0) == x.extent(0), @@ -428,6 +430,7 @@ void distance(raft::resources const& handle, * into compile time for the purpose of dispatch * @tparam Type input/accumulation/output data-type * @tparam IdxT indexing type + * @tparam DistT output type, equal to Type by default * @param handle raft handle for managing expensive resources * @param x first matrix of points (size mxk) * @param y second matrix of points (size nxk) @@ -435,13 +438,16 @@ void distance(raft::resources const& handle, * @param metric distance metric * @param metric_arg metric argument (used for Minkowski distance) */ -template +template void pairwise_distance(raft::resources const& handle, raft::device_matrix_view const x, raft::device_matrix_view const y, - raft::device_matrix_view dist, + raft::device_matrix_view dist, cuvs::distance::DistanceType metric, - Type metric_arg = 2.0f) + DistT metric_arg = DistT(2.0f)) { RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal."); RAFT_EXPECTS(dist.extent(0) == x.extent(0), diff --git a/cpp/src/distance/distance.cu b/cpp/src/distance/distance.cu index 02c071d13c..72be93f10a 100644 --- a/cpp/src/distance/distance.cu +++ b/cpp/src/distance/distance.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,910 +25,219 @@ * kernels is handled in distance/detail/pairwise_matrix/dispatch_*.cu. 
* */ - -#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, FinalLambda, IdxT) \ - template void cuvs::distance::distance( \ - raft::resources const& handle, \ - const DataT* x, \ - const DataT* y, \ - OutT* dist, \ - IdxT m, \ - IdxT n, \ - IdxT k, \ - void* workspace, \ - size_t worksize, \ - FinalLambda fin_op, \ - bool isRowMajor, \ - DataT metric_arg) +#define instantiate_cuvs_distance_distance(DistT, DataT, AccT, OutT, IdxT) \ + template void cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + void* workspace, \ + size_t worksize, \ + raft::identity_op fin_op, \ + bool isRowMajor, \ + OutT metric_arg); \ + \ + template void cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + void* workspace, \ + size_t worksize, \ + bool isRowMajor, \ + OutT metric_arg); \ + \ + template void cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + bool isRowMajor, \ + OutT metric_arg); \ + \ + template void \ + cuvs::distance::distance( \ + raft::resources const& handle, \ + raft::device_matrix_view const x, \ + raft::device_matrix_view const y, \ + raft::device_matrix_view dist, \ + OutT metric_arg); \ + \ + template void \ + cuvs::distance::distance( \ + raft::resources const& handle, \ + raft::device_matrix_view const x, \ + raft::device_matrix_view const y, \ + raft::device_matrix_view dist, \ + OutT metric_arg) + +#define instantiate_cuvs_distance_distance_by_algo(DistT) \ + instantiate_cuvs_distance_distance(DistT, float, float, float, int); \ + instantiate_cuvs_distance_distance(DistT, double, double, double, int); \ + instantiate_cuvs_distance_distance(DistT, half, float, float, int) + 
+instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Canberra); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CorrelationExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); + +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HellingerExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::InnerProduct); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::JensenShannon); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::KLDivergence); + +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L1); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2Expanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2SqrtExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2SqrtUnexpanded); + +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::L2Unexpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Linf); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::LpUnexpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::RusselRaoExpanded); + +#undef instantiate_cuvs_distance_distance_by_algo +#undef instantiate_cuvs_distance_distance // The following two instances are used in test/distance/gram.cu. Note the use // of int64_t for the index type. 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - float, - float, - float, - cuvs::distance::kernels::detail::rbf_fin_op, - int64_t); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - cuvs::distance::kernels::detail::rbf_fin_op, - int64_t); - -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - raft::identity_op, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - 
cuvs::distance::DistanceType::JensenShannon, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, double, double, double, 
raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, raft::identity_op, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, raft::identity_op, int); - -#undef instantiate_raft_distance_distance - -// Same, but without raft::identity_op -#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT) \ - template void cuvs::distance::distance( \ - raft::resources const& handle, \ - const DataT* x, \ - const DataT* y, \ - OutT* dist, \ - IdxT m, \ - IdxT n, \ - IdxT k, \ - void* workspace, \ - size_t worksize, \ - bool isRowMajor, \ - DataT metric_arg) - -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, 
int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, float, float, float, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, float, float, float, int); 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, int); - -#undef instantiate_raft_distance_distance - -// Same, but without workspace -#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT) \ - template void cuvs::distance::distance( \ - raft::resources const& handle, \ - const DataT* x, \ - const DataT* y, \ - OutT* dist, \ - IdxT m, \ - IdxT n, \ - IdxT k, \ - bool isRowMajor, \ +#define instantiate_cuvs_distance_distance_extra(DistT, DataT, AccT, OutT, FinalLambda, IdxT) \ + template void cuvs::distance::distance( \ + raft::resources const& handle, \ + const DataT* x, \ + const DataT* y, \ + OutT* dist, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + void* workspace, \ + size_t worksize, \ + FinalLambda fin_op, \ + bool isRowMajor, \ DataT metric_arg) -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CorrelationExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::CosineExpanded, double, double, double, int); -instantiate_raft_distance_distance( - 
cuvs::distance::DistanceType::HammingUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, float, float, float, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L1, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int); 
-instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, float, float, float, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::Linf, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, int); - -#undef instantiate_raft_distance_distance - -#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT) \ - template size_t cuvs::distance::getWorkspaceSize( \ - const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k) - -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CorrelationExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CorrelationExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CosineExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::CosineExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HammingUnexpanded, float, float, float, int); 
-instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HammingUnexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HellingerExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::HellingerExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - 
cuvs::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Linf, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Linf, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::LpUnexpanded, double, double, double, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::RusselRaoExpanded, float, float, float, int); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::RusselRaoExpanded, double, double, double, int); - -#undef instantiate_raft_distance_getWorkspaceSize - -#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT, layout) \ - template size_t cuvs::distance::getWorkspaceSize( \ - raft::device_matrix_view const& x, \ - raft::device_matrix_view const& y) - -// We could consider not taking template parameters for this function. The -// number of instantiations seems a bit excessive.. 
-instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, double, double, double, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::Canberra, double, double, double, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); 
-instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - int, - 
raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, double, double, double, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L1, double, double, double, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, double, double, double, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Expanded, 
double, double, double, int, raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - int, - raft::layout_f_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - int, - raft::layout_c_contiguous); -instantiate_raft_distance_getWorkspaceSize( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_f_contiguous); - -#undef instantiate_raft_distance_getWorkspaceSize - -#define instantiate_raft_distance_pairwise_distance(DataT, IdxT) \ +instantiate_cuvs_distance_distance_extra(cuvs::distance::DistanceType::L2Unexpanded, + float, + float, + float, + 
cuvs::distance::kernels::detail::rbf_fin_op, + int64_t); +instantiate_cuvs_distance_distance_extra(cuvs::distance::DistanceType::L2Unexpanded, + double, + double, + double, + cuvs::distance::kernels::detail::rbf_fin_op, + int64_t); + +#undef instantiate_cuvs_distance_distance_extra + +#define instantiate_cuvs_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT) \ + template size_t cuvs::distance::getWorkspaceSize( \ + const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k); \ + \ + template size_t \ + cuvs::distance::getWorkspaceSize( \ + raft::device_matrix_view const& x, \ + raft::device_matrix_view const& y); \ + \ + template size_t \ + cuvs::distance::getWorkspaceSize( \ + raft::device_matrix_view const& x, \ + raft::device_matrix_view const& y) + +#define instantiate_cuvs_distance_getWorkspaceSize_by_algo(DistT) \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, float, float, float, int); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, double, double, double, int); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, half, float, float, int); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, float, float, float, int64_t); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, double, double, double, int64_t); \ + instantiate_cuvs_distance_getWorkspaceSize(DistT, half, float, float, int64_t) + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::Canberra); +instantiate_cuvs_distance_getWorkspaceSize_by_algo( + cuvs::distance::DistanceType::CorrelationExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HellingerExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::InnerProduct); 
+instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::JensenShannon); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::KLDivergence); + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L1); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2Expanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2SqrtExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2SqrtUnexpanded); + +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::L2Unexpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::Linf); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::LpUnexpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::RusselRaoExpanded); + +#undef instantiate_cuvs_distance_getWorkspaceSize_by_algo +#undef instantiate_cuvs_distance_getWorkspaceSize + +#define instantiate_cuvs_distance_pairwise_distance(DataT, IdxT, DistT) \ template void cuvs::distance::pairwise_distance(raft::resources const& handle, \ const DataT* x, \ const DataT* y, \ - DataT* dist, \ + DistT* dist, \ IdxT m, \ IdxT n, \ IdxT k, \ rmm::device_uvector& workspace, \ cuvs::distance::DistanceType metric, \ bool isRowMajor, \ - DataT metric_arg) + DistT metric_arg) -instantiate_raft_distance_pairwise_distance(float, int); -instantiate_raft_distance_pairwise_distance(double, int); +instantiate_cuvs_distance_pairwise_distance(float, int, float); +instantiate_cuvs_distance_pairwise_distance(double, int, double); +instantiate_cuvs_distance_pairwise_distance(half, int, float); -#undef instantiate_raft_distance_pairwise_distance +#undef instantiate_cuvs_distance_pairwise_distance // Same, but without workspace -#define instantiate_raft_distance_pairwise_distance(DataT, IdxT) \ +#define 
instantiate_cuvs_distance_pairwise_distance(DataT, IdxT, DistT) \ template void cuvs::distance::pairwise_distance(raft::resources const& handle, \ const DataT* x, \ const DataT* y, \ - DataT* dist, \ + DistT* dist, \ IdxT m, \ IdxT n, \ IdxT k, \ cuvs::distance::DistanceType metric, \ bool isRowMajor, \ - DataT metric_arg) - -instantiate_raft_distance_pairwise_distance(float, int); -instantiate_raft_distance_pairwise_distance(double, int); - -#undef instantiate_raft_distance_pairwise_distance - -// Version with mdspan -#define instantiate_raft_distance_distance(DistT, DataT, AccT, OutT, layout, IdxT) \ - template void cuvs::distance::distance( \ - raft::resources const& handle, \ - raft::device_matrix_view const x, \ - raft::device_matrix_view const y, \ - raft::device_matrix_view dist, \ - DataT metric_arg) - -// Again, we might want to consider reigning in the number of instantiations... -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Canberra, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CorrelationExpanded, - double, - 
double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::CosineExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HammingUnexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::HellingerExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, raft::layout_c_contiguous, int); 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::InnerProduct, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::InnerProduct, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::JensenShannon, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::JensenShannon, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::KLDivergence, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::KLDivergence, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L1, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - 
cuvs::distance::DistanceType::L1, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Expanded, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2SqrtUnexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_c_contiguous, int); 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::L2Unexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, double, double, double, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::Linf, double, double, double, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_c_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::LpUnexpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance( - cuvs::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_f_contiguous, int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::LpUnexpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - float, - float, - float, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - double, - double, - double, - raft::layout_c_contiguous, - int); -instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - float, - float, - float, - raft::layout_f_contiguous, - int); 
-instantiate_raft_distance_distance(cuvs::distance::DistanceType::RusselRaoExpanded, - double, - double, - double, - raft::layout_f_contiguous, - int); - -#undef instantiate_raft_distance_distance - -#define instantiate_raft_distance_pairwise_distance(DataT, layout, IdxT) \ - template void cuvs::distance::pairwise_distance( \ - raft::resources const& handle, \ - raft::device_matrix_view const x, \ - raft::device_matrix_view const y, \ - raft::device_matrix_view dist, \ - cuvs::distance::DistanceType metric, \ - DataT metric_arg) - -instantiate_raft_distance_pairwise_distance(float, raft::layout_c_contiguous, int); -instantiate_raft_distance_pairwise_distance(float, raft::layout_f_contiguous, int); -instantiate_raft_distance_pairwise_distance(double, raft::layout_c_contiguous, int); -instantiate_raft_distance_pairwise_distance(double, raft::layout_f_contiguous, int); - -#undef instantiate_raft_distance_pairwise_distance + DistT metric_arg) + +instantiate_cuvs_distance_pairwise_distance(float, int, float); +instantiate_cuvs_distance_pairwise_distance(double, int, double); +instantiate_cuvs_distance_pairwise_distance(half, int, float); + +#undef instantiate_cuvs_distance_pairwise_distance + +#define instantiate_cuvs_distance_pairwise_distance(DataT, layout, IdxT, DistT) \ + template void cuvs::distance::pairwise_distance( \ + raft::resources const& handle, \ + raft::device_matrix_view const x, \ + raft::device_matrix_view const y, \ + raft::device_matrix_view dist, \ + cuvs::distance::DistanceType metric, \ + DistT metric_arg) + +instantiate_cuvs_distance_pairwise_distance(float, raft::layout_c_contiguous, int, float); +instantiate_cuvs_distance_pairwise_distance(float, raft::layout_f_contiguous, int, float); +instantiate_cuvs_distance_pairwise_distance(double, raft::layout_c_contiguous, int, double); +instantiate_cuvs_distance_pairwise_distance(double, raft::layout_f_contiguous, int, double); +instantiate_cuvs_distance_pairwise_distance(half, 
raft::layout_c_contiguous, int, float); +instantiate_cuvs_distance_pairwise_distance(half, raft::layout_f_contiguous, int, float); + +#undef instantiate_cuvs_distance_pairwise_distance diff --git a/cpp/src/distance/distance.cuh b/cpp/src/distance/distance.cuh index b5bfc07cb2..d1bfc8212e 100644 --- a/cpp/src/distance/distance.cuh +++ b/cpp/src/distance/distance.cuh @@ -19,6 +19,4 @@ #include "distance-inl.cuh" #endif -#ifdef RAFT_COMPILED #include "distance-ext.cuh" -#endif diff --git a/cpp/src/distance/pairwise_distance.cu b/cpp/src/distance/pairwise_distance.cu index 10f096b966..a802ce91c6 100644 --- a/cpp/src/distance/pairwise_distance.cu +++ b/cpp/src/distance/pairwise_distance.cu @@ -61,6 +61,24 @@ void pairwise_distance( handle, x_v, y_v, d_v, metric, metric_arg); } +void pairwise_distance( + raft::resources const& handle, + raft::device_matrix_view const x, + raft::device_matrix_view const y, + raft::device_matrix_view dist, + cuvs::distance::DistanceType metric, + float metric_arg) +{ + auto x_v = raft::make_device_matrix_view( + x.data_handle(), x.extent(0), x.extent(1)); + auto y_v = raft::make_device_matrix_view( + y.data_handle(), y.extent(0), y.extent(1)); + auto d_v = raft::make_device_matrix_view( + dist.data_handle(), dist.extent(0), dist.extent(1)); + pairwise_distance( + handle, x_v, y_v, d_v, metric, metric_arg); +} + void pairwise_distance( raft::resources const& handle, raft::device_matrix_view const x, @@ -97,6 +115,24 @@ void pairwise_distance( handle, x_v, y_v, d_v, metric, metric_arg); } +void pairwise_distance( + raft::resources const& handle, + raft::device_matrix_view const x, + raft::device_matrix_view const y, + raft::device_matrix_view dist, + cuvs::distance::DistanceType metric, + float metric_arg) +{ + auto x_v = raft::make_device_matrix_view( + x.data_handle(), x.extent(0), x.extent(1)); + auto y_v = raft::make_device_matrix_view( + y.data_handle(), y.extent(0), y.extent(1)); + auto d_v = raft::make_device_matrix_view( + 
dist.data_handle(), dist.extent(0), dist.extent(1)); + pairwise_distance( + handle, x_v, y_v, d_v, metric, metric_arg); +} + /** @} */ // end group pairwise_distance_runtime } // namespace cuvs::distance diff --git a/cpp/src/neighbors/brute_force.cu b/cpp/src/neighbors/brute_force.cu index ce21e2d395..c76feb0159 100644 --- a/cpp/src/neighbors/brute_force.cu +++ b/cpp/src/neighbors/brute_force.cu @@ -21,12 +21,12 @@ #include namespace cuvs::neighbors::brute_force { -template -index::index(raft::resources const& res, - raft::host_matrix_view dataset, - std::optional>&& norms, - cuvs::distance::DistanceType metric, - T metric_arg) +template +index::index(raft::resources const& res, + raft::host_matrix_view dataset, + std::optional>&& norms, + cuvs::distance::DistanceType metric, + DistT metric_arg) : cuvs::neighbors::index(), metric_(metric), dataset_(raft::make_device_matrix(res, 0, 0)), @@ -38,12 +38,12 @@ index::index(raft::resources const& res, raft::resource::sync_stream(res); } -template -index::index(raft::resources const& res, - raft::device_matrix_view dataset, - std::optional>&& norms, - cuvs::distance::DistanceType metric, - T metric_arg) +template +index::index(raft::resources const& res, + raft::device_matrix_view dataset, + std::optional>&& norms, + cuvs::distance::DistanceType metric, + DistT metric_arg) : cuvs::neighbors::index(), metric_(metric), dataset_(raft::make_device_matrix(res, 0, 0)), @@ -54,12 +54,12 @@ index::index(raft::resources const& res, update_dataset(res, dataset); } -template -index::index(raft::resources const& res, - raft::device_matrix_view dataset_view, - std::optional> norms_view, - cuvs::distance::DistanceType metric, - T metric_arg) +template +index::index(raft::resources const& res, + raft::device_matrix_view dataset_view, + std::optional> norms_view, + cuvs::distance::DistanceType metric, + DistT metric_arg) : cuvs::neighbors::index(), metric_(metric), dataset_(raft::make_device_matrix(res, 0, 0)), @@ -69,12 +69,12 @@ 
index::index(raft::resources const& res, { } -template -index::index(raft::resources const& res, - raft::device_matrix_view dataset_view, - std::optional>&& norms, - cuvs::distance::DistanceType metric, - T metric_arg) +template +index::index(raft::resources const& res, + raft::device_matrix_view dataset_view, + std::optional>&& norms, + cuvs::distance::DistanceType metric, + DistT metric_arg) : cuvs::neighbors::index(), metric_(metric), dataset_( @@ -99,12 +99,12 @@ index::index(raft::resources const& res, dataset_view_ = raft::make_const_mdspan(dataset_.view()); } -template -index::index(raft::resources const& res, - raft::device_matrix_view dataset_view, - std::optional> norms_view, - cuvs::distance::DistanceType metric, - T metric_arg) +template +index::index(raft::resources const& res, + raft::device_matrix_view dataset_view, + std::optional> norms_view, + cuvs::distance::DistanceType metric, + DistT metric_arg) : cuvs::neighbors::index(), metric_(metric), dataset_( @@ -129,73 +129,74 @@ index::index(raft::resources const& res, dataset_view_ = raft::make_const_mdspan(dataset_.view()); } -template -void index::update_dataset(raft::resources const& res, - raft::device_matrix_view dataset) +template +void index::update_dataset( + raft::resources const& res, raft::device_matrix_view dataset) { dataset_view_ = dataset; } -template -void index::update_dataset(raft::resources const& res, - raft::host_matrix_view dataset) +template +void index::update_dataset( + raft::resources const& res, raft::host_matrix_view dataset) { dataset_ = raft::make_device_matrix(res, dataset.extent(0), dataset.extent(1)); raft::copy(res, dataset_.view(), dataset); dataset_view_ = raft::make_const_mdspan(dataset_.view()); } -#define CUVS_INST_BFKNN(T) \ +#define CUVS_INST_BFKNN(T, DistT) \ auto build(raft::resources const& res, \ raft::device_matrix_view dataset, \ cuvs::distance::DistanceType metric, \ - T metric_arg) \ - ->cuvs::neighbors::brute_force::index \ + DistT metric_arg) \ + 
->cuvs::neighbors::brute_force::index \ { \ - return detail::build(res, dataset, metric, metric_arg); \ + return detail::build(res, dataset, metric, metric_arg); \ } \ auto build(raft::resources const& res, \ raft::device_matrix_view dataset, \ cuvs::distance::DistanceType metric, \ - T metric_arg) \ - ->cuvs::neighbors::brute_force::index \ + DistT metric_arg) \ + ->cuvs::neighbors::brute_force::index \ { \ - return detail::build(res, dataset, metric, metric_arg); \ + return detail::build(res, dataset, metric, metric_arg); \ } \ \ void search( \ raft::resources const& res, \ - const cuvs::neighbors::brute_force::index& idx, \ + const cuvs::neighbors::brute_force::index& idx, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances, \ + raft::device_matrix_view distances, \ std::optional> sample_filter = std::nullopt) \ { \ if (!sample_filter.has_value()) { \ - detail::brute_force_search(res, idx, queries, neighbors, distances); \ + detail::brute_force_search(res, idx, queries, neighbors, distances); \ } else { \ - detail::brute_force_search_filtered( \ + detail::brute_force_search_filtered( \ res, idx, queries, *sample_filter, neighbors, distances); \ } \ } \ void search( \ raft::resources const& res, \ - const cuvs::neighbors::brute_force::index& idx, \ + const cuvs::neighbors::brute_force::index& idx, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances, \ + raft::device_matrix_view distances, \ std::optional> sample_filter = std::nullopt) \ { \ if (!sample_filter.has_value()) { \ - detail::brute_force_search(res, idx, queries, neighbors, distances); \ + detail::brute_force_search(res, idx, queries, neighbors, distances); \ } else { \ RAFT_FAIL("filtered search isn't available with col_major queries yet"); \ } \ } \ \ - template struct cuvs::neighbors::brute_force::index; + template struct cuvs::neighbors::brute_force::index; 
-CUVS_INST_BFKNN(float); +CUVS_INST_BFKNN(float, float); +CUVS_INST_BFKNN(half, float); #undef CUVS_INST_BFKNN diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 868b3dec02..164448f2c9 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -130,6 +130,14 @@ void _serialize(cuvsResources_t res, cuvs::neighbors::cagra::serialize(*res_ptr, std::string(filename), *index_ptr, include_dataset); } +template +void _serialize_to_hnswlib(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index->addr); + cuvs::neighbors::cagra::serialize_to_hnswlib(*res_ptr, std::string(filename), *index_ptr); +} + template void* _deserialize(cuvsResources_t res, const char* filename) { @@ -326,3 +334,20 @@ extern "C" cuvsError_t cuvsCagraSerialize(cuvsResources_t res, } }); } + +extern "C" cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res, + const char* filename, + cuvsCagraIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + if (index->dtype.code == kDLFloat && index->dtype.bits == 32) { + _serialize_to_hnswlib(res, filename, index); + } else if (index->dtype.code == kDLInt && index->dtype.bits == 8) { + _serialize_to_hnswlib(res, filename, index); + } else if (index->dtype.code == kDLUInt && index->dtype.bits == 8) { + _serialize_to_hnswlib(res, filename, index); + } else { + RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits); + } + }); +} diff --git a/cpp/src/neighbors/cagra_optimize.cu b/cpp/src/neighbors/cagra_optimize.cu index 4c152cf121..cba66a5e9b 100644 --- a/cpp/src/neighbors/cagra_optimize.cu +++ b/cpp/src/neighbors/cagra_optimize.cu @@ -23,13 +23,19 @@ void optimize(raft::resources const& handle, raft::device_matrix_view knn_graph, raft::host_matrix_view new_graph) { - cuvs::neighbors::cagra::optimize(handle, knn_graph, new_graph); + 
cuvs::neighbors::cagra::optimize< + uint32_t, + raft::host_device_accessor, + raft::memory_type::device>>(handle, knn_graph, new_graph); } void optimize(raft::resources const& handle, raft::host_matrix_view knn_graph, raft::host_matrix_view new_graph) { - cuvs::neighbors::cagra::optimize(handle, knn_graph, new_graph); + cuvs::neighbors::cagra::optimize< + uint32_t, + raft::host_device_accessor, + raft::memory_type::host>>(handle, knn_graph, new_graph); } } // namespace cuvs::neighbors::cagra \ No newline at end of file diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index b00d6617cd..e907568f5e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -24,7 +24,7 @@ namespace cuvs::neighbors::cagra::detail { namespace multi_cta_search { -#ifdef CUVS_EXPLICIT_INSTANTIATE_ONLY +#ifdef _CUVS_EXPLICIT_INSTANTIATE_ONLY template ((size_t)8) * 1024 * 1024 * 1024) { + targetUsage = 1024 * 1024 * 1024; + } else if (totalMem > ((size_t)4) * 1024 * 1024 * 1024) { + targetUsage = 768 * 1024 * 1024; + } - // 512 seems to be a batch size sweetspot for float32. - // If we are on float16, increase to 512. - // If the k size (vec dim) of the matrix multiplication is small (<= 32), - // increase to 1024. 
- size_t preferredTileRows = 512; - if (dim <= 32) { preferredTileRows = 1024; } - - tileRows = std::min(preferredTileRows, numQueries); - - // tileCols is the remainder size - tileCols = std::min(targetUsage / preferredTileRows, numCentroids); + tileCols = std::min(targetUsage / (2 * elementSize * tileRows), numCentroids); + } } } // namespace cuvs::neighbors::detail::faiss_select diff --git a/cpp/src/neighbors/detail/fused_l2_knn.cuh b/cpp/src/neighbors/detail/fused_l2_knn.cuh index 13ea4d4189..be904673ea 100644 --- a/cpp/src/neighbors/detail/fused_l2_knn.cuh +++ b/cpp/src/neighbors/detail/fused_l2_knn.cuh @@ -196,8 +196,8 @@ template __launch_bounds__(Policy::Nthreads, 2) RAFT_KERNEL fusedL2kNN(const DataT* x, const DataT* y, - const DataT* _xn, - const DataT* _yn, + const OutT* _xn, + const OutT* _yn, const IdxT m, const IdxT n, const IdxT k, @@ -342,8 +342,8 @@ __launch_bounds__(Policy::Nthreads, 2) RAFT_KERNEL fusedL2kNN(const DataT* x, auto epilog_lambda = [&distance_op, numOfNN, m, n, ldd, out_dists, out_inds, keyMax, identity] __device__( AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], - DataT * regxn, - DataT * regyn, + OutT * regxn, + OutT * regyn, IdxT gridStrideX, IdxT gridStrideY) { // Use ::template to disambiguate (See: @@ -536,8 +536,8 @@ void fusedL2UnexpKnnImpl(const DataT* x, void* workspace, size_t& worksize) { - typedef typename raft::linalg::Policy2x8::Policy RowPolicy; - typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; + typedef typename raft::linalg::Policy2x8::Policy RowPolicy; + typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; typedef typename std::conditional::type KPolicy; @@ -705,8 +705,8 @@ template void fusedL2ExpKnnImpl(const DataT* x, const DataT* y, - const DataT* xn, - const DataT* yn, + const AccT* xn, + const AccT* yn, IdxT m, IdxT n, IdxT k, @@ -721,8 +721,8 @@ void fusedL2ExpKnnImpl(const DataT* x, void* workspace, size_t& worksize) { - typedef typename raft::linalg::Policy2x8::Policy 
RowPolicy; - typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; + typedef typename raft::linalg::Policy2x8::Policy RowPolicy; + typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; typedef typename std::conditional::type KPolicy; @@ -777,7 +777,7 @@ void fusedL2ExpKnnImpl(const DataT* x, int32_t* mutexes = nullptr; if (grid.x > 1) { const auto numMutexes = raft::ceildiv(m, KPolicy::Mblk); - const auto normsSize = (x != y) ? (m + n) * sizeof(DataT) : n * sizeof(DataT); + const auto normsSize = (x != y) ? (m + n) * sizeof(AccT) : n * sizeof(AccT); const auto requiredSize = sizeof(int32_t) * numMutexes + normsSize; if (worksize < requiredSize) { worksize = requiredSize; @@ -790,8 +790,8 @@ void fusedL2ExpKnnImpl(const DataT* x, // calculate norms if they haven't been passed in if (!xn) { - DataT* xn_ = (DataT*)workspace; - workspace = xn_ + m; + AccT* xn_ = (AccT*)workspace; + workspace = xn_ + m; raft::linalg::rowNorm( xn_, x, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); xn = xn_; @@ -800,7 +800,7 @@ void fusedL2ExpKnnImpl(const DataT* x, if (x == y) { yn = xn; } else { - DataT* yn_ = (DataT*)(workspace); + AccT* yn_ = (AccT*)(workspace); raft::linalg::rowNorm( yn_, y, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); yn = yn_; @@ -843,8 +843,8 @@ void fusedL2ExpKnn(IdxT m, IdxT ldd, const DataT* x, const DataT* y, - const DataT* xn, - const DataT* yn, + const AccT* xn, + const AccT* yn, bool sqrt, OutT* out_dists, IdxT* out_inds, @@ -930,10 +930,13 @@ void fusedL2ExpKnn(IdxT m, * @param[in] rowMajorQuery are the query array in row-major layout? 
* @param[in] stream stream to order kernel launch */ -template +template void fusedL2Knn(size_t D, value_idx* out_inds, - value_t* out_dists, + distance_t* out_dists, const value_t* index, const value_t* query, size_t n_index_rows, @@ -943,8 +946,8 @@ void fusedL2Knn(size_t D, bool rowMajorQuery, cudaStream_t stream, cuvs::distance::DistanceType metric, - const value_t* index_norms = NULL, - const value_t* query_norms = NULL) + const distance_t* index_norms = NULL, + const distance_t* query_norms = NULL) { // Validate the input data ASSERT(k > 0, "l2Knn: k must be > 0"); @@ -975,83 +978,87 @@ void fusedL2Knn(size_t D, tempWorksize = cuvs::distance::getWorkspaceSize(query, index, n_query_rows, n_index_rows, D); worksize = tempWorksize; workspace.resize(worksize, stream); - fusedL2ExpKnn(n_query_rows, - n_index_rows, - D, - lda, - ldb, - ldd, - query, - index, - query_norms, - index_norms, - sqrt, - out_dists, - out_inds, - k, - stream, - workspace.data(), - worksize); + fusedL2ExpKnn( + n_query_rows, + n_index_rows, + D, + lda, + ldb, + ldd, + query, + index, + query_norms, + index_norms, + sqrt, + out_dists, + out_inds, + k, + stream, + workspace.data(), + worksize); if (worksize > tempWorksize) { workspace.resize(worksize, stream); - fusedL2ExpKnn(n_query_rows, - n_index_rows, - D, - lda, - ldb, - ldd, - query, - index, - query_norms, - index_norms, - sqrt, - out_dists, - out_inds, - k, - stream, - workspace.data(), - worksize); + fusedL2ExpKnn( + n_query_rows, + n_index_rows, + D, + lda, + ldb, + ldd, + query, + index, + query_norms, + index_norms, + sqrt, + out_dists, + out_inds, + k, + stream, + workspace.data(), + worksize); } break; case cuvs::distance::DistanceType::L2Unexpanded: case cuvs::distance::DistanceType::L2SqrtUnexpanded: - fusedL2UnexpKnn(n_query_rows, - n_index_rows, - D, - lda, - ldb, - ldd, - query, - index, - sqrt, - out_dists, - out_inds, - k, - stream, - workspace.data(), - worksize); + fusedL2UnexpKnn( + n_query_rows, + n_index_rows, + D, + 
lda, + ldb, + ldd, + query, + index, + sqrt, + out_dists, + out_inds, + k, + stream, + workspace.data(), + worksize); if (worksize) { workspace.resize(worksize, stream); - fusedL2UnexpKnn(n_query_rows, - n_index_rows, - D, - lda, - ldb, - ldd, - query, - index, - sqrt, - out_dists, - out_inds, - k, - stream, - workspace.data(), - worksize); + fusedL2UnexpKnn( + n_query_rows, + n_index_rows, + D, + lda, + ldb, + ldd, + query, + index, + sqrt, + out_dists, + out_inds, + k, + stream, + workspace.data(), + worksize); } break; default: printf("only L2 distance metric is supported\n"); break; diff --git a/cpp/src/neighbors/detail/haversine_distance.cuh b/cpp/src/neighbors/detail/haversine_distance.cuh index fc6aa477d1..ee972cf350 100644 --- a/cpp/src/neighbors/detail/haversine_distance.cuh +++ b/cpp/src/neighbors/detail/haversine_distance.cuh @@ -22,15 +22,30 @@ #include #include +#include + namespace cuvs::neighbors::detail { -template -DI value_t compute_haversine(value_t x1, value_t y1, value_t x2, value_t y2) +template +DI distance_t compute_haversine(value_t x1, value_t y1, value_t x2, value_t y2) { - value_t sin_0 = raft::sin(0.5 * (x1 - y1)); - value_t sin_1 = raft::sin(0.5 * (x2 - y2)); - value_t rdist = sin_0 * sin_0 + raft::cos(x1) * raft::cos(y1) * sin_1 * sin_1; - - return 2 * raft::asin(raft::sqrt(rdist)); + if constexpr ((std::is_same_v && std::is_same_v)) { + distance_t _x1 = __half2float(x1); + distance_t _y1 = __half2float(y1); + distance_t _x2 = __half2float(x2); + distance_t _y2 = __half2float(y2); + + distance_t sin_0 = raft::sin(distance_t(0.5) * (_x1 - _y1)); + distance_t sin_1 = raft::sin(distance_t(0.5) * (_x2 - _y2)); + distance_t rdist = sin_0 * sin_0 + raft::cos(_x1) * raft::cos(_y1) * sin_1 * sin_1; + + return static_cast(2) * raft::asin(raft::sqrt(rdist)); + } else { + distance_t sin_0 = raft::sin(distance_t(0.5) * (x1 - y1)); + distance_t sin_1 = raft::sin(distance_t(0.5) * (x2 - y2)); + distance_t rdist = sin_0 * sin_0 + raft::cos(x1) * 
raft::cos(y1) * sin_1 * sin_1; + + return static_cast(2) * raft::asin(raft::sqrt(rdist)); + } } /** @@ -46,9 +61,14 @@ DI value_t compute_haversine(value_t x1, value_t y1, value_t x2, value_t y2) * @param[in] n_index_rows number of rows in index array * @param[in] k number of closest neighbors to return */ -template +template RAFT_KERNEL haversine_knn_kernel(value_idx* out_inds, - value_t* out_dists, + distance_t* out_dists, const value_t* index, const value_t* query, size_t n_index_rows, @@ -56,12 +76,12 @@ RAFT_KERNEL haversine_knn_kernel(value_idx* out_inds, { constexpr int kNumWarps = tpb / raft::WarpSize; - __shared__ value_t smemK[kNumWarps * warp_q]; + __shared__ distance_t smemK[kNumWarps * warp_q]; __shared__ value_idx smemV[kNumWarps * warp_q]; using namespace raft::neighbors::detail::faiss_select; - BlockSelect, warp_q, thread_q, tpb> heap( - std::numeric_limits::max(), std::numeric_limits::max(), smemK, smemV, k); + BlockSelect, warp_q, thread_q, tpb> heap( + std::numeric_limits::max(), std::numeric_limits::max(), smemK, smemV, k); // Grid is exactly sized to rows available int limit = raft::Pow2::roundDown(n_index_rows); @@ -77,7 +97,7 @@ RAFT_KERNEL haversine_knn_kernel(value_idx* out_inds, value_t y1 = idx_ptr[0]; value_t y2 = idx_ptr[1]; - value_t dist = compute_haversine(x1, y1, x2, y2); + distance_t dist = compute_haversine(x1, y1, x2, y2); heap.add(dist, i); } @@ -88,7 +108,7 @@ RAFT_KERNEL haversine_knn_kernel(value_idx* out_inds, value_t y1 = idx_ptr[0]; value_t y2 = idx_ptr[1]; - value_t dist = compute_haversine(x1, y1, x2, y2); + distance_t dist = compute_haversine(x1, y1, x2, y2); heap.addThreadQ(dist, i); } @@ -117,9 +137,9 @@ RAFT_KERNEL haversine_knn_kernel(value_idx* out_inds, * @param[in] k number of closest neighbors to return * @param[in] stream stream to order kernel launch */ -template +template void haversine_knn(value_idx* out_inds, - value_t* out_dists, + distance_t* out_dists, const value_t* index, const value_t* query, size_t 
n_index_rows, diff --git a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh index 559d33cc28..88986af7d9 100644 --- a/cpp/src/neighbors/detail/knn_brute_force.cuh +++ b/cpp/src/neighbors/detail/knn_brute_force.cuh @@ -48,9 +48,9 @@ #include #include +#include #include #include - #include #include @@ -62,7 +62,7 @@ namespace cuvs::neighbors::detail { * Calculates brute force knn, using a fixed memory budget * by tiling over both the rows and columns of pairwise_distances */ -template +template void tiled_brute_force_knn(const raft::resources& handle, const ElementType* search, // size (m ,d) const ElementType* index, // size (n ,d) @@ -70,25 +70,23 @@ void tiled_brute_force_knn(const raft::resources& handle, size_t n, size_t d, size_t k, - ElementType* distances, // size (m, k) - IndexType* indices, // size (m, k) + DistanceT* distances, // size (m, k) + IndexType* indices, // size (m, k) cuvs::distance::DistanceType metric, - float metric_arg = 2.0, - size_t max_row_tile_size = 0, - size_t max_col_tile_size = 0, - const ElementType* precomputed_index_norms = nullptr, - const ElementType* precomputed_search_norms = nullptr, - const uint32_t* filter_bitmap = nullptr) + DistanceT metric_arg = 2.0, + size_t max_row_tile_size = 0, + size_t max_col_tile_size = 0, + const DistanceT* precomputed_index_norms = nullptr, + const DistanceT* precomputed_search_norms = nullptr, + const uint32_t* filter_bitmap = nullptr) { // Figure out the number of rows/cols to tile for - size_t tile_rows = 0; - size_t tile_cols = 0; - auto stream = raft::resource::get_cuda_stream(handle); - auto device_memory = raft::resource::get_workspace_resource(handle); - auto total_mem = rmm::available_device_memory().second; + size_t tile_rows = 0; + size_t tile_cols = 0; + auto stream = raft::resource::get_cuda_stream(handle); cuvs::neighbors::detail::faiss_select::chooseTileSize( - m, n, d, sizeof(ElementType), total_mem, tile_rows, tile_cols); + m, n, d, 
sizeof(DistanceT), tile_rows, tile_cols); // for unittesting, its convenient to be able to put a max size on the tiles // so we can test the tiling logic without having to use huge inputs. @@ -99,13 +97,13 @@ void tiled_brute_force_knn(const raft::resources& handle, tile_cols = std::max(tile_cols, k); // stores pairwise distances for the current tile - rmm::device_uvector temp_distances(tile_rows * tile_cols, stream); + rmm::device_uvector temp_distances(tile_rows * tile_cols, stream); // calculate norms for L2 expanded distances - this lets us avoid calculating // norms repeatedly per-tile, and just do once for the entire input auto pairwise_metric = metric; - rmm::device_uvector search_norms(0, stream); - rmm::device_uvector index_norms(0, stream); + rmm::device_uvector search_norms(0, stream); + rmm::device_uvector index_norms(0, stream); if (metric == cuvs::distance::DistanceType::L2Expanded || metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::CosineExpanded) { @@ -162,14 +160,14 @@ void tiled_brute_force_knn(const raft::resources& handle, if (n < k) { raft::matrix::fill(handle, raft::make_device_matrix_view(distances, m, k), - std::numeric_limits::lowest()); + std::numeric_limits::lowest()); if constexpr (std::is_signed_v) { raft::matrix::fill(handle, raft::make_device_matrix_view(indices, m, k), IndexType{-1}); } } - rmm::device_uvector temp_out_distances(tile_rows * temp_out_cols, stream); + rmm::device_uvector temp_out_distances(tile_rows * temp_out_cols, stream); rmm::device_uvector temp_out_indices(tile_rows * temp_out_cols, stream); bool select_min = cuvs::distance::is_min_close(metric); @@ -189,7 +187,7 @@ void tiled_brute_force_knn(const raft::resources& handle, search + i * d, current_query_size, d), raft::make_device_matrix_view( index + j * d, current_centroid_size, d), - raft::make_device_matrix_view( + raft::make_device_matrix_view( temp_distances.data(), current_query_size, current_centroid_size), 
pairwise_metric, metric_arg); @@ -208,7 +206,7 @@ void tiled_brute_force_knn(const raft::resources& handle, IndexType row = i + (idx / current_centroid_size); IndexType col = j + (idx % current_centroid_size); - cuvs::distance::detail::ops::l2_exp_cutlass_op l2_op(sqrt); + cuvs::distance::detail::ops::l2_exp_cutlass_op l2_op(sqrt); return l2_op(row_norms[row], col_norms[col], dist[idx]); }); } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { @@ -222,16 +220,16 @@ void tiled_brute_force_knn(const raft::resources& handle, [=] __device__(IndexType idx) { IndexType row = i + (idx / current_centroid_size); IndexType col = j + (idx % current_centroid_size); - auto val = 1.0 - dist[idx] / (row_norms[row] * col_norms[col]); + auto val = DistanceT(1.0) - dist[idx] / DistanceT(row_norms[row] * col_norms[col]); return val; }); } if (filter_bitmap != nullptr) { - auto distances_ptr = temp_distances.data(); - auto count = thrust::make_counting_iterator(0); - ElementType masked_distance = select_min ? std::numeric_limits::infinity() - : std::numeric_limits::lowest(); + auto distances_ptr = temp_distances.data(); + auto count = thrust::make_counting_iterator(0); + DistanceT masked_distance = select_min ? std::numeric_limits::infinity() + : std::numeric_limits::lowest(); thrust::for_each(raft::resource::get_thrust_policy(handle), count, count + current_query_size * current_centroid_size, @@ -250,10 +248,10 @@ void tiled_brute_force_knn(const raft::resources& handle, cuvs::selection::select_k( handle, - raft::make_device_matrix_view( + raft::make_device_matrix_view( temp_distances.data(), current_query_size, current_centroid_size), std::nullopt, - raft::make_device_matrix_view( + raft::make_device_matrix_view( distances + i * k, current_query_size, current_k), raft::make_device_matrix_view( indices + i * k, current_query_size, current_k), @@ -269,10 +267,10 @@ void tiled_brute_force_knn(const raft::resources& handle, // concatenation. 
// Fix both of these problems in a single pass here if (tile_cols != n) { - const ElementType* in_distances = distances + i * k; - const IndexType* in_indices = indices + i * k; - ElementType* out_distances = temp_out_distances.data(); - IndexType* out_indices = temp_out_indices.data(); + const DistanceT* in_distances = distances + i * k; + const IndexType* in_indices = indices + i * k; + DistanceT* out_distances = temp_out_distances.data(); + IndexType* out_indices = temp_out_indices.data(); auto count = thrust::make_counting_iterator(0); thrust::for_each(raft::resource::get_thrust_policy(handle), @@ -292,11 +290,11 @@ void tiled_brute_force_knn(const raft::resources& handle, // select the actual top-k items here from the temporary output cuvs::selection::select_k( handle, - raft::make_device_matrix_view( + raft::make_device_matrix_view( temp_out_distances.data(), current_query_size, temp_out_cols), raft::make_device_matrix_view( temp_out_indices.data(), current_query_size, temp_out_cols), - raft::make_device_matrix_view( + raft::make_device_matrix_view( distances + i * k, current_query_size, k), raft::make_device_matrix_view( indices + i * k, current_query_size, k), @@ -330,7 +328,10 @@ void tiled_brute_force_knn(const raft::resources& handle, * @param[in] metric corresponds to the cuvs::distance::DistanceType enum (default is L2Expanded) * @param[in] metricArg metric argument to use. 
Corresponds to the p arg for lp norm */ -template +template void brute_force_knn_impl( raft::resources const& handle, std::vector& input, @@ -339,47 +340,46 @@ void brute_force_knn_impl( value_t* search_items, IntType n, IdxType* res_I, - value_t* res_D, + DistType* res_D, IntType k, bool rowMajorIndex = true, bool rowMajorQuery = true, std::vector* translations = nullptr, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded, - float metricArg = 0, - std::vector* input_norms = nullptr, - const value_t* search_norms = nullptr) + DistType metricArg = 0, + std::vector* input_norms = nullptr, + const DistType* search_norms = nullptr) { auto userStream = raft::resource::get_cuda_stream(handle); ASSERT(input.size() == sizes.size(), "input and sizes vectors should be the same size"); - std::vector* id_ranges; - if (translations == nullptr) { + std::vector id_ranges; + if (translations != nullptr) { + // use the given translations + id_ranges.insert(id_ranges.end(), translations->begin(), translations->end()); + } else if (input.size() > 1) { // If we don't have explicit translations // for offsets of the indices, build them // from the local partitions - id_ranges = new std::vector(); IdxType total_n = 0; for (size_t i = 0; i < input.size(); i++) { - id_ranges->push_back(total_n); + id_ranges.push_back(total_n); total_n += sizes[i]; } - } else { - // otherwise, use the given translations - id_ranges = translations; } - int device; - RAFT_CUDA_TRY(cudaGetDevice(&device)); - - rmm::device_uvector trans(id_ranges->size(), userStream); - raft::update_device(trans.data(), id_ranges->data(), id_ranges->size(), userStream); + rmm::device_uvector trans(0, userStream); + if (id_ranges.size() > 0) { + trans.resize(id_ranges.size(), userStream); + raft::update_device(trans.data(), id_ranges.data(), id_ranges.size(), userStream); + } - rmm::device_uvector all_D(0, userStream); + rmm::device_uvector all_D(0, userStream); rmm::device_uvector all_I(0, 
userStream); - value_t* out_D = res_D; - IdxType* out_I = res_I; + DistType* out_D = res_D; + IdxType* out_I = res_I; if (input.size() > 1) { all_D.resize(input.size() * k * n, userStream); @@ -419,8 +419,8 @@ void brute_force_knn_impl( size_t total_rows_processed = 0; for (size_t i = 0; i < input.size(); i++) { - value_t* out_d_ptr = out_D + (i * k * n); - IdxType* out_i_ptr = out_I + (i * k * n); + DistType* out_d_ptr = out_D + (i * k * n); + IdxType* out_i_ptr = out_I + (i * k * n); auto stream = raft::resource::get_next_usable_stream(handle, i); @@ -448,13 +448,13 @@ void brute_force_knn_impl( if (metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::L2SqrtUnexpanded || metric == cuvs::distance::DistanceType::LpUnexpanded) { - value_t p = 0.5; // standard l2 + DistType p = 0.5; // standard l2 if (metric == cuvs::distance::DistanceType::LpUnexpanded) p = 1.0 / metricArg; - raft::linalg::unaryOp( + raft::linalg::unaryOp( res_D, res_D, n * k, - [p] __device__(value_t input) { return powf(fabsf(input), p); }, + [p] __device__(DistType input) { return powf(fabsf(input), p); }, stream); } } else { @@ -510,18 +510,19 @@ void brute_force_knn_impl( // no translations or partitions to combine, it can be skipped. 
knn_merge_parts(out_D, out_I, res_D, res_I, n, input.size(), k, userStream, trans.data()); } - - if (translations == nullptr) delete id_ranges; }; -template +template void brute_force_search( raft::resources const& res, - const cuvs::neighbors::brute_force::index& idx, + const cuvs::neighbors::brute_force::index& idx, raft::device_matrix_view queries, raft::device_matrix_view neighbors, - raft::device_matrix_view distances, - std::optional> query_norms = std::nullopt) + raft::device_matrix_view distances, + std::optional> query_norms = std::nullopt) { RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), "Value of k must match for outputs"); RAFT_EXPECTS(idx.dataset().extent(1) == queries.extent(1), @@ -532,36 +533,37 @@ void brute_force_search( std::vector dataset = {const_cast(idx.dataset().data_handle())}; std::vector sizes = {idx.dataset().extent(0)}; - std::vector norms; - if (idx.has_norms()) { norms.push_back(const_cast(idx.norms().data_handle())); } - - brute_force_knn_impl(res, - dataset, - sizes, - d, - const_cast(queries.data_handle()), - queries.extent(0), - neighbors.data_handle(), - distances.data_handle(), - k, - true, - std::is_same_v, - nullptr, - idx.metric(), - idx.metric_arg(), - norms.size() ? &norms : nullptr, - query_norms ? query_norms->data_handle() : nullptr); + std::vector norms; + if (idx.has_norms()) { norms.push_back(const_cast(idx.norms().data_handle())); } + + brute_force_knn_impl( + res, + dataset, + sizes, + d, + const_cast(queries.data_handle()), + queries.extent(0), + neighbors.data_handle(), + distances.data_handle(), + k, + true, + std::is_same_v, + nullptr, + idx.metric(), + idx.metric_arg(), + norms.size() ? &norms : nullptr, + query_norms ? 
query_norms->data_handle() : nullptr); } -template +template void brute_force_search_filtered( raft::resources const& res, - const cuvs::neighbors::brute_force::index& idx, + const cuvs::neighbors::brute_force::index& idx, raft::device_matrix_view queries, cuvs::core::bitmap_view filter, raft::device_matrix_view neighbors, - raft::device_matrix_view distances, - std::optional> query_norms = std::nullopt) + raft::device_matrix_view distances, + std::optional> query_norms = std::nullopt) { auto metric = idx.metric(); @@ -604,26 +606,26 @@ void brute_force_search_filtered( if (sparsity > 0.01f) { raft::resources stream_pool_handle(res); raft::resource::set_cuda_stream(stream_pool_handle, stream); - auto idx_norm = idx.has_norms() ? const_cast(idx.norms().data_handle()) : nullptr; - - tiled_brute_force_knn(stream_pool_handle, - queries.data_handle(), - idx.dataset().data_handle(), - n_queries, - n_dataset, - dim, - k, - distances.data_handle(), - neighbors.data_handle(), - metric, - 2.0, - 0, - 0, - idx_norm, - nullptr, - filter.data()); + auto idx_norm = idx.has_norms() ? 
const_cast(idx.norms().data_handle()) : nullptr; + + tiled_brute_force_knn(stream_pool_handle, + queries.data_handle(), + idx.dataset().data_handle(), + n_queries, + n_dataset, + dim, + k, + distances.data_handle(), + neighbors.data_handle(), + metric, + DistanceT{2.0}, + 0, + 0, + idx_norm, + nullptr, + filter.data()); } else { - auto csr = raft::make_device_csr_matrix(res, n_queries, n_dataset, nnz_h); + auto csr = raft::make_device_csr_matrix(res, n_queries, n_dataset, nnz_h); // fill csr raft::sparse::convert::bitmap_to_csr(res, filter, csr); @@ -639,20 +641,20 @@ void brute_force_search_filtered( auto dataset_view = raft::make_device_matrix_view( idx.dataset().data_handle(), n_dataset, dim); - auto csr_view = raft::make_device_csr_matrix_view( + auto csr_view = raft::make_device_csr_matrix_view( csr.get_elements().data(), compressed_csr_view); raft::sparse::linalg::masked_matmul(res, queries, dataset_view, filter, csr_view); // post process - std::optional> query_norms_; + std::optional> query_norms_; if (metric == cuvs::distance::DistanceType::L2Expanded || metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::CosineExpanded) { if (metric == cuvs::distance::DistanceType::CosineExpanded) { if (!query_norms) { - query_norms_ = raft::make_device_vector(res, n_queries); - raft::linalg::rowNorm((T*)(query_norms_->data_handle()), + query_norms_ = raft::make_device_vector(res, n_queries); + raft::linalg::rowNorm((DistanceT*)(query_norms_->data_handle()), queries.data_handle(), dim, n_queries, @@ -663,8 +665,8 @@ void brute_force_search_filtered( } } else { if (!query_norms) { - query_norms_ = raft::make_device_vector(res, n_queries); - raft::linalg::rowNorm((T*)(query_norms_->data_handle()), + query_norms_ = raft::make_device_vector(res, n_queries); + raft::linalg::rowNorm((DistanceT*)(query_norms_->data_handle()), queries.data_handle(), dim, n_queries, @@ -686,7 +688,7 @@ void brute_force_search_filtered( } // select k - 
auto const_csr_view = raft::make_device_csr_matrix_view( + auto const_csr_view = raft::make_device_csr_matrix_view( csr.get_elements().data(), compressed_csr_view); std::optional> no_opt = std::nullopt; bool select_min = cuvs::distance::is_min_close(metric); @@ -697,21 +699,21 @@ void brute_force_search_filtered( return; } -template -cuvs::neighbors::brute_force::index build( +template +cuvs::neighbors::brute_force::index build( raft::resources const& res, raft::device_matrix_view dataset, cuvs::distance::DistanceType metric, - T metric_arg) + DistT metric_arg) { // certain distance metrics can benefit by pre-calculating the norms for the index dataset // which lets us avoid calculating these at query time - std::optional> norms; + std::optional> norms; if (metric == cuvs::distance::DistanceType::L2Expanded || metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::CosineExpanded) { - norms = raft::make_device_vector(res, dataset.extent(0)); + norms = raft::make_device_vector(res, dataset.extent(0)); // cosine needs the l2norm, where as l2 distances needs the squared norm if (metric == cuvs::distance::DistanceType::CosineExpanded) { raft::linalg::norm(res, @@ -729,6 +731,7 @@ cuvs::neighbors::brute_force::index build( } } - return cuvs::neighbors::brute_force::index(res, dataset, std::move(norms), metric, metric_arg); + return cuvs::neighbors::brute_force::index( + res, dataset, std::move(norms), metric, metric_arg); } } // namespace cuvs::neighbors::detail diff --git a/cpp/src/neighbors/detail/knn_utils.cuh b/cpp/src/neighbors/detail/knn_utils.cuh index 1cc709fa40..60d5f6e30e 100644 --- a/cpp/src/neighbors/detail/knn_utils.cuh +++ b/cpp/src/neighbors/detail/knn_utils.cuh @@ -21,15 +21,16 @@ #include #include +#include #include namespace cuvs::neighbors::detail { -template -RAFT_KERNEL epilogue_on_csr_kernel(value_t* __restrict__ compressed_C, +template +RAFT_KERNEL epilogue_on_csr_kernel(output_t* __restrict__ 
compressed_C, const value_idx* __restrict__ rows, const value_idx* __restrict__ cols, - const value_t* __restrict__ Q_sq_norms, + const output_t* __restrict__ Q_sq_norms, const value_t* __restrict__ R_sq_norms, value_idx nnz, expansion_f expansion_func) @@ -43,13 +44,13 @@ RAFT_KERNEL epilogue_on_csr_kernel(value_t* __restrict__ compressed_C, compressed_C[tid] = expansion_func(compressed_C[tid], Q_sq_norms[i], R_sq_norms[j]); } -template +template void epilogue_on_csr(raft::resources const& handle, - value_t* compressed_C, + output_t* compressed_C, const value_idx nnz, const value_idx* rows, const value_idx* cols, - const value_t* Q_sq_norms, + const output_t* Q_sq_norms, const value_t* R_sq_norms, cuvs::distance::DistanceType metric) { @@ -65,8 +66,12 @@ void epilogue_on_csr(raft::resources const& handle, Q_sq_norms, R_sq_norms, nnz, - [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) -> value_t { - return value_t(-2.0) * dot + q_norm + r_norm; + [] __device__ __host__(output_t dot, output_t q_norm, value_t r_norm) -> output_t { + if constexpr (std::is_same_v) { + return output_t(-2.0) * dot + q_norm + __half2float(r_norm); + } else { + return output_t(-2.0) * dot + q_norm + r_norm; + } }); } else if (metric == cuvs::distance::DistanceType::L2SqrtExpanded) { epilogue_on_csr_kernel<<>>( @@ -76,8 +81,12 @@ void epilogue_on_csr(raft::resources const& handle, Q_sq_norms, R_sq_norms, nnz, - [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) -> value_t { - return raft::sqrt(value_t(-2.0) * dot + q_norm + r_norm); + [] __device__ __host__(output_t dot, output_t q_norm, value_t r_norm) -> output_t { + if constexpr (std::is_same_v) { + return raft::sqrt(output_t(-2.0) * dot + q_norm + __half2float(r_norm)); + } else { + return raft::sqrt(output_t(-2.0) * dot + q_norm + r_norm); + } }); } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { epilogue_on_csr_kernel<<>>( @@ -87,8 +96,12 @@ void epilogue_on_csr(raft::resources 
const& handle, Q_sq_norms, R_sq_norms, nnz, - [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) -> value_t { - return value_t(1.0) - dot / (q_norm * r_norm); + [] __device__ __host__(output_t dot, output_t q_norm, value_t r_norm) -> output_t { + if constexpr (std::is_same_v) { + return output_t(1.0) - dot / (q_norm * __half2float(r_norm)); + } else { + return output_t(1.0) - dot / (q_norm * r_norm); + } }); } RAFT_CUDA_TRY(cudaPeekAtLastError()); diff --git a/cpp/src/neighbors/hnsw_c.cpp b/cpp/src/neighbors/hnsw_c.cpp new file mode 100644 index 0000000000..ab5268a6d3 --- /dev/null +++ b/cpp/src/neighbors/hnsw_c.cpp @@ -0,0 +1,166 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cuvs/distance/distance.h" +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace { +template +void _search(cuvsResources_t res, + cuvsHnswSearchParams params, + cuvsHnswIndex index, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + auto search_params = cuvs::neighbors::hnsw::search_params(); + search_params.ef = params.ef; + search_params.num_threads = params.numThreads; + + using queries_mdspan_type = raft::host_matrix_view; + using neighbors_mdspan_type = raft::host_matrix_view; + using distances_mdspan_type = raft::host_matrix_view; + auto queries_mds = cuvs::core::from_dlpack(queries_tensor); + auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + cuvs::neighbors::hnsw::search( + *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds); +} + +template +void* _deserialize(cuvsResources_t res, const char* filename, int dim, cuvsDistanceType metric) +{ + auto res_ptr = reinterpret_cast(res); + cuvs::neighbors::hnsw::index* index = nullptr; + cuvs::neighbors::hnsw::deserialize(*res_ptr, std::string(filename), dim, metric, &index); + return index; +} +} // namespace + +extern "C" cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params) +{ + return cuvs::core::translate_exceptions( + [=] { *params = new cuvsHnswSearchParams{.ef = 200, .numThreads = 0}; }); +} + +extern "C" cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params) +{ + return cuvs::core::translate_exceptions([=] { delete params; }); +} + +extern "C" cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index) +{ + return cuvs::core::translate_exceptions([=] { *index = new cuvsHnswIndex{}; }); +} + +extern "C" cuvsError_t 
cuvsHnswIndexDestroy(cuvsHnswIndex_t index_c_ptr) +{ + return cuvs::core::translate_exceptions([=] { + auto index = *index_c_ptr; + + if (index.dtype.code == kDLFloat) { + auto index_ptr = reinterpret_cast*>(index.addr); + delete index_ptr; + } else if (index.dtype.code == kDLInt) { + auto index_ptr = reinterpret_cast*>(index.addr); + delete index_ptr; + } else if (index.dtype.code == kDLUInt) { + auto index_ptr = reinterpret_cast*>(index.addr); + delete index_ptr; + } + delete index_c_ptr; + }); +} + +extern "C" cuvsError_t cuvsHnswSearch(cuvsResources_t res, + cuvsHnswSearchParams_t params, + cuvsHnswIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) +{ + return cuvs::core::translate_exceptions([=] { + auto queries = queries_tensor->dl_tensor; + auto neighbors = neighbors_tensor->dl_tensor; + auto distances = distances_tensor->dl_tensor; + + RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(queries), + "queries should have host compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(neighbors), + "neighbors should have host compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(distances), + "distances should have host compatible memory"); + + RAFT_EXPECTS(neighbors.dtype.code == kDLUInt && neighbors.dtype.bits == 64, + "neighbors should be of type uint64_t"); + RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32, + "distances should be of type float32"); + + auto index = *index_c_ptr; + RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries"); + RAFT_EXPECTS(queries.dtype.bits == 32, "number of bits in queries dtype should be 32"); + + if (index.dtype.code == kDLFloat) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + } else if (index.dtype.code == kDLUInt) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); 
+ } else if (index.dtype.code == kDLInt) { + _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + } else { + RAFT_FAIL("Unsupported index dtype: %d and bits: %d", queries.dtype.code, queries.dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, + const char* filename, + int dim, + cuvsDistanceType metric, + cuvsHnswIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + if (index->dtype.code == kDLFloat && index->dtype.bits == 32) { + index->addr = reinterpret_cast(_deserialize(res, filename, dim, metric)); + index->dtype.code = kDLFloat; + } else if (index->dtype.code == kDLUInt && index->dtype.bits == 8) { + index->addr = reinterpret_cast(_deserialize(res, filename, dim, metric)); + index->dtype.code = kDLInt; + } else if (index->dtype.code == kDLInt && index->dtype.bits == 8) { + index->addr = reinterpret_cast(_deserialize(res, filename, dim, metric)); + index->dtype.code = kDLUInt; + } else { + RAFT_FAIL("Unsupported dtype in file %s", filename); + } + }); +} diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh index 60d43bed6c..fb73fb8a90 100644 --- a/cpp/src/neighbors/ivf_common.cuh +++ b/cpp/src/neighbors/ivf_common.cuh @@ -254,6 +254,7 @@ void postprocess_distances(ScoreOutT* out, // [n_queries, topk] raft::linalg::unaryOp(out, in, len, raft::sqrt_op{}, stream); } } break; + case distance::DistanceType::CosineExpanded: case distance::DistanceType::InnerProduct: { float factor = (account_for_max_close ? 
-1.0 : 1.0) * scaling_factor * scaling_factor; if (factor != 1.0) { diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh index e8df3e3d6a..fb110d8104 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -335,6 +335,37 @@ void extend(raft::resources const& handle, if (!index->center_norms().has_value()) { index->allocate_center_norms(handle); if (index->center_norms().has_value()) { + if (index->metric() == cuvs::distance::DistanceType::CosineExpanded) { + raft::linalg::rowNorm(index->center_norms()->data_handle(), + index->centers().data_handle(), + dim, + n_lists, + raft::linalg::L2Norm, + true, + stream, + raft::sqrt_op{}); + } else { + raft::linalg::rowNorm(index->center_norms()->data_handle(), + index->centers().data_handle(), + dim, + n_lists, + raft::linalg::L2Norm, + true, + stream); + } + RAFT_LOG_TRACE_VEC(index->center_norms()->data_handle(), std::min(dim, 20)); + } + } else if (index->center_norms().has_value() && index->adaptive_centers()) { + if (index->metric() == cuvs::distance::DistanceType::CosineExpanded) { + raft::linalg::rowNorm(index->center_norms()->data_handle(), + index->centers().data_handle(), + dim, + n_lists, + raft::linalg::L2Norm, + true, + stream, + raft::sqrt_op{}); + } else { raft::linalg::rowNorm(index->center_norms()->data_handle(), index->centers().data_handle(), dim, @@ -342,16 +373,7 @@ void extend(raft::resources const& handle, raft::linalg::L2Norm, true, stream); - RAFT_LOG_TRACE_VEC(index->center_norms()->data_handle(), std::min(dim, 20)); } - } else if (index->center_norms().has_value() && index->adaptive_centers()) { - raft::linalg::rowNorm(index->center_norms()->data_handle(), - index->centers().data_handle(), - dim, - n_lists, - raft::linalg::L2Norm, - true, - stream); RAFT_LOG_TRACE_VEC(index->center_norms()->data_handle(), std::min(dim, 20)); } } @@ -384,7 +406,8 @@ inline auto build(raft::resources const& 
handle, "unsupported data type"); RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); RAFT_EXPECTS(n_rows >= params.n_lists, "number of rows can't be less than n_lists"); - + RAFT_EXPECTS(params.metric != cuvs::distance::DistanceType::CosineExpanded || dim > 1, + "Cosine metric requires more than one dim"); index index(handle, params, dim); utils::memzero( index.accum_sorted_sizes().data_handle(), index.accum_sorted_sizes().size(), stream); @@ -414,7 +437,7 @@ inline auto build(raft::resources const& handle, index.centers().data_handle(), index.n_lists(), index.dim()); cuvs::cluster::kmeans::balanced_params kmeans_params; kmeans_params.n_iters = params.kmeans_n_iters; - kmeans_params.metric = static_cast(index.metric()); + kmeans_params.metric = index.metric(); cuvs::cluster::kmeans_balanced::fit( handle, kmeans_params, trainset_const_view, centers_view, utils::mapping{}); } diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cu deleted file mode 100644 index 56bb71094f..0000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cu +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include - -#include "ivf_flat_build.cuh" - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ - } \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::host_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ - } -CUVS_INST_IVF_FLAT_BUILD(float, int64_t); - -#undef CUVS_INST_IVF_FLAT_BUILD - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cu deleted file mode 100644 index 4803868c0c..0000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cu +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include - -#include "ivf_flat_build.cuh" - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ - } \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::host_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - 
cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ - } -CUVS_INST_IVF_FLAT_BUILD(int8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_BUILD - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cu deleted file mode 100644 index e087f94c4b..0000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cu +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include - -#include "ivf_flat_build.cuh" - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ - } \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::host_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ - } -CUVS_INST_IVF_FLAT_BUILD(uint8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_BUILD - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cu deleted file mode 100644 index 2636067bf8..0000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include - -#include "ivf_flat_build.cuh" - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index(std::move( \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ - } \ - auto extend(raft::resources const& handle, \ - raft::host_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index(std::move( \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::host_matrix_view new_vectors, \ - std::optional> 
new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ - } -CUVS_INST_IVF_FLAT_EXTEND(float, int64_t); - -#undef CUVS_INST_IVF_FLAT_EXTEND - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cu deleted file mode 100644 index 191cb9f391..0000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cu +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include - -#include "ivf_flat_build.cuh" - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index(std::move( \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ - } \ - auto extend(raft::resources const& handle, \ - raft::host_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index(std::move( \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::host_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ - } -CUVS_INST_IVF_FLAT_EXTEND(int8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_EXTEND - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cu deleted file mode 100644 index 29b7e7b69d..0000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cu +++ /dev/null @@ -1,71 +0,0 
@@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include - -#include "ivf_flat_build.cuh" - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index(std::move( \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ - } \ - auto extend(raft::resources const& handle, \ - raft::host_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index(std::move( \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ - } \ - \ - void extend(raft::resources const& handle, \ - 
raft::host_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ - } -CUVS_INST_IVF_FLAT_EXTEND(uint8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_EXTEND - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index ce29a7e7c9..86ef559289 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -104,13 +104,16 @@ __device__ inline void copy_vectorized(T* out, const T* in, uint32_t n) * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit * values) */ -template +template struct loadAndComputeDist { Lambda compute_dist; AccT& dist; + AccT& norm_query; + AccT& norm_data; - __device__ __forceinline__ loadAndComputeDist(AccT& dist, Lambda op) - : dist(dist), compute_dist(op) + __device__ __forceinline__ + loadAndComputeDist(AccT& dist, Lambda op, AccT& norm_query, AccT& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } @@ -134,6 +137,10 @@ struct loadAndComputeDist { #pragma unroll for (int k = 0; k < Veclen; ++k) { compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query += queryRegs[k] * queryRegs[k]; + norm_data += encV[k] * encV[k]; + } } } } @@ -163,7 +170,12 @@ struct loadAndComputeDist { const int d = (i * kUnroll + j) * Veclen; #pragma unroll for (int k = 0; k < Veclen; ++k) { - compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), encV[k]); + T q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += encV[k] * encV[k]; + } } } } @@ -184,20 +196,28 @@ struct loadAndComputeDist { raft::ldg(enc, data + loadDataIdx); 
#pragma unroll for (int k = 0; k < Veclen; k++) { - compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), enc[k]); + T q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += enc[k] * enc[k]; + } } } } }; // This handles uint8_t 8, 16 Veclens -template -struct loadAndComputeDist { +template +struct loadAndComputeDist { Lambda compute_dist; uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) - : dist(dist), compute_dist(op) + __device__ __forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } @@ -220,6 +240,10 @@ struct loadAndComputeDist { #pragma unroll for (int k = 0; k < veclen_int; k++) { compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } } } } @@ -244,7 +268,12 @@ struct loadAndComputeDist { const int d = (i * kUnroll + j) * veclen_int; #pragma unroll for (int k = 0; k < veclen_int; ++k) { - compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), encV[k]); + uint32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } } } } @@ -267,6 +296,10 @@ struct loadAndComputeDist { for (int k = 0; k < veclen_int; k++) { uint32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc[k], enc[k], norm_data); + } } } } @@ -274,13 +307,16 @@ struct loadAndComputeDist { // Keep this specialized uint8 Veclen 
= 4, because compiler is generating suboptimal code while // using above common template of int2/int4 -template -struct loadAndComputeDist { +template +struct loadAndComputeDist { Lambda compute_dist; uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) - : dist(dist), compute_dist(op) + __device__ __forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } @@ -294,6 +330,10 @@ struct loadAndComputeDist { uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } } } __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, @@ -313,6 +353,10 @@ struct loadAndComputeDist { uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } } } } @@ -330,17 +374,24 @@ struct loadAndComputeDist { uint32_t enc = reinterpret_cast(data)[lane_id]; uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } } } }; -template -struct loadAndComputeDist { +template +struct loadAndComputeDist { Lambda compute_dist; uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) - : dist(dist), compute_dist(op) + __device__ 
__forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } @@ -354,6 +405,10 @@ struct loadAndComputeDist { uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } } } @@ -374,6 +429,10 @@ struct loadAndComputeDist { uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } } } } @@ -391,17 +450,24 @@ struct loadAndComputeDist { uint32_t enc = reinterpret_cast(data)[lane_id]; uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } } } }; -template -struct loadAndComputeDist { +template +struct loadAndComputeDist { Lambda compute_dist; uint32_t& dist; + uint32_t& norm_query; + uint32_t& norm_data; - __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) - : dist(dist), compute_dist(op) + __device__ __forceinline__ + loadAndComputeDist(uint32_t& dist, Lambda op, uint32_t& norm_query, uint32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } @@ -415,6 +481,10 @@ struct loadAndComputeDist { uint32_t encV = data[loadIndex + j * kIndexGroupSize]; uint32_t queryRegs = query_shared[shmemIndex + j]; compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query += queryRegs * queryRegs; + norm_data += encV * 
encV; + } } } @@ -434,6 +504,10 @@ struct loadAndComputeDist { uint32_t encV = data[lane_id + j * kIndexGroupSize]; uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += encV * encV; + } } } } @@ -451,18 +525,25 @@ struct loadAndComputeDist { uint32_t enc = data[lane_id]; uint32_t q = raft::shfl(queryReg, d, raft::WarpSize); compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += enc * enc; + } } } }; // This device function is for int8 veclens 4, 8 and 16 -template -struct loadAndComputeDist { +template +struct loadAndComputeDist { Lambda compute_dist; int32_t& dist; + int32_t& norm_query; + int32_t& norm_data; - __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op) - : dist(dist), compute_dist(op) + __device__ __forceinline__ + loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } @@ -485,6 +566,10 @@ struct loadAndComputeDist { #pragma unroll for (int k = 0; k < veclen_int; k++) { compute_dist(dist, queryRegs[k], encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs[k], queryRegs[k], norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } } } } @@ -513,6 +598,10 @@ struct loadAndComputeDist { for (int k = 0; k < veclen_int; ++k) { int32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); compute_dist(dist, q, encV[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(encV[k], encV[k], norm_data); + } } } } @@ -531,17 +620,24 @@ struct loadAndComputeDist { for (int k = 0; k < veclen_int; k++) { int32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); // Here 4 is for 1 - int; compute_dist(dist, q, enc[k]); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + 
norm_data = raft::dp4a(enc[k], enc[k], norm_data); + } } } } }; -template -struct loadAndComputeDist { +template +struct loadAndComputeDist { Lambda compute_dist; int32_t& dist; - __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op) - : dist(dist), compute_dist(op) + int32_t& norm_query; + int32_t& norm_data; + __device__ __forceinline__ + loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, @@ -554,6 +650,10 @@ struct loadAndComputeDist { int32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; int32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; compute_dist(dist, queryRegs, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(queryRegs, queryRegs, norm_query); + norm_data = raft::dp4a(encV, encV, norm_data); + } } } @@ -574,6 +674,10 @@ struct loadAndComputeDist { int32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); compute_dist(dist, q, encV); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); /* accumulate the shuffled word q that was just used in compute_dist, not this lane's own queryReg */ + norm_data = raft::dp4a(encV, encV, norm_data); + } } } } @@ -588,16 +692,23 @@ struct loadAndComputeDist { int32_t enc = reinterpret_cast(data + lane_id * veclen)[0]; int32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); compute_dist(dist, q, enc); + if constexpr (ComputeNorm) { + norm_query = raft::dp4a(q, q, norm_query); + norm_data = raft::dp4a(enc, enc, norm_data); + } } } }; -template -struct loadAndComputeDist { +template +struct loadAndComputeDist { Lambda compute_dist; int32_t& dist; - __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op) - : dist(dist), compute_dist(op) + int32_t& norm_query; + int32_t& norm_data; + __device__ __forceinline__ +
loadAndComputeDist(int32_t& dist, Lambda op, int32_t& norm_query, int32_t& norm_data) + : dist(dist), compute_dist(op), norm_query(norm_query), norm_data(norm_data) { } @@ -609,6 +720,11 @@ struct loadAndComputeDist { #pragma unroll for (int j = 0; j < kUnroll; ++j) { compute_dist(dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]); + if constexpr (ComputeNorm) { + norm_query += int32_t{query_shared[shmemIndex + j]} * int32_t{query_shared[shmemIndex + j]}; + norm_data += int32_t{data[loadIndex + j * kIndexGroupSize]} * + int32_t{data[loadIndex + j * kIndexGroupSize]}; + } } } @@ -625,9 +741,12 @@ struct loadAndComputeDist { for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { #pragma unroll for (int j = 0; j < kUnroll; ++j) { - compute_dist(dist, - raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize), - data[lane_id + j * kIndexGroupSize]); + int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, data[lane_id + j * kIndexGroupSize]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += data[lane_id + j * kIndexGroupSize] * data[lane_id + j * kIndexGroupSize]; + } } } } @@ -638,7 +757,12 @@ struct loadAndComputeDist { const int loadDim = dimBlocks + lane_id; int32_t queryReg = loadDim < dim ? 
query[loadDim] : 0; for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { - compute_dist(dist, raft::shfl(queryReg, d, raft::WarpSize), data[lane_id]); + int32_t q = raft::shfl(queryReg, d, raft::WarpSize); + compute_dist(dist, q, data[lane_id]); + if constexpr (ComputeNorm) { + norm_query += q * q; + norm_data += int32_t{data[lane_id]} * int32_t{data[lane_id]}; + } } } }; @@ -691,6 +815,7 @@ using block_sort_t = typename flat_block_sort::typ template lc(dist, - compute_dist); + // Process first shm_assisted_dim dimensions (always using shared memory) + loadAndComputeDist lc( + dist, compute_dist, norm_query, norm_dataset); for (int pos = 0; pos < shm_assisted_dim; pos += raft::WarpSize, data += kIndexGroupSize * raft::WarpSize) { lc.runLoadShmemCompute(data, query_shared, lane_id, pos); } - } - if (dim > query_smem_elems) { - // The default path - using shfl ops - for dimensions beyond query_smem_elems - loadAndComputeDist lc(dist, - compute_dist); - for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) { - lc.runLoadShflAndCompute(data, query, pos, lane_id); - } - lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim); - } else { - // when shm_assisted_dim == full_warps_along_dim < dim - if (valid) { - loadAndComputeDist<1, decltype(compute_dist), Veclen, T, AccT> lc(dist, compute_dist); + if (dim > query_smem_elems) { + // The default path - using shfl ops - for dimensions beyond query_smem_elems + loadAndComputeDist lc( + dist, compute_dist, norm_query, norm_dataset); + for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) { + lc.runLoadShflAndCompute(data, query, pos, lane_id); + } + lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim); + } else { + // when shm_assisted_dim == full_warps_along_dim < dim + loadAndComputeDist<1, decltype(compute_dist), Veclen, T, AccT, ComputeNorm> lc( + dist, compute_dist, 
norm_query, norm_dataset); for (int pos = full_warps_along_dim; pos < dim; pos += Veclen, data += kIndexGroupSize * Veclen) { lc.runLoadShmemCompute(data, query_shared, lane_id, pos); @@ -814,7 +940,13 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) } // Enqueue one element per thread - const float val = valid ? static_cast(dist) : local_topk_t::queue_t::kDummy; + float val = valid ? static_cast(dist) : local_topk_t::queue_t::kDummy; + + if constexpr (ComputeNorm) { + if (valid) + val = val / (raft::sqrt(static_cast(norm_query)) * + raft::sqrt(static_cast(norm_dataset))); + } if constexpr (kManageLocalTopK) { queue.add(val, sample_offset + vec_id); } else { @@ -864,6 +996,7 @@ uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMem template , raft::identity_op>({}, {}, std::forward(args)...); + case cuvs::distance::DistanceType::CosineExpanded: + // NB: "Ascending" is reversed because the post-processing step is done after that sort + return launch_kernel>( + {}, + raft::compose_op(raft::mul_const_op{-1.0f}, raft::add_const_op{1.0f}), + std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when adding here a new metric. 
default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric)); } diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index 43111a7dea..b7dac3ef88 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -79,6 +79,8 @@ void search_impl(raft::resources const& handle, // also we might need additional storage for select_k rmm::device_uvector indices_tmp_dev(0, stream, search_mr); rmm::device_uvector neighbors_uint32_buf(0, stream, search_mr); + auto distance_buffer_dev_view = raft::make_device_matrix_view( + distance_buffer_dev.data(), n_queries, index.n_lists()); size_t float_query_size; if constexpr (std::is_integral_v) { @@ -122,6 +124,19 @@ void search_impl(raft::resources const& handle, RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min(20, index.n_lists())); break; } + case cuvs::distance::DistanceType::CosineExpanded: { + raft::linalg::rowNorm(query_norm_dev.data(), + converted_queries_ptr, + static_cast(index.dim()), + static_cast(n_queries), + raft::linalg::L2Norm, + true, + stream, + raft::sqrt_op{}); + alpha = -1.0f; + beta = 0.0f; + break; + } default: { alpha = 1.0f; beta = 0.0f; @@ -144,12 +159,25 @@ void search_impl(raft::resources const& handle, index.n_lists(), stream); + if (index.metric() == cuvs::distance::DistanceType::CosineExpanded) { + auto n_lists = index.n_lists(); + const auto* q_norm_ptr = query_norm_dev.data(); + const auto* index_center_norm_ptr = index.center_norms()->data_handle(); + raft::linalg::map_offset( + handle, + distance_buffer_dev_view, + [=] __device__(const uint32_t idx, const float dist) { + const auto query = idx / n_lists; + const auto cluster = idx % n_lists; + return dist / (q_norm_ptr[query] * index_center_norm_ptr[cluster]); + }, + raft::make_const_mdspan(distance_buffer_dev_view)); + } RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min(20, index.n_lists())); 
cuvs::selection::select_k( handle, - raft::make_device_matrix_view( - distance_buffer_dev.data(), n_queries, index.n_lists()), + raft::make_const_mdspan(distance_buffer_dev_view), std::nullopt, raft::make_device_matrix_view(coarse_distances_dev.data(), n_queries, n_probes), raft::make_device_matrix_view( diff --git a/cpp/src/neighbors/ivf_flat_c.cpp b/cpp/src/neighbors/ivf_flat_c.cpp old mode 100644 new mode 100755 index decc88ab3e..c14c1edc0c --- a/cpp/src/neighbors/ivf_flat_c.cpp +++ b/cpp/src/neighbors/ivf_flat_c.cpp @@ -47,7 +47,7 @@ void* _build(cuvsResources_t res, cuvsIvfFlatIndexParams params, DLManagedTensor build_params.conservative_memory_allocation = params.conservative_memory_allocation; auto dataset = dataset_tensor->dl_tensor; - auto dim = dataset.shape[0]; + auto dim = dataset.shape[1]; auto index = new cuvs::neighbors::ivf_flat::index(*res_ptr, build_params, dim); diff --git a/cpp/src/neighbors/ivf_flat_index.cpp b/cpp/src/neighbors/ivf_flat_index.cpp index b249a9c29b..6f7d11e500 100644 --- a/cpp/src/neighbors/ivf_flat_index.cpp +++ b/cpp/src/neighbors/ivf_flat_index.cpp @@ -193,6 +193,7 @@ void index::allocate_center_norms(raft::resources const& res) case cuvs::distance::DistanceType::L2SqrtExpanded: case cuvs::distance::DistanceType::L2Unexpanded: case cuvs::distance::DistanceType::L2SqrtUnexpanded: + case cuvs::distance::DistanceType::CosineExpanded: center_norms_ = raft::make_device_vector(res, n_lists()); break; default: center_norms_ = std::nullopt; diff --git a/cpp/src/neighbors/ivf_pq_c.cpp b/cpp/src/neighbors/ivf_pq_c.cpp old mode 100644 new mode 100755 index 0afe9356d1..2ecad6ada8 --- a/cpp/src/neighbors/ivf_pq_c.cpp +++ b/cpp/src/neighbors/ivf_pq_c.cpp @@ -51,7 +51,7 @@ void* _build(cuvsResources_t res, cuvsIvfPqIndexParams params, DLManagedTensor* build_params.max_train_points_per_pq_code = params.max_train_points_per_pq_code; auto dataset = dataset_tensor->dl_tensor; - auto dim = dataset.shape[0]; + auto dim = dataset.shape[1]; auto 
index = new cuvs::neighbors::ivf_pq::index(*res_ptr, build_params, dim); diff --git a/cpp/src/stats/detail/batched/silhouette_score.cuh b/cpp/src/stats/detail/batched/silhouette_score.cuh new file mode 100644 index 0000000000..aae14ea6cc --- /dev/null +++ b/cpp/src/stats/detail/batched/silhouette_score.cuh @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../silhouette_score.cuh" + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cuvs { +namespace stats { +namespace batched { +namespace detail { + +/** + * This kernel initializes matrix b (n_rows * n_labels) + * For each label that the corresponding row is not a part of is initialized as 0 + * If the corresponding row is the only sample in its label, again 0 + * Only if the there are > 1 samples in the label, row is initialized to max + */ +template +RAFT_KERNEL fill_b_kernel(value_t* b, + const label_idx* y, + value_idx n_rows, + label_idx n_labels, + const value_idx* cluster_counts) +{ + value_idx idx = threadIdx.x + blockIdx.x * blockDim.x; + label_idx idy = threadIdx.y + blockIdx.y * blockDim.y; + + if (idx >= n_rows || idy >= n_labels) { return; } + + auto row_cluster = y[idx]; + + auto col_cluster_count = cluster_counts[idy]; + + // b for own cluster should be max value + // so that it does not interfere with min operator + // b is also 
max if col cluster count is 0 + // however, b is 0 if self cluster count is 1 + if (row_cluster == idy || col_cluster_count == 0) { + if (cluster_counts[row_cluster] == 1) { + b[idx * n_labels + idy] = 0; + } else { + b[idx * n_labels + idy] = std::numeric_limits::max(); + } + } else { + b[idx * n_labels + idy] = 0; + } +} + +/** + * This kernel does an elementwise sweep of chunked pairwise distance matrix + * By knowing the offsets of the chunked pairwise distance matrix in the + * global pairwise distance matrix, we are able to calculate + * intermediate values of a and b for the rows and columns present in the + * current chunked pairwise distance matrix. + */ +template +RAFT_KERNEL compute_chunked_a_b_kernel(value_t* a, + value_t* b, + value_idx row_offset, + value_idx col_offset, + const label_idx* y, + label_idx n_labels, + const value_idx* cluster_counts, + const value_t* distances, + value_idx dist_rows, + value_idx dist_cols) +{ + value_idx row_id = threadIdx.x + blockIdx.x * blockDim.x; + value_idx col_id = threadIdx.y + blockIdx.y * blockDim.y; + + // these are global offsets of current element + // in the full pairwise distance matrix + value_idx pw_row_id = row_id + row_offset; + value_idx pw_col_id = col_id + col_offset; + + if (row_id >= dist_rows || col_id >= dist_cols || pw_row_id == pw_col_id) { return; } + + auto row_cluster = y[pw_row_id]; + if (cluster_counts[row_cluster] == 1) { return; } + + auto col_cluster = y[pw_col_id]; + auto col_cluster_counts = cluster_counts[col_cluster]; + + if (col_cluster == row_cluster) { + atomicAdd(&a[pw_row_id], distances[row_id * dist_cols + col_id] / (col_cluster_counts - 1)); + } else { + atomicAdd(&b[pw_row_id * n_labels + col_cluster], + distances[row_id * dist_cols + col_id] / col_cluster_counts); + } +} + +template +rmm::device_uvector get_cluster_counts(raft::resources const& handle, + const label_idx* y, + value_idx& n_rows, + label_idx& n_labels) +{ + auto stream = 
raft::resource::get_cuda_stream(handle); + + rmm::device_uvector cluster_counts(n_labels, stream); + + rmm::device_uvector workspace(1, stream); + + cuvs::stats::detail::countLabels(y, cluster_counts.data(), n_rows, n_labels, workspace, stream); + + return cluster_counts; +} + +template +rmm::device_uvector get_pairwise_distance(raft::resources const& handle, + const value_t* left_begin, + const value_t* right_begin, + value_idx& n_left_rows, + value_idx& n_right_rows, + value_idx& n_cols, + cuvs::distance::DistanceType metric, + cudaStream_t stream) +{ + rmm::device_uvector distances(n_left_rows * n_right_rows, stream); + + cuvs::distance::pairwise_distance( + handle, + raft::make_device_matrix_view(left_begin, n_left_rows, n_cols), + raft::make_device_matrix_view(right_begin, n_right_rows, n_cols), + raft::make_device_matrix_view(distances.data(), n_left_rows, n_right_rows), + metric); + + return distances; +} + +template +void compute_chunked_a_b(raft::resources const& handle, + value_t* a, + value_t* b, + value_idx& row_offset, + value_idx& col_offset, + const label_idx* y, + label_idx& n_labels, + const value_idx* cluster_counts, + const value_t* distances, + value_idx& dist_rows, + value_idx& dist_cols, + cudaStream_t stream) +{ + dim3 block_size(std::min(dist_rows, 32), std::min(dist_cols, 32)); + dim3 grid_size(raft::ceildiv(dist_rows, (value_idx)block_size.x), + raft::ceildiv(dist_cols, (value_idx)block_size.y)); + + detail::compute_chunked_a_b_kernel<<>>( + a, b, row_offset, col_offset, y, n_labels, cluster_counts, distances, dist_rows, dist_cols); +} + +template +value_t silhouette_score( + raft::resources const& handle, + const value_t* X, + value_idx n_rows, + value_idx n_cols, + const label_idx* y, + label_idx n_labels, + value_t* scores, + value_idx chunk, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) +{ + ASSERT(n_labels >= 2 && n_labels <= (n_rows - 1), + "silhouette Score not defined for the given number of 
labels!"); + + rmm::device_uvector cluster_counts = get_cluster_counts(handle, y, n_rows, n_labels); + + auto stream = raft::resource::get_cuda_stream(handle); + auto policy = raft::resource::get_thrust_policy(handle); + + auto b_size = n_rows * n_labels; + + value_t *a_ptr, *b_ptr; + rmm::device_uvector a(0, stream); + rmm::device_uvector b(b_size, stream); + + b_ptr = b.data(); + + // since a and silhouette score per sample are same size, reusing + if (scores == nullptr || scores == NULL) { + a.resize(n_rows, stream); + a_ptr = a.data(); + } else { + a_ptr = scores; + } + + thrust::fill(policy, a_ptr, a_ptr + n_rows, 0); + + dim3 block_size(std::min(n_rows, 32), std::min(n_labels, 32)); + dim3 grid_size(raft::ceildiv(n_rows, (value_idx)block_size.x), + raft::ceildiv(n_labels, (label_idx)block_size.y)); + detail::fill_b_kernel<<>>( + b_ptr, y, n_rows, n_labels, cluster_counts.data()); + + raft::resource::wait_stream_pool_on_stream(handle); + + auto n_iters = 0; + + for (value_idx i = 0; i < n_rows; i += chunk) { + for (value_idx j = 0; j < n_rows; j += chunk) { + ++n_iters; + + auto chunk_stream = raft::resource::get_next_usable_stream(handle, i + chunk * j); + + const auto* left_begin = X + (i * n_cols); + const auto* right_begin = X + (j * n_cols); + + auto n_left_rows = (i + chunk) < n_rows ? chunk : (n_rows - i); + auto n_right_rows = (j + chunk) < n_rows ? 
chunk : (n_rows - j); + + rmm::device_uvector distances = get_pairwise_distance( + handle, left_begin, right_begin, n_left_rows, n_right_rows, n_cols, metric, chunk_stream); + + compute_chunked_a_b(handle, + a_ptr, + b_ptr, + i, + j, + y, + n_labels, + cluster_counts.data(), + distances.data(), + n_left_rows, + n_right_rows, + chunk_stream); + } + } + + raft::resource::sync_stream_pool(handle); + + // calculating row-wise minimum in b + // this prim only supports int indices for now + raft::linalg::reduce( + b_ptr, + b_ptr, + n_labels, + n_rows, + std::numeric_limits::max(), + true, + true, + stream, + false, + raft::identity_op(), + raft::min_op()); + + // calculating the silhouette score per sample + raft::linalg::binaryOp, value_t, value_idx>( + a_ptr, a_ptr, b_ptr, n_rows, cuvs::stats::detail::SilOp(), stream); + + return thrust::reduce(policy, a_ptr, a_ptr + n_rows, value_t(0)) / n_rows; +} + +} // namespace detail +} // namespace batched +} // namespace stats +} // namespace cuvs diff --git a/cpp/src/stats/detail/silhouette_score.cuh b/cpp/src/stats/detail/silhouette_score.cuh new file mode 100644 index 0000000000..6c876ce6e6 --- /dev/null +++ b/cpp/src/stats/detail/silhouette_score.cuh @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +namespace cuvs { +namespace stats { +namespace detail { + +/** + * @brief kernel that calculates the average intra-cluster distance for every sample data point and + * updates the cluster distance to max value + * @tparam DataT: type of the data samples + * @tparam LabelT: type of the labels + * @param sampleToClusterSumOfDistances: the pointer to the 2D array that contains the sum of + * distances from every sample to every cluster (nRows x nLabels) + * @param binCountArray: pointer to the 1D array that contains the count of samples per cluster (1 x + * nLabels) + * @param d_aArray: the pointer to the array of average intra-cluster distances for every sample in + * device memory (1 x nRows) + * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) + * @param nRows: number of data samples + * @param nLabels: number of Labels + * @param MAX_VAL: DataT specific upper limit + */ +template +RAFT_KERNEL populateAKernel(DataT* sampleToClusterSumOfDistances, + DataT* binCountArray, + DataT* d_aArray, + const LabelT* labels, + int nRows, + int nLabels, + const DataT MAX_VAL) +{ + // getting the current index + int sampleIndex = threadIdx.x + blockIdx.x * blockDim.x; + + if (sampleIndex >= nRows) return; + + // sampleDistanceVector is an array that stores that particular row of the distanceMatrix + DataT* sampleToClusterSumOfDistancesVector = + &sampleToClusterSumOfDistances[sampleIndex * nLabels]; + + LabelT sampleCluster = labels[sampleIndex]; + + int sampleClusterIndex = (int)sampleCluster; + + if (binCountArray[sampleClusterIndex] - 1 <= 0) { + d_aArray[sampleIndex] = -1; + return; + + } + + else { + d_aArray[sampleIndex] = (sampleToClusterSumOfDistancesVector[sampleClusterIndex]) / + (binCountArray[sampleClusterIndex] - 1); + + // modifying the 
sampleDistanceVector to give sample average distance + sampleToClusterSumOfDistancesVector[sampleClusterIndex] = MAX_VAL; + } +} + +/** + * @brief function to calculate the bincounts of number of samples in every label + * @tparam DataT: type of the data samples + * @tparam LabelT: type of the labels + * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) + * @param binCountArray: pointer to the 1D array that contains the count of samples per cluster (1 x + * nLabels) + * @param nRows: number of data samples + * @param nUniqueLabels: number of Labels + * @param workspace: device buffer containing workspace memory + * @param stream: the cuda stream where to launch this kernel + */ +template +void countLabels(const LabelT* labels, + DataT* binCountArray, + int nRows, + int nUniqueLabels, + rmm::device_uvector& workspace, + cudaStream_t stream) +{ + int num_levels = nUniqueLabels + 1; + LabelT lower_level = 0; + LabelT upper_level = nUniqueLabels; + size_t temp_storage_bytes = 0; + + rmm::device_uvector countArray(nUniqueLabels, stream); + + RAFT_CUDA_TRY(cub::DeviceHistogram::HistogramEven(nullptr, + temp_storage_bytes, + labels, + binCountArray, + num_levels, + lower_level, + upper_level, + nRows, + stream)); + + workspace.resize(temp_storage_bytes, stream); + + RAFT_CUDA_TRY(cub::DeviceHistogram::HistogramEven(workspace.data(), + temp_storage_bytes, + labels, + binCountArray, + num_levels, + lower_level, + upper_level, + nRows, + stream)); +} + +/** + * @brief structure that defines the division Lambda for elementwise op + */ +template +struct DivOp { + HDI DataT operator()(DataT a, int b, int c) + { + if (b == 0) + return ULLONG_MAX; + else + return a / b; + } +}; + +/** + * @brief structure that defines the elementwise operation to calculate silhouette score using + * params 'a' and 'b' + */ +template +struct SilOp { + HDI DataT operator()(DataT a, DataT b) + { + if (a == 0 && b == 0 || a == b) + return 0; + else if (a == 
-1) + return 0; + else if (a > b) + return (b - a) / a; + else + return (b - a) / b; + } +}; + +/** + * @brief main function that returns the average silhouette score for a given set of data and its + * clusterings + * @tparam DataT: type of the data samples + * @tparam LabelT: type of the labels + * @param X_in: pointer to the input Data samples array (nRows x nCols) + * @param nRows: number of data samples + * @param nCols: number of features + * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) + * @param nLabels: number of Labels + * @param silhouette_scorePerSample: pointer to the array that is optionally taken in as input and + * is populated with the silhouette score for every sample (1 x nRows) + * @param stream: the cuda stream where to launch this kernel + * @param metric: the numerical value that maps to the type of distance metric to be used in the + * calculations + */ +template +DataT silhouette_score( + raft::resources const& handle, + const DataT* X_in, + int nRows, + int nCols, + const LabelT* labels, + int nLabels, + DataT* silhouette_scorePerSample, + cudaStream_t stream, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) +{ + ASSERT(nLabels >= 2 && nLabels <= (nRows - 1), + "silhouette Score not defined for the given number of labels!"); + + // compute the distance matrix + rmm::device_uvector distanceMatrix(nRows * nRows, stream); + rmm::device_uvector workspace(1, stream); + + auto X_in_view = raft::make_device_matrix_view(X_in, nRows, nCols); + + cuvs::distance::pairwise_distance( + handle, + X_in_view, + X_in_view, + raft::make_device_matrix_view(distanceMatrix.data(), nRows, nRows), + metric); + + // deciding on the array of silhouette scores for each dataPoint + rmm::device_uvector silhouette_scoreSamples(0, stream); + DataT* perSampleSilScore = nullptr; + if (silhouette_scorePerSample == nullptr) { + silhouette_scoreSamples.resize(nRows, stream); + 
perSampleSilScore = silhouette_scoreSamples.data(); + } else { + perSampleSilScore = silhouette_scorePerSample; + } + RAFT_CUDA_TRY(cudaMemsetAsync(perSampleSilScore, 0, nRows * sizeof(DataT), stream)); + + // getting the sample count per cluster + rmm::device_uvector binCountArray(nLabels, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(binCountArray.data(), 0, nLabels * sizeof(DataT), stream)); + countLabels(labels, binCountArray.data(), nRows, nLabels, workspace, stream); + + // calculating the sample-cluster-distance-sum-array + rmm::device_uvector sampleToClusterSumOfDistances(nRows * nLabels, stream); + RAFT_CUDA_TRY(cudaMemsetAsync( + sampleToClusterSumOfDistances.data(), 0, nRows * nLabels * sizeof(DataT), stream)); + raft::linalg::reduce_cols_by_key(distanceMatrix.data(), + labels, + sampleToClusterSumOfDistances.data(), + nRows, + nRows, + nLabels, + stream); + + // creating the a array and b array + rmm::device_uvector d_aArray(nRows, stream); + rmm::device_uvector d_bArray(nRows, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_aArray.data(), 0, nRows * sizeof(DataT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_bArray.data(), 0, nRows * sizeof(DataT), stream)); + + // kernel that populates the d_aArray + // kernel configuration + dim3 numThreadsPerBlock(32, 1, 1); + dim3 numBlocks(raft::ceildiv(nRows, numThreadsPerBlock.x), 1, 1); + + // calling the kernel + populateAKernel<<>>( + sampleToClusterSumOfDistances.data(), + binCountArray.data(), + d_aArray.data(), + labels, + nRows, + nLabels, + std::numeric_limits::max()); + + // elementwise dividing by bincounts + rmm::device_uvector averageDistanceBetweenSampleAndCluster(nRows * nLabels, stream); + RAFT_CUDA_TRY(cudaMemsetAsync( + averageDistanceBetweenSampleAndCluster.data(), 0, nRows * nLabels * sizeof(DataT), stream)); + + raft::linalg::matrixVectorOp(averageDistanceBetweenSampleAndCluster.data(), + sampleToClusterSumOfDistances.data(), + binCountArray.data(), + binCountArray.data(), + nLabels, + nRows, + true, + 
true, + DivOp(), + stream); + + // calculating row-wise minimum + raft::linalg::reduce( + d_bArray.data(), + averageDistanceBetweenSampleAndCluster.data(), + nLabels, + nRows, + std::numeric_limits::max(), + true, + true, + stream, + false, + raft::identity_op{}, + raft::min_op{}); + + // calculating the silhouette score per sample using the d_aArray and d_bArray + raft::linalg::binaryOp>( + perSampleSilScore, d_aArray.data(), d_bArray.data(), nRows, SilOp(), stream); + + // calculating the sum of all the silhouette score + rmm::device_scalar d_avgSilhouetteScore(stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_avgSilhouetteScore.data(), 0, sizeof(DataT), stream)); + + raft::linalg::mapThenSumReduce(d_avgSilhouetteScore.data(), + nRows, + raft::identity_op(), + stream, + perSampleSilScore, + perSampleSilScore); + + DataT avgSilhouetteScore = d_avgSilhouetteScore.value(stream); + + raft::resource::sync_stream(handle, stream); + + avgSilhouetteScore /= nRows; + + return avgSilhouetteScore; +} + +}; // namespace detail +}; // namespace stats +}; // namespace cuvs diff --git a/cpp/src/stats/detail/trustworthiness_score.cuh b/cpp/src/stats/detail/trustworthiness_score.cuh new file mode 100644 index 0000000000..f4725a2e8f --- /dev/null +++ b/cpp/src/stats/detail/trustworthiness_score.cuh @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include + +#include +#include + +#define N_THREADS 512 + +namespace cuvs { +namespace stats { +namespace detail { + +/** + * @brief Build the lookup table + * @param[out] lookup_table: Lookup table giving nearest neighbor order + * of pairwise distance calculations given sample index + * @param[in] X_ind: Sorted indexes of pairwise distance calculations of X + * @param n: Number of samples + * @param work: Number of elements to consider + */ +RAFT_KERNEL build_lookup_table(int* lookup_table, const int* X_ind, int n, int work) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= work) return; + + int sample_idx = i / n; + int nn_idx = i % n; + + int idx = X_ind[i]; + lookup_table[(sample_idx * n) + idx] = nn_idx; +} + +/** + * @brief Compute a the rank of trustworthiness score + * @param[out] rank: Resulting rank + * @param[out] lookup_table: Lookup table giving nearest neighbor order + * of pairwise distance calculations given sample index + * @param[in] emb_ind: Indexes of KNN on embeddings + * @param n: Number of samples + * @param n_neighbors: Number of neighbors considered by trustworthiness score + * @param work: Batch to consider (to do it at once use n * n_neighbors) + */ +template +RAFT_KERNEL compute_rank(double* rank, + const int* lookup_table, + const knn_index_t* emb_ind, + int n, + int n_neighbors, + int work) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= work) return; + + int sample_idx = i / n_neighbors; + + knn_index_t emb_nn_ind = emb_ind[i]; + + int r = lookup_table[(sample_idx * n) + emb_nn_ind]; + int tmp = r - n_neighbors + 1; + if (tmp > 0) raft::myAtomicAdd(rank, tmp); +} + +/** + * @brief Compute a kNN and returns the indices of the nearest neighbors + * @param h Raft handle + * @param[in] input Input matrix containing the dataset + * @param n Number of samples + * @param d Number of features + * @param n_neighbors number of neighbors + * @param[out] indices KNN indexes + * 
@param[out] distances KNN distances + */ +template +void run_knn(const raft::resources& h, + math_t* input, + cuvs::distance::DistanceType metric, + int n, + int d, + int n_neighbors, + int64_t* indices, + math_t* distances) +{ + auto input_view = raft::make_device_matrix_view(input, n, d); + auto index = cuvs::neighbors::brute_force::build(h, input_view, metric); + + cuvs::neighbors::brute_force::search( + h, + index, + input_view, + raft::make_device_matrix_view(indices, n, n_neighbors), + raft::make_device_matrix_view(distances, n, n_neighbors), + std::nullopt); +} + +/** + * @brief Compute the trustworthiness score + * @param h Raft handle + * @param X[in]: Data in original dimension + * @param X_embedded[in]: Data in target dimension (embedding) + * @param n: Number of samples + * @param m: Number of features in high/original dimension + * @param d: Number of features in low/embedded dimension + * @param n_neighbors Number of neighbors considered by trustworthiness score + * @param batchSize Batch size + * @return Trustworthiness score + */ +template +double trustworthiness_score(const raft::resources& h, + const math_t* X, + math_t* X_embedded, + cuvs::distance::DistanceType metric, + int n, + int m, + int d, + int n_neighbors, + int batchSize = 512) +{ + cudaStream_t stream = raft::resource::get_cuda_stream(h); + + const int KNN_ALLOC = n * (n_neighbors + 1); + rmm::device_uvector emb_ind(KNN_ALLOC, stream); + rmm::device_uvector emb_dist(KNN_ALLOC, stream); + + run_knn(h, X_embedded, metric, n, d, n_neighbors + 1, emb_ind.data(), emb_dist.data()); + + const int PAIRWISE_ALLOC = batchSize * n; + rmm::device_uvector X_ind(PAIRWISE_ALLOC, stream); + rmm::device_uvector X_dist(PAIRWISE_ALLOC, stream); + rmm::device_uvector lookup_table(PAIRWISE_ALLOC, stream); + + double t = 0.0; + rmm::device_scalar t_dbuf(stream); + + int toDo = n; + while (toDo > 0) { + int curBatchSize = min(toDo, batchSize); + + // Takes at most batchSize vectors at a time + 
cuvs::distance::pairwise_distance( + h, + raft::make_device_matrix_view(&X[(n - toDo) * m], curBatchSize, m), + raft::make_device_matrix_view(X, n, m), + raft::make_device_matrix_view(X_dist.data(), curBatchSize, n), + metric); + + size_t colSortWorkspaceSize = 0; + bool bAllocWorkspace = false; + + raft::matrix::sort_cols_per_row(X_dist.data(), + X_ind.data(), + curBatchSize, + n, + bAllocWorkspace, + nullptr, + colSortWorkspaceSize, + stream); + + if (bAllocWorkspace) { + rmm::device_uvector sortColsWorkspace(colSortWorkspaceSize, stream); + + raft::matrix::sort_cols_per_row(X_dist.data(), + X_ind.data(), + curBatchSize, + n, + bAllocWorkspace, + sortColsWorkspace.data(), + colSortWorkspaceSize, + stream); + } + + int work = curBatchSize * n; + int n_blocks = raft::ceildiv(work, N_THREADS); + build_lookup_table<<>>( + lookup_table.data(), X_ind.data(), n, work); + + RAFT_CUDA_TRY(cudaMemsetAsync(t_dbuf.data(), 0, sizeof(double), stream)); + + work = curBatchSize * (n_neighbors + 1); + n_blocks = raft::ceildiv(work, N_THREADS); + compute_rank<<>>( + t_dbuf.data(), + lookup_table.data(), + &emb_ind.data()[(n - toDo) * (n_neighbors + 1)], + n, + n_neighbors + 1, + work); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + t += t_dbuf.value(stream); + + toDo -= curBatchSize; + } + + t = 1.0 - ((2.0 / ((n * n_neighbors) * ((2.0 * n) - (3.0 * n_neighbors) - 1.0))) * t); + + return t; +} + +} // namespace detail +} // namespace stats +} // namespace cuvs diff --git a/cpp/src/stats/silhouette_score.cu b/cpp/src/stats/silhouette_score.cu new file mode 100644 index 0000000000..a3bd5f24da --- /dev/null +++ b/cpp/src/stats/silhouette_score.cu @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "./detail/batched/silhouette_score.cuh" +#include "./detail/silhouette_score.cuh" + +namespace cuvs { +namespace stats { +namespace { +template +value_t _silhouette_score( + raft::resources const& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + idx_t n_unique_labels, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) +{ + RAFT_EXPECTS(labels.extent(0) == X_in.extent(0), "Size mismatch between labels and data"); + + value_t* silhouette_score_per_sample_ptr = nullptr; + if (silhouette_score_per_sample.has_value()) { + silhouette_score_per_sample_ptr = silhouette_score_per_sample.value().data_handle(); + RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X_in.extent(0), + "Size mismatch between silhouette_score_per_sample and data"); + } + return detail::silhouette_score(handle, + X_in.data_handle(), + X_in.extent(0), + X_in.extent(1), + labels.data_handle(), + n_unique_labels, + silhouette_score_per_sample_ptr, + raft::resource::get_cuda_stream(handle), + metric); +} + +template +value_t _silhouette_score_batched( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + idx_t n_unique_labels, + idx_t batch_size, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) +{ + static_assert(std::is_integral_v, + "silhouette_score_batched: The index type " + "of each mdspan argument must be an 
integral type."); + static_assert(std::is_integral_v, + "silhouette_score_batched: The label type must be an integral type."); + RAFT_EXPECTS(labels.extent(0) == X.extent(0), "Size mismatch between labels and data"); + + value_t* scores_ptr = nullptr; + if (silhouette_score_per_sample.has_value()) { + scores_ptr = silhouette_score_per_sample.value().data_handle(); + RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X.extent(0), + "Size mismatch between silhouette_score_per_sample and data"); + } + return cuvs::stats::batched::detail::silhouette_score(handle, + X.data_handle(), + X.extent(0), + X.extent(1), + labels.data_handle(), + n_unique_labels, + scores_ptr, + batch_size, + metric); +} +} // namespace + +float silhouette_score( + raft::resources const& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + int64_t n_unique_labels, + cuvs::distance::DistanceType metric) +{ + return _silhouette_score( + handle, X_in, labels, silhouette_score_per_sample, n_unique_labels, metric); +} + +double silhouette_score( + raft::resources const& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + int64_t n_unique_labels, + cuvs::distance::DistanceType metric) +{ + return _silhouette_score( + handle, X_in, labels, silhouette_score_per_sample, n_unique_labels, metric); +} + +float silhouette_score_batched( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + int64_t n_unique_labels, + int64_t batch_size, + cuvs::distance::DistanceType metric) +{ + return _silhouette_score_batched( + handle, X, labels, silhouette_score_per_sample, n_unique_labels, batch_size, metric); +} + +double silhouette_score_batched( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::optional> 
silhouette_score_per_sample, + int64_t n_unique_labels, + int64_t batch_size, + cuvs::distance::DistanceType metric) +{ + return _silhouette_score_batched( + handle, X, labels, silhouette_score_per_sample, n_unique_labels, batch_size, metric); +} +}; // namespace stats +}; // namespace cuvs diff --git a/cpp/src/stats/trustworthiness_score.cu b/cpp/src/stats/trustworthiness_score.cu new file mode 100644 index 0000000000..9ab4cb7062 --- /dev/null +++ b/cpp/src/stats/trustworthiness_score.cu @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "./detail/trustworthiness_score.cuh" + +namespace cuvs { +namespace stats { + +double trustworthiness_score( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_matrix_view X_embedded, + int n_neighbors, + cuvs::distance::DistanceType metric, + int batch_size) +{ + RAFT_EXPECTS(X.extent(0) == X_embedded.extent(0), "Size mismatch between X and X_embedded"); + RAFT_EXPECTS(X.extent(0) <= std::numeric_limits::max(), "Index type not supported"); + + // TODO: Change the underlying implementation to remove the need to const_cast X_embedded. 
+ return detail::trustworthiness_score(handle, + X.data_handle(), + const_cast(X_embedded.data_handle()), + metric, + static_cast(X.extent(0)), + static_cast(X.extent(1)), + static_cast(X_embedded.extent(1)), + n_neighbors, + batch_size); +} + +} // namespace stats +} // namespace cuvs diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 3495b23442..e04c39318d 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -70,6 +70,9 @@ function(ConfigureTest) ${TEST_NAME} PRIVATE "$<$:${CUVS_CXX_FLAGS}>" "$<$:${CUVS_CUDA_FLAGS}>" ) + + target_compile_definitions(${TEST_NAME} PRIVATE "CUVS_EXPLICIT_INSTANTIATE_ONLY") + if(_CUVS_TEST_NOCUDA) target_compile_definitions(${TEST_NAME} PRIVATE "CUVS_DISABLE_CUDA") endif() @@ -91,13 +94,13 @@ endfunction() if(BUILD_TESTS) ConfigureTest( - NAME NEIGHBORS_TEST PATH neighbors/brute_force.cu - neighbors/brute_force_prefiltered.cu neighbors/refine.cu GPUS 1 PERCENT 100 + NAME NEIGHBORS_TEST PATH neighbors/brute_force.cu neighbors/brute_force_prefiltered.cu + neighbors/refine.cu GPUS 1 PERCENT 100 ) ConfigureTest( - NAME CLUSTER_TEST PATH cluster/kmeans.cu cluster/kmeans_balanced.cu - cluster/kmeans_find_k.cu cluster/linkage.cu GPUS 1 PERCENT 100 + NAME CLUSTER_TEST PATH cluster/kmeans.cu cluster/kmeans_balanced.cu cluster/kmeans_find_k.cu + cluster/linkage.cu GPUS 1 PERCENT 100 ) ConfigureTest( @@ -126,6 +129,11 @@ if(BUILD_TESTS) 100 ) + ConfigureTest( + NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH neighbors/ann_brute_force/test_float.cu + neighbors/ann_brute_force/test_half.cu GPUS 1 PERCENT 100 + ) + ConfigureTest( NAME NEIGHBORS_ANN_CAGRA_TEST @@ -181,30 +189,33 @@ if(BUILD_TESTS) PERCENT 100 ) + ConfigureTest( + NAME STATS_TEST PATH stats/trustworthiness.cu stats/silhouette_score.cu GPUS 1 PERCENT 100 + ) endif() if(BUILD_C_TESTS) ConfigureTest(NAME INTEROP_TEST PATH core/interop.cu C_LIB) ConfigureTest( - NAME DISTANCE_C_TEST PATH distance/run_pairwise_distance_c.c - distance/pairwise_distance_c.cu 
C_LIB - ) - - ConfigureTest( - NAME BRUTEFORCE_C_TEST PATH neighbors/run_brute_force_c.c neighbors/brute_force_c.cu + NAME DISTANCE_C_TEST PATH distance/run_pairwise_distance_c.c distance/pairwise_distance_c.cu C_LIB ) ConfigureTest( - NAME IVF_FLAT_C_TEST PATH neighbors/run_ivf_flat_c.c neighbors/ann_ivf_flat_c.cu - C_LIB + NAME BRUTEFORCE_C_TEST PATH neighbors/run_brute_force_c.c neighbors/brute_force_c.cu C_LIB ) ConfigureTest( - NAME IVF_PQ_C_TEST PATH neighbors/run_ivf_pq_c.c neighbors/ann_ivf_pq_c.cu C_LIB + NAME IVF_FLAT_C_TEST PATH neighbors/run_ivf_flat_c.c neighbors/ann_ivf_flat_c.cu C_LIB ) + ConfigureTest(NAME IVF_PQ_C_TEST PATH neighbors/run_ivf_pq_c.c neighbors/ann_ivf_pq_c.cu C_LIB) + ConfigureTest(NAME CAGRA_C_TEST PATH neighbors/ann_cagra_c.cu C_LIB) + + if(BUILD_CAGRA_HNSWLIB) + ConfigureTest(NAME HNSW_C_TEST PATH neighbors/ann_hnsw_c.cu C_LIB) + endif() endif() # ################################################################################################## diff --git a/cpp/test/core/c_api.c b/cpp/test/core/c_api.c index a5b73d8fb1..27973c2ddf 100644 --- a/cpp/test/core/c_api.c +++ b/cpp/test/core/c_api.c @@ -31,6 +31,33 @@ int main() cuvsError_t stream_error = cuvsStreamSet(res, stream); if (stream_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + // Allocate memory + void* ptr; + size_t bytes = 1024; + cuvsError_t alloc_error = cuvsRMMAlloc(res, &ptr, bytes); + if (alloc_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + + // Free memory + cuvsError_t free_error = cuvsRMMFree(res, ptr, bytes); + if (free_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + + // Enable pool memory resource + cuvsError_t pool_error = cuvsRMMPoolMemoryResourceEnable(10, 100); + if (pool_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + + // Allocate memory again + void* ptr2; + cuvsError_t alloc_error_pool = cuvsRMMAlloc(res, &ptr2, 1024); + if (alloc_error_pool == CUVS_ERROR) { exit(EXIT_FAILURE); } + + // Free memory + cuvsError_t free_error_pool = cuvsRMMFree(res, ptr2, 
1024); + if (free_error_pool == CUVS_ERROR) { exit(EXIT_FAILURE); } + + // Reset pool memory resource + cuvsError_t reset_error = cuvsRMMMemoryResourceReset(); + if (reset_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + // Destroy resources cuvsError_t destroy_error = cuvsResourcesDestroy(res); if (destroy_error == CUVS_ERROR) { exit(EXIT_FAILURE); } diff --git a/cpp/test/distance/dist_canberra.cu b/cpp/test/distance/dist_canberra.cu index 2bf5906013..e7ffa9d0f5 100644 --- a/cpp/test/distance/dist_canberra.cu +++ b/cpp/test/distance/dist_canberra.cu @@ -20,8 +20,9 @@ namespace cuvs { namespace distance { -template -class DistanceCanberra : public DistanceTest {}; +template +class DistanceCanberra + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -63,6 +64,26 @@ TEST_P(DistanceCanberraD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCanberraD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceCanberra DistanceCanberraH; +TEST_P(DistanceCanberraH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCanberraH, ::testing::ValuesIn(inputsh)); + class BigMatrixCanberra : public BigMatrixDistanceTest {}; TEST_F(BigMatrixCanberra, Result) {} diff --git a/cpp/test/distance/dist_correlation.cu b/cpp/test/distance/dist_correlation.cu index 9e061bebc3..70f3a9adb5 100644 --- a/cpp/test/distance/dist_correlation.cu +++ b/cpp/test/distance/dist_correlation.cu @@ -20,13 +20,15 @@ namespace cuvs { namespace distance { -template +template class DistanceCorrelation - : public DistanceTest {}; + : public DistanceTest {}; -template +template class DistanceCorrelationXequalY - : public DistanceTestSameBuffer {}; + : public DistanceTestSameBuffer {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -48,6 +50,26 @@ TEST_P(DistanceCorrelationF, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationF, ::testing::ValuesIn(inputsf)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceCorrelation DistanceCorrelationH; +TEST_P(DistanceCorrelationH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationH, ::testing::ValuesIn(inputsh)); + typedef DistanceCorrelationXequalY DistanceCorrelationXequalYF; TEST_P(DistanceCorrelationXequalYF, Result) { @@ -87,6 +109,25 @@ TEST_P(DistanceCorrelationD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationD, ::testing::ValuesIn(inputsd)); +typedef DistanceCorrelationXequalY DistanceCorrelationXequalYH; +TEST_P(DistanceCorrelationXequalYH, Result) +{ + int m = params.m; + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[0].data(), + dist[0].data(), + m, + m, + cuvs::CompareApprox(params.tolerance), + stream)); + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[1].data(), + dist[1].data(), + m / 2, + m, + cuvs::CompareApprox(params.tolerance), + stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationXequalYH, ::testing::ValuesIn(inputsh)); + class BigMatrixCorrelation : public BigMatrixDistanceTest {}; TEST_F(BigMatrixCorrelation, Result) {} diff --git a/cpp/test/distance/dist_cos.cu b/cpp/test/distance/dist_cos.cu index e134f045fc..78e2c745fe 100644 --- a/cpp/test/distance/dist_cos.cu +++ b/cpp/test/distance/dist_cos.cu @@ -20,13 +20,15 @@ namespace cuvs { namespace distance { -template -class DistanceExpCos : public DistanceTest { -}; +template +class DistanceExpCos + : public DistanceTest {}; -template +template class DistanceExpCosXequalY - : public DistanceTestSameBuffer {}; + : public DistanceTestSameBuffer {}; const std::vector> inputsf = { {0.001f, 128, (65536 + 128) * 128, 8, true, 1234ULL}, @@ -52,6 +54,30 @@ const std::vector> inputsXeqYf = { {0.03f, 1024, 1024, 1024, false, 1234ULL}, }; +const std::vector> inputsh = { + {0.001f, 128, (65536 + 128) * 128, 8, true, 1234ULL}, + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + 
{0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, (65536 + 128) * 128, 128, 8, false, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; + +const std::vector> inputsXeqYh = { + {0.01f, 1024, 1024, 32, true, 1234ULL}, + {0.01f, 1024, 32, 1024, true, 1234ULL}, + {0.01f, 32, 1024, 1024, true, 1234ULL}, + {0.03f, 1024, 1024, 1024, true, 1234ULL}, + {0.01f, 1024, 1024, 32, false, 1234ULL}, + {0.01f, 1024, 32, 1024, false, 1234ULL}, + {0.01f, 32, 1024, 1024, false, 1234ULL}, + {0.03f, 1024, 1024, 1024, false, 1234ULL}, +}; + typedef DistanceExpCos DistanceExpCosF; TEST_P(DistanceExpCosF, Result) { @@ -62,6 +88,16 @@ TEST_P(DistanceExpCosF, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpCosF, ::testing::ValuesIn(inputsf)); +typedef DistanceExpCos DistanceExpCosH; +TEST_P(DistanceExpCosH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? params.n : params.m; + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpCosH, ::testing::ValuesIn(inputsh)); + typedef DistanceExpCosXequalY DistanceExpCosXequalYF; TEST_P(DistanceExpCosXequalYF, Result) { @@ -85,6 +121,29 @@ TEST_P(DistanceExpCosXequalYF, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpCosXequalYF, ::testing::ValuesIn(inputsXeqYf)); +typedef DistanceExpCosXequalY DistanceExpCosXequalYH; +TEST_P(DistanceExpCosXequalYH, Result) +{ + int m = params.m; + int n = params.m; + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[0].data(), + dist[0].data(), + m, + n, + cuvs::CompareApprox(params.tolerance), + stream)); + n = params.isRowMajor ? m : m / 2; + m = params.isRowMajor ? 
m / 2 : m; + + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[1].data(), + dist[1].data(), + m, + n, + cuvs::CompareApprox(params.tolerance), + stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpCosXequalYH, ::testing::ValuesIn(inputsXeqYh)); + const std::vector> inputsd = { {0.001, 1024, 1024, 32, true, 1234ULL}, {0.001, 1024, 32, 1024, true, 1234ULL}, diff --git a/cpp/test/distance/dist_hamming.cu b/cpp/test/distance/dist_hamming.cu index 0cf753ecae..3073ed9399 100644 --- a/cpp/test/distance/dist_hamming.cu +++ b/cpp/test/distance/dist_hamming.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceHamming - : public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -64,6 +64,26 @@ TEST_P(DistanceHammingD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHammingD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceHamming DistanceHammingH; +TEST_P(DistanceHammingH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHammingH, ::testing::ValuesIn(inputsh)); + class BigMatrixHamming : public BigMatrixDistanceTest {}; TEST_F(BigMatrixHamming, Result) {} diff --git a/cpp/test/distance/dist_hellinger.cu b/cpp/test/distance/dist_hellinger.cu index 3998a60ab1..692bfeeffe 100644 --- a/cpp/test/distance/dist_hellinger.cu +++ b/cpp/test/distance/dist_hellinger.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceHellingerExp - : public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -64,6 +64,26 @@ TEST_P(DistanceHellingerExpD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHellingerExpD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceHellingerExp DistanceHellingerExpH; +TEST_P(DistanceHellingerExpH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHellingerExpH, ::testing::ValuesIn(inputsh)); + class BigMatrixHellingerExp : public BigMatrixDistanceTest {}; TEST_F(BigMatrixHellingerExp, Result) {} diff --git a/cpp/test/distance/dist_inner_product.cu b/cpp/test/distance/dist_inner_product.cu index 1d6709d52b..aaedb5bf11 100644 --- a/cpp/test/distance/dist_inner_product.cu +++ b/cpp/test/distance/dist_inner_product.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceInnerProduct - : public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 10, 5, 32, true, 1234ULL}, @@ -66,6 +66,27 @@ TEST_P(DistanceInnerProductD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceInnerProductD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 10, 5, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceInnerProduct DistanceInnerProductH; +TEST_P(DistanceInnerProductH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceInnerProductH, ::testing::ValuesIn(inputsh)); + class BigMatrixInnerProduct : public BigMatrixDistanceTest {}; TEST_F(BigMatrixInnerProduct, Result) {} diff --git a/cpp/test/distance/dist_jensen_shannon.cu b/cpp/test/distance/dist_jensen_shannon.cu index 43b7b361d1..f50830d4ee 100644 --- a/cpp/test/distance/dist_jensen_shannon.cu +++ b/cpp/test/distance/dist_jensen_shannon.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceJensenShannon - : public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -64,6 +64,26 @@ TEST_P(DistanceJensenShannonD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceJensenShannonD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceJensenShannon DistanceJensenShannonH; +TEST_P(DistanceJensenShannonH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceJensenShannonH, ::testing::ValuesIn(inputsh)); + class BigMatrixJensenShannon : public BigMatrixDistanceTest {}; TEST_F(BigMatrixJensenShannon, Result) {} diff --git a/cpp/test/distance/dist_kl_divergence.cu b/cpp/test/distance/dist_kl_divergence.cu index 5e56928411..3d2373bf73 100644 --- a/cpp/test/distance/dist_kl_divergence.cu +++ b/cpp/test/distance/dist_kl_divergence.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceKLDivergence - : public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -64,6 +64,26 @@ TEST_P(DistanceKLDivergenceD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceKLDivergenceD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceKLDivergence DistanceKLDivergenceH; +TEST_P(DistanceKLDivergenceH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceKLDivergenceH, ::testing::ValuesIn(inputsh)); + class BigMatrixKLDivergence : public BigMatrixDistanceTest {}; TEST_F(BigMatrixKLDivergence, Result) {} diff --git a/cpp/test/distance/dist_l1.cu b/cpp/test/distance/dist_l1.cu index a3ecd21fe0..cd9a9219b5 100644 --- a/cpp/test/distance/dist_l1.cu +++ b/cpp/test/distance/dist_l1.cu @@ -20,8 +20,9 @@ namespace cuvs { namespace distance { -template -class DistanceUnexpL1 : public DistanceTest {}; +template +class DistanceUnexpL1 + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -63,6 +64,26 @@ TEST_P(DistanceUnexpL1D, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceUnexpL1D, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceUnexpL1 DistanceUnexpL1H; +TEST_P(DistanceUnexpL1H, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceUnexpL1H, ::testing::ValuesIn(inputsh)); + class BigMatrixUnexpL1 : public BigMatrixDistanceTest {}; TEST_F(BigMatrixUnexpL1, Result) {} diff --git a/cpp/test/distance/dist_l2_exp.cu b/cpp/test/distance/dist_l2_exp.cu index f3d038cbc9..5804bb6c57 100644 --- a/cpp/test/distance/dist_l2_exp.cu +++ b/cpp/test/distance/dist_l2_exp.cu @@ -20,13 +20,14 @@ namespace cuvs { namespace distance { -template -class DistanceEucExpTest : public DistanceTest { -}; +template +class DistanceEucExpTest + : public DistanceTest {}; -template +template class DistanceEucExpTestXequalY - : public DistanceTestSameBuffer {}; + : public DistanceTestSameBuffer { +}; const std::vector> inputsf = { {0.001f, 128, (65536 + 128) * 128, 8, true, 1234ULL}, @@ -58,6 +59,36 @@ const std::vector> inputsXeqYf = { {0.03f, 1021, 1021, 1021, false, 1234ULL}, }; +const std::vector> inputsh = { + {0.001f, 128, (65536 + 128) * 128, 8, true, 1234ULL}, + {0.001f, 2048, 4096, 128, true, 1234ULL}, + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.003f, 1021, 1021, 1021, true, 1234ULL}, + {0.001f, (65536 + 128) * 128, 128, 8, false, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, + {0.003f, 1021, 1021, 1021, false, 1234ULL}, +}; + +const std::vector> inputsXeqYh = { + {0.01f, 2048, 4096, 128, true, 1234ULL}, + {0.01f, 1024, 1024, 32, true, 1234ULL}, + {0.01f, 1024, 32, 1024, true, 1234ULL}, + {0.01f, 32, 1024, 1024, true, 1234ULL}, + {0.03f, 1024, 1024, 1024, true, 1234ULL}, + {0.03f, 1021, 1021, 1021, true, 1234ULL}, + {0.01f, 1024, 1024, 32, false, 1234ULL}, 
+ {0.01f, 1024, 32, 1024, false, 1234ULL}, + {0.01f, 32, 1024, 1024, false, 1234ULL}, + {0.03f, 1024, 1024, 1024, false, 1234ULL}, + {0.03f, 1021, 1021, 1021, false, 1234ULL}, +}; + typedef DistanceEucExpTest DistanceEucExpTestF; TEST_P(DistanceEucExpTestF, Result) { @@ -68,6 +99,16 @@ TEST_P(DistanceEucExpTestF, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucExpTestF, ::testing::ValuesIn(inputsf)); +typedef DistanceEucExpTest DistanceEucExpTestH; +TEST_P(DistanceEucExpTestH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? params.n : params.m; + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucExpTestH, ::testing::ValuesIn(inputsh)); + typedef DistanceEucExpTestXequalY DistanceEucExpTestXequalYF; TEST_P(DistanceEucExpTestXequalYF, Result) { @@ -89,6 +130,27 @@ INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucExpTestXequalYF, ::testing::ValuesIn(inputsXeqYf)); +typedef DistanceEucExpTestXequalY DistanceEucExpTestXequalYH; +TEST_P(DistanceEucExpTestXequalYH, Result) +{ + int m = params.m; + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[0].data(), + dist[0].data(), + m, + m, + cuvs::CompareApprox(params.tolerance), + stream)); + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[1].data(), + dist[1].data(), + m / 2, + m, + cuvs::CompareApprox(params.tolerance), + stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, + DistanceEucExpTestXequalYH, + ::testing::ValuesIn(inputsXeqYh)); + const std::vector> inputsd = { {0.001, 1024, 1024, 32, true, 1234ULL}, {0.001, 1024, 32, 1024, true, 1234ULL}, diff --git a/cpp/test/distance/dist_l2_sqrt_exp.cu b/cpp/test/distance/dist_l2_sqrt_exp.cu index b24384be8a..a2e09fdb08 100644 --- a/cpp/test/distance/dist_l2_sqrt_exp.cu +++ b/cpp/test/distance/dist_l2_sqrt_exp.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceEucSqrtExpTest - : 
public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 2048, 4096, 128, true, 1234ULL}, @@ -67,6 +67,29 @@ TEST_P(DistanceEucSqrtExpTestD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucSqrtExpTestD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 2048, 4096, 128, true, 1234ULL}, + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.003f, 1021, 1021, 1021, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, + {0.003f, 1021, 1021, 1021, false, 1234ULL}, +}; +typedef DistanceEucSqrtExpTest DistanceEucSqrtExpTestH; +TEST_P(DistanceEucSqrtExpTestH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? params.n : params.m; + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucSqrtExpTestH, ::testing::ValuesIn(inputsh)); + class BigMatrixEucSqrtExp : public BigMatrixDistanceTest {}; TEST_F(BigMatrixEucSqrtExp, Result) {} diff --git a/cpp/test/distance/dist_l2_unexp.cu b/cpp/test/distance/dist_l2_unexp.cu index c057434fae..3f9e6458f3 100644 --- a/cpp/test/distance/dist_l2_unexp.cu +++ b/cpp/test/distance/dist_l2_unexp.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceEucUnexpTest - : public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -64,6 +64,26 @@ TEST_P(DistanceEucUnexpTestD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucUnexpTestD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 
32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceEucUnexpTest DistanceEucUnexpTestH; +TEST_P(DistanceEucUnexpTestH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? params.n : params.m; + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucUnexpTestH, ::testing::ValuesIn(inputsh)); + class BigMatrixEucUnexp : public BigMatrixDistanceTest { }; TEST_F(BigMatrixEucUnexp, Result) {} diff --git a/cpp/test/distance/dist_l_inf.cu b/cpp/test/distance/dist_l_inf.cu index b9ced68f34..21e9a6c871 100644 --- a/cpp/test/distance/dist_l_inf.cu +++ b/cpp/test/distance/dist_l_inf.cu @@ -20,8 +20,9 @@ namespace cuvs { namespace distance { -template -class DistanceLinf : public DistanceTest {}; +template +class DistanceLinf : public DistanceTest { +}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -63,6 +64,26 @@ TEST_P(DistanceLinfD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLinfD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceLinf DistanceLinfH; +TEST_P(DistanceLinfH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLinfH, ::testing::ValuesIn(inputsh)); + class BigMatrixLinf : public BigMatrixDistanceTest {}; TEST_F(BigMatrixLinf, Result) {} diff --git a/cpp/test/distance/dist_lp_unexp.cu b/cpp/test/distance/dist_lp_unexp.cu index 26620b44b3..95e521fb3a 100644 --- a/cpp/test/distance/dist_lp_unexp.cu +++ b/cpp/test/distance/dist_lp_unexp.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template -class DistanceLpUnexp : public DistanceTest { -}; +template +class DistanceLpUnexp + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL, 4.0f}, @@ -64,6 +64,26 @@ TEST_P(DistanceLpUnexpD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLpUnexpD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL, 4.0f}, + {0.001f, 1024, 32, 1024, true, 1234ULL, 3.0f}, + {0.001f, 32, 1024, 1024, true, 1234ULL, 4.0f}, + {0.003f, 1024, 1024, 1024, true, 1234ULL, 3.0f}, + {0.001f, 1024, 1024, 32, false, 1234ULL, 4.0f}, + {0.001f, 1024, 32, 1024, false, 1234ULL, 3.0f}, + {0.001f, 32, 1024, 1024, false, 1234ULL, 4.0f}, + {0.003f, 1024, 1024, 1024, false, 1234ULL, 3.0f}, +}; +typedef DistanceLpUnexp DistanceLpUnexpH; +TEST_P(DistanceLpUnexpH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLpUnexpH, ::testing::ValuesIn(inputsh)); + class BigMatrixLpUnexp : public BigMatrixDistanceTest { }; TEST_F(BigMatrixLpUnexp, Result) {} diff --git a/cpp/test/distance/dist_russell_rao.cu b/cpp/test/distance/dist_russell_rao.cu index 46da7f9cd0..814a0503f9 100644 --- a/cpp/test/distance/dist_russell_rao.cu +++ b/cpp/test/distance/dist_russell_rao.cu @@ -20,9 +20,9 @@ namespace cuvs { namespace distance { -template +template class DistanceRussellRao - : public DistanceTest {}; + : public DistanceTest {}; const std::vector> inputsf = { {0.001f, 1024, 1024, 32, true, 1234ULL}, @@ -64,6 +64,26 @@ TEST_P(DistanceRussellRaoD, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceRussellRaoD, ::testing::ValuesIn(inputsd)); +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceRussellRao DistanceRussellRaoH; +TEST_P(DistanceRussellRaoH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? 
params.n : params.m; + ASSERT_TRUE(cuvs::devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApprox(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceRussellRaoH, ::testing::ValuesIn(inputsh)); + class BigMatrixRussellRao : public BigMatrixDistanceTest {}; TEST_F(BigMatrixRussellRao, Result) {} diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 2213db87eb..8a431f49a1 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -18,7 +18,6 @@ #include "../test_utils.cuh" -#include #include // cuvs::distance::DistanceType #include // raft::common::nvtx::range #include //raft::make_device_matrix_view @@ -34,8 +33,18 @@ namespace cuvs { namespace distance { -template -RAFT_KERNEL naiveDistanceKernel(DataType* dist, +template +_RAFT_DEVICE inline auto half2float(T& a) +{ + if constexpr (std::is_same_v::type, half>) { + return __half2float(a); + } else { + return a; + } +} + +template +RAFT_KERNEL naiveDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -47,11 +56,11 @@ RAFT_KERNEL naiveDistanceKernel(DataType* dist, std::int64_t midx = threadIdx.x + blockIdx.x * blockDim.x; std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) return; - DataType acc = DataType(0); + OutputType acc = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? 
i + nidx * k : i * n + nidx; - auto diff = x[xidx] - y[yidx]; + auto diff = half2float(x[xidx]) - half2float(y[yidx]); acc += diff * diff; } if (type == cuvs::distance::DistanceType::L2SqrtExpanded || @@ -61,8 +70,8 @@ RAFT_KERNEL naiveDistanceKernel(DataType* dist, dist[outidx] = acc; } -template -RAFT_KERNEL naiveL1_Linf_CanberraDistanceKernel(DataType* dist, +template +RAFT_KERNEL naiveL1_Linf_CanberraDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -75,12 +84,12 @@ RAFT_KERNEL naiveL1_Linf_CanberraDistanceKernel(DataType* dist, std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) { return; } - DataType acc = DataType(0); + OutputType acc = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); auto diff = (a > b) ? (a - b) : (b - a); if (type == cuvs::distance::DistanceType::Linf) { acc = raft::max(acc, diff); @@ -98,8 +107,8 @@ RAFT_KERNEL naiveL1_Linf_CanberraDistanceKernel(DataType* dist, dist[outidx] = acc; } -template -RAFT_KERNEL naiveCosineDistanceKernel(DataType* dist, +template +RAFT_KERNEL naiveCosineDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -111,15 +120,15 @@ RAFT_KERNEL naiveCosineDistanceKernel(DataType* dist, std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) { return; } - DataType acc_a = DataType(0); - DataType acc_b = DataType(0); - DataType acc_ab = DataType(0); + OutputType acc_a = OutputType(0); + OutputType acc_b = OutputType(0); + OutputType acc_ab = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? 
i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); acc_a += a * a; acc_b += b * b; acc_ab += a * b; @@ -128,11 +137,11 @@ RAFT_KERNEL naiveCosineDistanceKernel(DataType* dist, std::int64_t outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; // Use 1.0 - (cosine similarity) to calc the distance - dist[outidx] = (DataType)1.0 - acc_ab / (raft::sqrt(acc_a) * raft::sqrt(acc_b)); + dist[outidx] = (OutputType)1.0 - acc_ab / (raft::sqrt(acc_a) * raft::sqrt(acc_b)); } -template -RAFT_KERNEL naiveInnerProductKernel(DataType* dist, +template +RAFT_KERNEL naiveInnerProductKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -144,13 +153,13 @@ RAFT_KERNEL naiveInnerProductKernel(DataType* dist, std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) { return; } - DataType acc_ab = DataType(0); + OutputType acc_ab = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); acc_ab += a * b; } @@ -158,8 +167,8 @@ RAFT_KERNEL naiveInnerProductKernel(DataType* dist, dist[outidx] = acc_ab; } -template -RAFT_KERNEL naiveHellingerDistanceKernel(DataType* dist, +template +RAFT_KERNEL naiveHellingerDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -171,13 +180,13 @@ RAFT_KERNEL naiveHellingerDistanceKernel(DataType* dist, std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) { return; } - DataType acc_ab = DataType(0); + OutputType acc_ab = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? 
i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); acc_ab += raft::sqrt(a) * raft::sqrt(b); } @@ -189,25 +198,25 @@ RAFT_KERNEL naiveHellingerDistanceKernel(DataType* dist, dist[outidx] = raft::sqrt(rectifier * acc_ab); } -template -RAFT_KERNEL naiveLpUnexpDistanceKernel(DataType* dist, +template +RAFT_KERNEL naiveLpUnexpDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, std::int64_t n, std::int64_t k, bool isRowMajor, - DataType p) + OutputType p) { std::int64_t midx = threadIdx.x + blockIdx.x * blockDim.x; std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) return; - DataType acc = DataType(0); + OutputType acc = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); auto diff = raft::abs(a - b); acc += raft::pow(diff, p); } @@ -217,8 +226,8 @@ RAFT_KERNEL naiveLpUnexpDistanceKernel(DataType* dist, dist[outidx] = acc; } -template -RAFT_KERNEL naiveHammingDistanceKernel(DataType* dist, +template +RAFT_KERNEL naiveHammingDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -229,12 +238,12 @@ RAFT_KERNEL naiveHammingDistanceKernel(DataType* dist, std::int64_t midx = threadIdx.x + blockIdx.x * blockDim.x; std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) return; - DataType acc = DataType(0); + OutputType acc = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? 
i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); acc += (a != b); } acc = acc / k; @@ -242,8 +251,8 @@ RAFT_KERNEL naiveHammingDistanceKernel(DataType* dist, dist[outidx] = acc; } -template -RAFT_KERNEL naiveJensenShannonDistanceKernel(DataType* dist, +template +RAFT_KERNEL naiveJensenShannonDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -254,19 +263,19 @@ RAFT_KERNEL naiveJensenShannonDistanceKernel(DataType* dist, std::int64_t midx = threadIdx.x + blockIdx.x * blockDim.x; std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) return; - DataType acc = DataType(0); + OutputType acc = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); - DataType m = 0.5f * (a + b); - bool a_zero = a == 0; - bool b_zero = b == 0; + OutputType m = 0.5f * (a + b); + bool a_zero = a == 0; + bool b_zero = b == 0; - DataType p = (!a_zero * m) / (a_zero + a); - DataType q = (!b_zero * m) / (b_zero + b); + OutputType p = (!a_zero * m) / (a_zero + a); + OutputType q = (!b_zero * m) / (b_zero + b); bool p_zero = p == 0; bool q_zero = q == 0; @@ -278,8 +287,8 @@ RAFT_KERNEL naiveJensenShannonDistanceKernel(DataType* dist, dist[outidx] = acc; } -template -RAFT_KERNEL naiveRussellRaoDistanceKernel(OutType* dist, +template +RAFT_KERNEL naiveRussellRaoDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -290,12 +299,12 @@ RAFT_KERNEL naiveRussellRaoDistanceKernel(OutType* dist, std::int64_t midx = threadIdx.x + blockIdx.x * blockDim.x; std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) return; - OutType acc = OutType(0); + OutputType acc = 
OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); acc += (a * b); } acc = (k - acc) / k; @@ -303,8 +312,8 @@ RAFT_KERNEL naiveRussellRaoDistanceKernel(OutType* dist, dist[outidx] = acc; } -template -RAFT_KERNEL naiveKLDivergenceDistanceKernel(OutType* dist, +template +RAFT_KERNEL naiveKLDivergenceDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -315,12 +324,12 @@ RAFT_KERNEL naiveKLDivergenceDistanceKernel(OutType* dist, std::int64_t midx = threadIdx.x + blockIdx.x * blockDim.x; std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) return; - OutType acc = OutType(0); + OutputType acc = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? 
i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); bool b_zero = (b == 0); bool a_zero = (a == 0); acc += a * (log(a + a_zero) - log(b + b_zero)); @@ -330,8 +339,8 @@ RAFT_KERNEL naiveKLDivergenceDistanceKernel(OutType* dist, dist[outidx] = acc; } -template -RAFT_KERNEL naiveCorrelationDistanceKernel(OutType* dist, +template +RAFT_KERNEL naiveCorrelationDistanceKernel(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -342,16 +351,16 @@ RAFT_KERNEL naiveCorrelationDistanceKernel(OutType* dist, std::int64_t midx = threadIdx.x + blockIdx.x * blockDim.x; std::int64_t nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) return; - OutType acc = OutType(0); - auto a_norm = DataType(0); - auto b_norm = DataType(0); - auto a_sq_norm = DataType(0); - auto b_sq_norm = DataType(0); + OutputType acc = OutputType(0); + auto a_norm = OutputType(0); + auto b_norm = OutputType(0); + auto a_sq_norm = OutputType(0); + auto b_sq_norm = OutputType(0); for (std::int64_t i = 0; i < k; ++i) { std::int64_t xidx = isRowMajor ? i + midx * k : i * m + midx; std::int64_t yidx = isRowMajor ? 
i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); a_norm += a; b_norm += b; a_sq_norm += (a * a); @@ -369,8 +378,8 @@ RAFT_KERNEL naiveCorrelationDistanceKernel(OutType* dist, dist[outidx] = acc; } -template -void naiveDistance(DataType* dist, +template +void naiveDistance(OutputType* dist, const DataType* x, const DataType* y, std::int64_t m, @@ -378,8 +387,8 @@ void naiveDistance(DataType* dist, std::int64_t k, cuvs::distance::DistanceType type, bool isRowMajor, - DataType metric_arg = 2.0f, - cudaStream_t stream = 0) + OutputType metric_arg = 2.0f, + cudaStream_t stream = 0) { static const dim3 TPB(4, 256, 1); dim3 nblks(raft::ceildiv(m, (std::int64_t)TPB.x), raft::ceildiv(n, (std::int64_t)TPB.y), 1); @@ -388,49 +397,50 @@ void naiveDistance(DataType* dist, case cuvs::distance::DistanceType::Canberra: case cuvs::distance::DistanceType::Linf: case cuvs::distance::DistanceType::L1: - naiveL1_Linf_CanberraDistanceKernel + naiveL1_Linf_CanberraDistanceKernel <<>>(dist, x, y, m, n, k, type, isRowMajor); break; case cuvs::distance::DistanceType::L2SqrtUnexpanded: case cuvs::distance::DistanceType::L2Unexpanded: case cuvs::distance::DistanceType::L2SqrtExpanded: case cuvs::distance::DistanceType::L2Expanded: - naiveDistanceKernel + naiveDistanceKernel <<>>(dist, x, y, m, n, k, type, isRowMajor); break; case cuvs::distance::DistanceType::CosineExpanded: - naiveCosineDistanceKernel + naiveCosineDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; case cuvs::distance::DistanceType::HellingerExpanded: - naiveHellingerDistanceKernel + naiveHellingerDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; case cuvs::distance::DistanceType::LpUnexpanded: - naiveLpUnexpDistanceKernel + naiveLpUnexpDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor, metric_arg); break; case cuvs::distance::DistanceType::HammingUnexpanded: - naiveHammingDistanceKernel + naiveHammingDistanceKernel 
<<>>(dist, x, y, m, n, k, isRowMajor); break; case cuvs::distance::DistanceType::InnerProduct: - naiveInnerProductKernel<<>>(dist, x, y, m, n, k, isRowMajor); + naiveInnerProductKernel + <<>>(dist, x, y, m, n, k, isRowMajor); break; case cuvs::distance::DistanceType::JensenShannon: - naiveJensenShannonDistanceKernel + naiveJensenShannonDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; case cuvs::distance::DistanceType::RusselRaoExpanded: - naiveRussellRaoDistanceKernel + naiveRussellRaoDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; case cuvs::distance::DistanceType::KLDivergence: - naiveKLDivergenceDistanceKernel + naiveKLDivergenceDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; case cuvs::distance::DistanceType::CorrelationExpanded: - naiveCorrelationDistanceKernel + naiveCorrelationDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; default: FAIL() << "should be here\n"; @@ -438,13 +448,13 @@ void naiveDistance(DataType* dist, RAFT_CUDA_TRY(cudaPeekAtLastError()); } -template +template struct DistanceInputs { - DataType tolerance; + OutputType tolerance; std::int64_t m, n, k; bool isRowMajor; unsigned long long int seed; - DataType metric_arg = 2.0f; + OutputType metric_arg = 2.0f; }; template @@ -472,31 +482,38 @@ constexpr bool layout_to_row_major() return false; } -template +template void distanceLauncher(raft::resources const& handle, DataType* x, DataType* y, - DataType* dist, - DataType* dist2, + OutputType* dist, + OutputType* dist2, std::int64_t m, std::int64_t n, std::int64_t k, - DistanceInputs& params, - DataType threshold, - DataType metric_arg = 2.0f) + DistanceInputs& params, + OutputType threshold, + OutputType metric_arg = 2.0f) { - auto x_v = raft::make_device_matrix_view(x, m, k); - auto y_v = raft::make_device_matrix_view(y, n, k); - auto dist_v = raft::make_device_matrix_view(dist, m, n); + // Create device matrix views for the input and output data + auto x_v = raft::make_device_matrix_view(x, m, 
k); + auto y_v = raft::make_device_matrix_view(y, n, k); + auto dist_v = raft::make_device_matrix_view(dist, m, n); + // Explicitly instantiate the template function cuvs::distance::pairwise_distance(handle, x_v, y_v, dist_v, distanceType, metric_arg); } -template -class DistanceTest : public ::testing::TestWithParam> { +template +class DistanceTest : public ::testing::TestWithParam> { public: DistanceTest() - : params(::testing::TestWithParam>::GetParam()), + : params(::testing::TestWithParam>::GetParam()), stream(raft::resource::get_cuda_stream(handle)), x(params.m * params.k, stream), y(params.n * params.k, stream), @@ -513,11 +530,11 @@ class DistanceTest : public ::testing::TestWithParam> { "test::%s/%s", testInfo->test_suite_name(), testInfo->name()); raft::random::RngState r(params.seed); - std::int64_t m = params.m; - std::int64_t n = params.n; - std::int64_t k = params.k; - DataType metric_arg = params.metric_arg; - bool isRowMajor = params.isRowMajor; + std::int64_t m = params.m; + std::int64_t n = params.n; + std::int64_t k = params.k; + OutputType metric_arg = params.metric_arg; + bool isRowMajor = params.isRowMajor; if (distanceType == cuvs::distance::DistanceType::HellingerExpanded || distanceType == cuvs::distance::DistanceType::JensenShannon || distanceType == cuvs::distance::DistanceType::KLDivergence) { @@ -537,33 +554,33 @@ class DistanceTest : public ::testing::TestWithParam> { naiveDistance( dist_ref.data(), x.data(), y.data(), m, n, k, distanceType, isRowMajor, metric_arg, stream); - DataType threshold = -10000.f; + OutputType threshold = -10000.f; if (isRowMajor) { - distanceLauncher(handle, - x.data(), - y.data(), - dist.data(), - dist2.data(), - m, - n, - k, - params, - threshold, - metric_arg); + distanceLauncher(handle, + x.data(), + y.data(), + dist.data(), + dist2.data(), + m, + n, + k, + params, + threshold, + metric_arg); } else { - distanceLauncher(handle, - x.data(), - y.data(), - dist.data(), - dist2.data(), - m, - n, - k, - 
params, - threshold, - metric_arg); + distanceLauncher(handle, + x.data(), + y.data(), + dist.data(), + dist2.data(), + m, + n, + k, + params, + threshold, + metric_arg); } raft::resource::sync_stream(handle, stream); } @@ -572,8 +589,9 @@ class DistanceTest : public ::testing::TestWithParam> { raft::resources handle; cudaStream_t stream; - DistanceInputs params; - rmm::device_uvector x, y, dist_ref, dist, dist2; + DistanceInputs params; + rmm::device_uvector x, y; + rmm::device_uvector dist_ref, dist, dist2; }; /* @@ -583,12 +601,15 @@ class DistanceTest : public ::testing::TestWithParam> { * It may happen that though both X and Y are same buffer but user passes * different dimensions for them like in case of tiled_brute_force_knn. */ -template -class DistanceTestSameBuffer : public ::testing::TestWithParam> { +template +class DistanceTestSameBuffer + : public ::testing::TestWithParam> { public: - using dev_vector = rmm::device_uvector; + using dev_vector = rmm::device_uvector; DistanceTestSameBuffer() - : params(::testing::TestWithParam>::GetParam()), + : params(::testing::TestWithParam>::GetParam()), stream(raft::resource::get_cuda_stream(handle)), x(params.m * params.k, stream), dist_ref({dev_vector(params.m * params.m, stream), dev_vector(params.m * params.m, stream)}), @@ -604,11 +625,11 @@ class DistanceTestSameBuffer : public ::testing::TestWithParamtest_suite_name(), testInfo->name()); raft::random::RngState r(params.seed); - std::int64_t m = params.m; - std::int64_t n = params.m; - std::int64_t k = params.k; - DataType metric_arg = params.metric_arg; - bool isRowMajor = params.isRowMajor; + std::int64_t m = params.m; + std::int64_t n = params.m; + std::int64_t k = params.k; + OutputType metric_arg = params.metric_arg; + bool isRowMajor = params.isRowMajor; if (distanceType == cuvs::distance::DistanceType::HellingerExpanded || distanceType == cuvs::distance::DistanceType::JensenShannon || distanceType == cuvs::distance::DistanceType::KLDivergence) { @@ 
-637,33 +658,35 @@ class DistanceTestSameBuffer : public ::testing::TestWithParam(handle, - x.data(), - x.data(), - dist[i].data(), - dist2[i].data(), - m, - n, - k, - params, - threshold, - metric_arg); + distanceLauncher( + handle, + x.data(), + x.data(), + dist[i].data(), + dist2[i].data(), + m, + n, + k, + params, + threshold, + metric_arg); } else { - distanceLauncher(handle, - x.data(), - x.data(), - dist[i].data(), - dist2[i].data(), - m, - n, - k, - params, - threshold, - metric_arg); + distanceLauncher( + handle, + x.data(), + x.data(), + dist[i].data(), + dist2[i].data(), + m, + n, + k, + params, + threshold, + metric_arg); } } raft::resource::sync_stream(handle, stream); @@ -673,8 +696,8 @@ class DistanceTestSameBuffer : public ::testing::TestWithParam params; - dev_vector x; + DistanceInputs params; + rmm::device_uvector x; static const std::int64_t N = 2; std::array dist_ref, dist, dist2; }; diff --git a/cpp/test/neighbors/ann_brute_force.cuh b/cpp/test/neighbors/ann_brute_force.cuh new file mode 100644 index 0000000000..461a202f22 --- /dev/null +++ b/cpp/test/neighbors/ann_brute_force.cuh @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "../test_utils.cuh" + +#include "knn_utils.cuh" +#include "naive_knn.cuh" + +#include + +#include +#include + +#include + +namespace cuvs::neighbors::brute_force { + +template +struct AnnBruteForceInputs { + IdxT num_queries; + IdxT num_db_vecs; + IdxT dim; + IdxT k; + cuvs::distance::DistanceType metric; + float metric_arg = 0.0f; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const AnnBruteForceInputs& p) +{ + os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", " + << static_cast(p.metric) << static_cast(p.metric_arg) << '}' << std::endl; + return os; +} + +template +class AnnBruteForceTest : public ::testing::TestWithParam> { + public: + AnnBruteForceTest() + : stream_(raft::resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam>::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + void testBruteForce() + { + size_t queries_size = ps.num_queries * ps.k; + + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + + cuvs::neighbors::naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database.data(), + ps.num_queries, + ps.num_db_vecs, + ps.dim, + ps.k, + ps.metric); + raft::resource::sync_stream(handle_); + + { + // Require exact result for brute force + rmm::device_uvector distances_bruteforce_dev(queries_size, stream_); + rmm::device_uvector indices_bruteforce_dev(queries_size, stream_); + + auto idx = [this]() { + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.num_db_vecs, ps.dim); + + return brute_force::build(handle_, database_view, ps.metric, ps.metric_arg); + }(); + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.num_queries, ps.dim); + auto indices_out_view = raft::make_device_matrix_view( + indices_bruteforce_dev.data(), 
ps.num_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_bruteforce_dev.data(), ps.num_queries, ps.k); + + brute_force::search( + handle_, idx, search_queries_view, indices_out_view, dists_out_view, std::nullopt); + + raft::resource::sync_stream(handle_); + + ASSERT_TRUE(cuvs::neighbors::devArrMatchKnnPair(indices_naive_dev.data(), + indices_bruteforce_dev.data(), + distances_naive_dev.data(), + distances_bruteforce_dev.data(), + ps.num_queries, + ps.k, + 0.001f, + stream_, + true)); + brute_force::search( + handle_, idx, search_queries_view, indices_out_view, dists_out_view, std::nullopt); + } + } + + void SetUp() override + { + database.resize(ps.num_db_vecs * ps.dim, stream_); + search_queries.resize(ps.num_queries * ps.dim, stream_); + + raft::random::RngState r(1234ULL); + if constexpr (std::is_same{} || std::is_same{}) { + raft::random::uniform( + handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0)); + raft::random::uniform( + handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0)); + } else { + raft::random::uniformInt( + handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(1), DataT(20)); + raft::random::uniformInt( + handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(1), DataT(20)); + } + raft::resource::sync_stream(handle_); + } + + void TearDown() override + { + raft::resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnBruteForceInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + +const std::vector> inputs = { + // test various dims (aligned and not aligned to vector sizes) + {1000, 10000, 1, 16, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 2, 16, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 3, 16, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 4, 
16, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 5, 16, cuvs::distance::DistanceType::InnerProduct}, + {1000, 10000, 8, 16, cuvs::distance::DistanceType::InnerProduct}, + {1000, 10000, 5, 16, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1000, 10000, 8, 16, cuvs::distance::DistanceType::L2SqrtExpanded}, + + // test dims that do not fit into kernel shared memory limits + {1000, 10000, 2048, 16, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 2049, 16, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 2050, 16, cuvs::distance::DistanceType::InnerProduct}, + {1000, 10000, 2051, 16, cuvs::distance::DistanceType::InnerProduct}, + {1000, 10000, 2052, 16, cuvs::distance::DistanceType::InnerProduct}, + {1000, 10000, 2053, 16, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 2056, 16, cuvs::distance::DistanceType::L2Expanded}, + + // test fused_l2_knn + {100, 1000, 16, 10, cuvs::distance::DistanceType::L2Expanded}, + {256, 256, 30, 10, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 16, 10, cuvs::distance::DistanceType::L2Expanded}, + {100, 1000, 16, 50, cuvs::distance::DistanceType::L2Expanded}, + {20, 10000, 16, 10, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 16, 50, cuvs::distance::DistanceType::L2Expanded}, + {1000, 10000, 32, 50, cuvs::distance::DistanceType::L2Expanded}, + {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Expanded}, + {100, 1000, 16, 10, cuvs::distance::DistanceType::L2Unexpanded}, + {1000, 10000, 16, 10, cuvs::distance::DistanceType::L2Unexpanded}, + {100, 1000, 16, 50, cuvs::distance::DistanceType::L2Unexpanded}, + {20, 10000, 16, 50, cuvs::distance::DistanceType::L2Unexpanded}, + {1000, 10000, 16, 50, cuvs::distance::DistanceType::L2Unexpanded}, + {1000, 10000, 32, 50, cuvs::distance::DistanceType::L2Unexpanded}, + {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Unexpanded}, + + // test tile + {256, 512, 16, 8, cuvs::distance::DistanceType::L2Expanded}, + {256, 
512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded}, + {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded}, + {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Expanded}, + {789, 20516, 64, 256, cuvs::distance::DistanceType::L2SqrtExpanded}, + {4, 12, 32, 6, cuvs::distance::DistanceType::L2Expanded}, + {1, 40, 32, 30, cuvs::distance::DistanceType::L2Expanded}, + {1000, 500000, 128, 128, cuvs::distance::DistanceType::L2Expanded}}; +} // namespace cuvs::neighbors::brute_force diff --git a/cpp/test/neighbors/ann_brute_force/test_float.cu b/cpp/test/neighbors/ann_brute_force/test_float.cu new file mode 100644 index 0000000000..ded371c427 --- /dev/null +++ b/cpp/test/neighbors/ann_brute_force/test_float.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ann_brute_force.cuh" + +#include + +namespace cuvs::neighbors::brute_force { + +using AnnBruteForceTest_float = AnnBruteForceTest; +TEST_P(AnnBruteForceTest_float, AnnBruteForce) { this->testBruteForce(); } + +INSTANTIATE_TEST_CASE_P(AnnBruteForceTest, AnnBruteForceTest_float, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::brute_force diff --git a/cpp/test/neighbors/ann_brute_force/test_half.cu b/cpp/test/neighbors/ann_brute_force/test_half.cu new file mode 100644 index 0000000000..39b7b79822 --- /dev/null +++ b/cpp/test/neighbors/ann_brute_force/test_half.cu @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../ann_brute_force.cuh" + +#include + +namespace cuvs::neighbors::brute_force { + +using AnnBruteForceTest_half_float = AnnBruteForceTest; +TEST_P(AnnBruteForceTest_half_float, AnnBruteForce) { this->testBruteForce(); } + +INSTANTIATE_TEST_CASE_P(AnnBruteForceTest, + AnnBruteForceTest_half_float, + ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::brute_force diff --git a/cpp/test/neighbors/ann_hnsw_c.cu b/cpp/test/neighbors/ann_hnsw_c.cu new file mode 100644 index 0000000000..fc740b924c --- /dev/null +++ b/cpp/test/neighbors/ann_hnsw_c.cu @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +float dataset[4][2] = {{0.74021935, 0.9209938}, + {0.03902049, 0.9689629}, + {0.92514056, 0.4463501}, + {0.6673192, 0.10993068}}; +float queries[4][2] = {{0.48216683, 0.0428398}, + {0.5084142, 0.6545497}, + {0.51260436, 0.2643005}, + {0.05198065, 0.5789965}}; + +std::vector neighbors_exp = {3, 0, 3, 1}; +std::vector distances_exp = {0.03878258, 0.12472608, 0.04776672, 0.15224178}; + +TEST(CagraHnswC, BuildSearch) +{ + // create cuvsResources_t + cuvsResources_t res; + cuvsResourcesCreate(&res); + + // create dataset DLTensor + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = dataset; + dataset_tensor.dl_tensor.device.device_type = kDLCPU; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {4, 2}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = nullptr; + + // create index + cuvsCagraIndex_t index; + cuvsCagraIndexCreate(&index); + + // build index + cuvsCagraIndexParams_t build_params; + cuvsCagraIndexParamsCreate(&build_params); + cuvsCagraBuild(res, build_params, &dataset_tensor, index); + cuvsCagraSerializeToHnswlib(res, "/tmp/cagra_hnswlib.index", index); + + 
DLManagedTensor queries_tensor; + queries_tensor.dl_tensor.data = queries; + queries_tensor.dl_tensor.device.device_type = kDLCPU; + queries_tensor.dl_tensor.ndim = 2; + queries_tensor.dl_tensor.dtype.code = kDLFloat; + queries_tensor.dl_tensor.dtype.bits = 32; + queries_tensor.dl_tensor.dtype.lanes = 1; + int64_t queries_shape[2] = {4, 2}; + queries_tensor.dl_tensor.shape = queries_shape; + queries_tensor.dl_tensor.strides = nullptr; + + // create neighbors DLTensor + std::vector neighbors(4); + + DLManagedTensor neighbors_tensor; + neighbors_tensor.dl_tensor.data = neighbors.data(); + neighbors_tensor.dl_tensor.device.device_type = kDLCPU; + neighbors_tensor.dl_tensor.ndim = 2; + neighbors_tensor.dl_tensor.dtype.code = kDLUInt; + neighbors_tensor.dl_tensor.dtype.bits = 64; + neighbors_tensor.dl_tensor.dtype.lanes = 1; + int64_t neighbors_shape[2] = {4, 1}; + neighbors_tensor.dl_tensor.shape = neighbors_shape; + neighbors_tensor.dl_tensor.strides = nullptr; + + // create distances DLTensor + std::vector distances(4); + + DLManagedTensor distances_tensor; + distances_tensor.dl_tensor.data = distances.data(); + distances_tensor.dl_tensor.device.device_type = kDLCPU; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + int64_t distances_shape[2] = {4, 1}; + distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = nullptr; + + // create hnsw index + cuvsHnswIndex_t hnsw_index; + cuvsHnswIndexCreate(&hnsw_index); + hnsw_index->dtype = index->dtype; + cuvsHnswDeserialize(res, "/tmp/cagra_hnswlib.index", 2, L2Expanded, hnsw_index); + + // search index + cuvsHnswSearchParams_t search_params; + cuvsHnswSearchParamsCreate(&search_params); + cuvsHnswSearch( + res, search_params, hnsw_index, &queries_tensor, &neighbors_tensor, &distances_tensor); + + // verify output + 
ASSERT_TRUE(cuvs::hostVecMatch(neighbors_exp, neighbors, cuvs::Compare())); + ASSERT_TRUE(cuvs::hostVecMatch(distances_exp, distances, cuvs::CompareApprox(0.001f))); + + cuvsCagraIndexParamsDestroy(build_params); + cuvsCagraIndexDestroy(index); + cuvsHnswSearchParamsDestroy(search_params); + cuvsHnswIndexDestroy(hnsw_index); + cuvsResourcesDestroy(res); +} diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh index f907baa1fb..17ec84097c 100644 --- a/cpp/test/neighbors/ann_ivf_flat.cuh +++ b/cpp/test/neighbors/ann_ivf_flat.cuh @@ -20,7 +20,9 @@ #include "naive_knn.cuh" #include +#include #include +#include #include #include @@ -533,63 +535,109 @@ const std::vector> inputs = { // test various dims (aligned and not aligned to vector sizes) {1000, 10000, 1, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, true}, + {1000, 10000, 8, 16, 40, 1024, 
cuvs::distance::DistanceType::CosineExpanded, true}, // test dims that do not fit into kernel shared memory limits {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 2051, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 2051, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 10000, 2052, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 2052, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 2053, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 2053, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 10000, 2056, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 2056, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, // various random combinations {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 10000, 16, 10, 20, 512, 
cuvs::distance::DistanceType::CosineExpanded, false}, {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, // host input data {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true}, {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true}, {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true}, {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2Expanded, false, true}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::CosineExpanded, false, true}, {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true}, {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true}, {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, + {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true}, // // host input data with prefetching for kernel copy overlapping {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, 
false, true, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true, true}, {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false, true, true}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true, true}, {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2Expanded, false, true, true}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true, true}, {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2Expanded, false, true, true}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::CosineExpanded, false, true, true}, {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true, true}, + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true, true}, {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true, true}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true, true}, {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true, true}, + {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false, true, true}, {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::InnerProduct, true}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::CosineExpanded, true}, {20, 100000, 16, 10, 20, 1024, 
cuvs::distance::DistanceType::InnerProduct, true}, + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, // test splitting the big query batches (> max gridDim.y) into smaller batches {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, false}, + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, false}, {1000000, 1024, 32, 10, 256, 256, cuvs::distance::DistanceType::InnerProduct, false}, + {1000000, 1024, 32, 10, 256, 256, cuvs::distance::DistanceType::CosineExpanded, false}, {98306, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, true}, + {98306, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, true}, // test radix_sort for getting the cluster selection {1000, @@ -608,6 +656,14 @@ const std::vector> inputs = { raft::matrix::detail::select::warpsort::kMaxCapacity * 4, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, + 10000, + 16, + 10, + raft::matrix::detail::select::warpsort::kMaxCapacity * 4, + raft::matrix::detail::select::warpsort::kMaxCapacity * 4, + cuvs::distance::DistanceType::CosineExpanded, + false}, // The following two test cases should show very similar recall. 
// num_queries, num_db_vecs, dim, k, nprobe, nlist, metric, adaptive_centers diff --git a/cpp/test/neighbors/brute_force.cu b/cpp/test/neighbors/brute_force.cu index c97bb5531c..f1a05e045d 100644 --- a/cpp/test/neighbors/brute_force.cu +++ b/cpp/test/neighbors/brute_force.cu @@ -25,9 +25,13 @@ #include #include +#include + namespace cuvs::neighbors::brute_force { + +template struct KNNInputs { - std::vector> input; + std::vector> input; int k; std::vector labels; }; @@ -53,11 +57,11 @@ RAFT_KERNEL build_expected_output(int* output, int n_rows, int k, const int* lab } } -template -class KNNTest : public ::testing::TestWithParam { +template +class KNNTest : public ::testing::TestWithParam> { public: KNNTest() - : params_(::testing::TestWithParam::GetParam()), + : params_(::testing::TestWithParam>::GetParam()), stream(raft::resource::get_cuda_stream(handle)), actual_labels_(0, stream), expected_labels_(0, stream), @@ -85,7 +89,7 @@ class KNNTest : public ::testing::TestWithParam { auto indices = raft::make_device_matrix_view(indices_.data(), rows_, k_); auto distances = - raft::make_device_matrix_view(distances_.data(), rows_, k_); + raft::make_device_matrix_view(distances_.data(), rows_, k_); auto metric = cuvs::distance::DistanceType::L2Unexpanded; auto idx = cuvs::neighbors::brute_force::build(handle, index, metric); @@ -119,23 +123,22 @@ class KNNTest : public ::testing::TestWithParam { cudaMemsetAsync(actual_labels_.data(), 0, actual_labels_.size() * sizeof(int), stream)); RAFT_CUDA_TRY( cudaMemsetAsync(expected_labels_.data(), 0, expected_labels_.size() * sizeof(int), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(input_.data(), 0, input_.size() * sizeof(float), stream)); - RAFT_CUDA_TRY( - cudaMemsetAsync(search_data_.data(), 0, search_data_.size() * sizeof(float), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(input_.data(), 0, input_.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(search_data_.data(), 0, search_data_.size() * sizeof(T), stream)); 
RAFT_CUDA_TRY(cudaMemsetAsync(indices_.data(), 0, indices_.size() * sizeof(IdxT), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(distances_.data(), 0, distances_.size() * sizeof(float), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(distances_.data(), 0, distances_.size() * sizeof(DistT), stream)); RAFT_CUDA_TRY( cudaMemsetAsync(search_labels_.data(), 0, search_labels_.size() * sizeof(int), stream)); - std::vector row_major_input; + std::vector row_major_input; for (std::size_t i = 0; i < params_.input.size(); ++i) { for (std::size_t j = 0; j < params_.input[i].size(); ++j) { row_major_input.push_back(params_.input[i][j]); } } rmm::device_buffer input_d = - rmm::device_buffer(row_major_input.data(), row_major_input.size() * sizeof(float), stream); - float* input_ptr = static_cast(input_d.data()); + rmm::device_buffer(row_major_input.data(), row_major_input.size() * sizeof(T), stream); + T* input_ptr = static_cast(input_d.data()); rmm::device_buffer labels_d = rmm::device_buffer(params_.labels.data(), params_.labels.size() * sizeof(int), stream); @@ -151,13 +154,13 @@ class KNNTest : public ::testing::TestWithParam { raft::resources handle; cudaStream_t stream; - KNNInputs params_; + KNNInputs params_; int rows_; int cols_; - rmm::device_uvector input_; - rmm::device_uvector search_data_; + rmm::device_uvector input_; + rmm::device_uvector search_data_; rmm::device_uvector indices_; - rmm::device_uvector distances_; + rmm::device_uvector distances_; int k_; rmm::device_uvector search_labels_; @@ -165,7 +168,8 @@ class KNNTest : public ::testing::TestWithParam { rmm::device_uvector expected_labels_; }; -const std::vector inputs = { +template +const std::vector> inputs = { // 2D {{ {2.7810836, 2.550537003}, @@ -182,10 +186,14 @@ const std::vector inputs = { 2, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1}}}; -typedef KNNTest KNNTestFint64_t; -TEST_P(KNNTestFint64_t, BruteForce) { this->testBruteForce(); } +typedef KNNTest KNNTest_float_int64_t; +TEST_P(KNNTest_float_int64_t, BruteForce) { 
this->testBruteForce(); } + +typedef KNNTest KNNTest_half_int64_t; +TEST_P(KNNTest_half_int64_t, BruteForce) { this->testBruteForce(); } -INSTANTIATE_TEST_CASE_P(KNNTest, KNNTestFint64_t, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(KNNTest, KNNTest_float_int64_t, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(KNNTest, KNNTest_half_int64_t, ::testing::ValuesIn(inputs)); // Also test with larger random inputs, including col-major inputs struct RandomKNNInputs { @@ -205,7 +213,7 @@ std::ostream& operator<<(std::ostream& os, const RandomKNNInputs& input) << " row_major:" << input.row_major; } -template +template class RandomBruteForceKNNTest : public ::testing::TestWithParam { public: RandomBruteForceKNNTest() @@ -229,67 +237,153 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam raft::matrix::fill( handle_, raft::make_device_matrix_view(cuvs_distances_.data(), params_.num_queries, params_.k), - T{0.0}); + DistT{0.0}); raft::matrix::fill( handle_, raft::make_device_matrix_view(ref_distances_.data(), params_.num_queries, params_.k), - T{0.0}); + DistT{0.0}); } protected: + void cpu_distance(const T* d_A, + const T* d_B, + DistT* d_vals, + bool is_row_major_A, + bool is_row_major_B, + bool is_row_major_C, + cudaStream_t stream, + DistT alpha = 1.0, + DistT beta = 0.0) + { + size_t size_A = params_.num_queries * params_.dim * sizeof(T); + size_t size_B = params_.num_db_vecs * params_.dim * sizeof(T); + size_t size_vals = params_.num_queries * params_.num_db_vecs * sizeof(DistT); + + T* h_A = static_cast(malloc(size_A)); + T* h_B = static_cast(malloc(size_B)); + DistT* h_vals = static_cast(malloc(size_vals)); + + cudaMemcpyAsync(h_A, d_A, size_A, cudaMemcpyDeviceToHost, stream); + cudaMemcpyAsync(h_B, d_B, size_B, cudaMemcpyDeviceToHost, stream); + cudaMemcpyAsync(h_vals, d_vals, size_vals, cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + + bool trans_a = is_row_major_A; + bool trans_b = is_row_major_B; + bool trans_c = 
is_row_major_C; + + for (int64_t i = 0; i < params_.num_queries; ++i) { + for (int64_t j = 0; j < params_.num_db_vecs; ++j) { + DistT sum = 0; + DistT norms_A = 0; + DistT norms_B = 0; + + for (int64_t l = 0; l < params_.dim; ++l) { + int64_t a_index = trans_a ? i * params_.dim + l : l * params_.num_queries + i; + int64_t b_index = trans_b ? j * params_.dim + l : l * params_.num_db_vecs + j; + DistT A_v; + DistT B_v; + if constexpr (sizeof(T) == 2) { + A_v = __half2float(h_A[a_index]); + B_v = __half2float(h_B[b_index]); + } else { + A_v = h_A[a_index]; + B_v = h_B[b_index]; + } + + sum += A_v * B_v; + + norms_A += A_v * A_v; + norms_B += B_v * B_v; + } + + int64_t c_index = trans_c ? i * params_.num_db_vecs + j : j * params_.num_queries + i; + + h_vals[c_index] = alpha * sum + beta * h_vals[c_index]; + if (params_.metric == cuvs::distance::DistanceType::L2Expanded) { + h_vals[c_index] = DistT(-2.0) * h_vals[c_index] + norms_A + norms_B; + } else if (params_.metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + h_vals[c_index] = std::sqrt(DistT(-2.0) * h_vals[c_index] + norms_A + norms_B); + } else if (params_.metric == cuvs::distance::DistanceType::CosineExpanded) { + h_vals[c_index] = DistT(1.0) - h_vals[c_index] / std::sqrt(norms_A * norms_B); + } + } + } + cudaMemcpyAsync(d_vals, h_vals, size_vals, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + + free(h_A); + free(h_B); + free(h_vals); + } + void testBruteForce() { - float metric_arg = 3.0; + DistT metric_arg = 3.0; // calculate the naive knn, by calculating the full pairwise distances and doing a k-select - rmm::device_uvector temp_distances(num_db_vecs * num_queries, stream_); + rmm::device_uvector temp_distances(num_db_vecs * num_queries, stream_); rmm::device_uvector workspace(0, stream_); auto temp_dist = temp_distances.data(); - rmm::device_uvector temp_row_major_dist(num_db_vecs * num_queries, stream_); - - if (params_.row_major) { - distance::pairwise_distance( - handle_, - 
raft::make_device_matrix_view( - search_queries.data(), params_.num_queries, params_.dim), - raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim), - raft::make_device_matrix_view(temp_distances.data(), num_queries, num_db_vecs), - metric, - metric_arg); - + rmm::device_uvector temp_row_major_dist(num_db_vecs * num_queries, stream_); + + // For the complex post processes in these algorithms, we use CPU logic to make the baseline. + if (metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::L2SqrtExpanded || + metric == cuvs::distance::DistanceType::CosineExpanded) { + cpu_distance(search_queries.data(), + database.data(), + temp_distances.data(), + params_.row_major, + params_.row_major, + true, + stream_); } else { - distance::pairwise_distance(handle_, - raft::make_device_matrix_view( - search_queries.data(), params_.num_queries, params_.dim), - raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim), - raft::make_device_matrix_view( - temp_distances.data(), num_queries, num_db_vecs), - metric, - metric_arg); - - // the pairwisse_distance call assumes that the inputs and outputs are all either row-major - // or col-major - meaning we have to transpose the output back for col-major queries - // for comparison - raft::linalg::transpose( - handle_, temp_dist, temp_row_major_dist.data(), num_queries, num_db_vecs, stream_); - temp_dist = temp_row_major_dist.data(); + if (params_.row_major) { + distance::pairwise_distance(handle_, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim), + raft::make_device_matrix_view( + temp_distances.data(), num_queries, num_db_vecs), + metric, + metric_arg); + + } else { + distance::pairwise_distance( + handle_, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + 
raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim), + raft::make_device_matrix_view( + temp_distances.data(), num_queries, num_db_vecs), + metric, + metric_arg); + + // the pairwise_distance call assumes that the inputs and outputs are all either row-major + // or col-major - meaning we have to transpose the output back for col-major queries + // for comparison + raft::linalg::transpose( + handle_, temp_dist, temp_row_major_dist.data(), num_queries, num_db_vecs, stream_); + temp_dist = temp_row_major_dist.data(); + } } cuvs::selection::select_k( handle_, - raft::make_device_matrix_view(temp_dist, num_queries, num_db_vecs), + raft::make_device_matrix_view(temp_dist, num_queries, num_db_vecs), std::nullopt, raft::make_device_matrix_view(ref_distances_.data(), params_.num_queries, params_.k), raft::make_device_matrix_view(ref_indices_.data(), params_.num_queries, params_.k), cuvs::distance::is_min_close(metric), true); - auto indices = raft::make_device_matrix_view( + auto indices = raft::make_device_matrix_view( cuvs_indices_.data(), params_.num_queries, params_.k); - auto distances = raft::make_device_matrix_view( + auto distances = raft::make_device_matrix_view( cuvs_distances_.data(), params_.num_queries, params_.k); if (params_.row_major) { @@ -332,7 +426,7 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam cuvs_distances_.data(), num_queries, k_, - float(0.001), + DistT(0.001), stream_, true)); } @@ -364,16 +458,16 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam rmm::device_uvector database; rmm::device_uvector search_queries; rmm::device_uvector cuvs_indices_; - rmm::device_uvector cuvs_distances_; + rmm::device_uvector cuvs_distances_; rmm::device_uvector ref_indices_; - rmm::device_uvector ref_distances_; + rmm::device_uvector ref_distances_; int k_; cuvs::distance::DistanceType metric; }; const std::vector random_inputs = { // test each distance metric on a small-ish input, with row-major 
inputs - {256, 512, 16, 8, cuvs::distance::DistanceType::L2Expanded, true}, + {100, 256, 2, 65, cuvs::distance::DistanceType::L2Expanded, true}, {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, true}, {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true}, {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, true}, @@ -387,7 +481,7 @@ const std::vector random_inputs = { {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true}, {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, true}, // test each distance metric with col-major inputs - {256, 512, 16, 8, cuvs::distance::DistanceType::L2Expanded, false}, + {256, 512, 16, 7, cuvs::distance::DistanceType::L2Expanded, false}, {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, false}, {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false}, {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, false}, @@ -404,17 +498,24 @@ const std::vector random_inputs = { {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Expanded, false}, {345, 1023, 16, 128, cuvs::distance::DistanceType::CosineExpanded, true}, {789, 20516, 64, 256, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 500000, 128, 128, cuvs::distance::DistanceType::L2Expanded, true}, - {1000, 500000, 128, 128, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, false}, {1000, 5000, 128, 128, cuvs::distance::DistanceType::LpUnexpanded, true}, {1000, 5000, 128, 128, cuvs::distance::DistanceType::L2SqrtExpanded, false}, {1000, 5000, 128, 128, cuvs::distance::DistanceType::InnerProduct, false}}; -typedef RandomBruteForceKNNTest RandomBruteForceKNNTestF; +typedef RandomBruteForceKNNTest RandomBruteForceKNNTestF; TEST_P(RandomBruteForceKNNTestF, BruteForce) { this->testBruteForce(); } +typedef RandomBruteForceKNNTest 
RandomBruteForceKNNTestH; +TEST_P(RandomBruteForceKNNTestH, BruteForce) { this->testBruteForce(); } + INSTANTIATE_TEST_CASE_P(RandomBruteForceKNNTest, RandomBruteForceKNNTestF, ::testing::ValuesIn(random_inputs)); +INSTANTIATE_TEST_CASE_P(RandomBruteForceKNNTest, + RandomBruteForceKNNTestH, + ::testing::ValuesIn(random_inputs)); + } // namespace cuvs::neighbors::brute_force diff --git a/cpp/test/neighbors/brute_force_prefiltered.cu b/cpp/test/neighbors/brute_force_prefiltered.cu index 2b8ae9d9a6..9304ee0451 100644 --- a/cpp/test/neighbors/brute_force_prefiltered.cu +++ b/cpp/test/neighbors/brute_force_prefiltered.cu @@ -30,7 +30,11 @@ #include +#include +#include #include +#include +#include #include #include @@ -94,6 +98,10 @@ RAFT_KERNEL normalize_kernel( } } +struct float_to_half { + __host__ __device__ __half operator()(const float x) const { return __float2half(x); } +}; + template void normalize(OutT* theta, const InT* in_vals, @@ -137,7 +145,8 @@ void set_bitmap(const index_t* src, <<>>(src, dst, bitmap, n_edges, n_cols); RAFT_CUDA_TRY(cudaGetLastError()); } -template + +template class PrefilteredBruteForceTest : public ::testing::TestWithParam> { public: @@ -157,22 +166,22 @@ class PrefilteredBruteForceTest protected: index_t create_sparse_matrix_with_rmat(index_t m, index_t n, - value_t sparsity, + float sparsity, rmm::device_uvector& filter_d) { index_t r_scale = (index_t)std::log2(m); index_t c_scale = (index_t)std::log2(n); - index_t n_edges = (index_t)(m * n * 1.0 * sparsity); + index_t n_edges = (index_t)(m * n * 1.0f * sparsity); index_t max_scale = std::max(r_scale, c_scale); rmm::device_uvector out_src{(unsigned long)n_edges, stream}; rmm::device_uvector out_dst{(unsigned long)n_edges, stream}; - rmm::device_uvector theta{(unsigned long)(4 * max_scale), stream}; + rmm::device_uvector theta{(unsigned long)(4 * max_scale), stream}; raft::random::RngState state{2024ULL, raft::random::GeneratorType::GenPC}; - raft::random::uniform(handle, state, 
theta.data(), theta.size(), 0.0f, 1.0f); - normalize( + raft::random::uniform(handle, state, theta.data(), theta.size(), 0.0f, 1.0f); + normalize( theta.data(), theta.data(), max_scale, r_scale, c_scale, r_scale != c_scale, true, stream); raft::random::rmat_rectangular_gen((index_t*)nullptr, out_src.data(), @@ -236,15 +245,15 @@ class PrefilteredBruteForceTest } } - void cpu_sddmm(const std::vector& A, - const std::vector& B, - std::vector& vals, + void cpu_sddmm(const std::vector& A, + const std::vector& B, + std::vector& vals, const std::vector& cols, const std::vector& row_ptrs, bool is_row_major_A, bool is_row_major_B, - value_t alpha = 1.0, - value_t beta = 0.0) + dist_t alpha = 1.0, + dist_t beta = 0.0) { if (params.n_queries * params.dim != static_cast(A.size()) || params.dim * params.n_dataset != static_cast(B.size())) { @@ -257,24 +266,35 @@ class PrefilteredBruteForceTest for (index_t i = 0; i < params.n_queries; ++i) { for (index_t j = row_ptrs[i]; j < row_ptrs[i + 1]; ++j) { - value_t sum = 0; - value_t norms_A = 0; - value_t norms_B = 0; + dist_t sum = 0; + dist_t norms_A = 0; + dist_t norms_B = 0; + for (index_t l = 0; l < params.dim; ++l) { index_t a_index = trans_a ? i * params.dim + l : l * params.n_queries + i; index_t b_index = trans_b ? 
l * params.n_dataset + cols[j] : cols[j] * params.dim + l; - sum += A[a_index] * B[b_index]; - - norms_A += A[a_index] * A[a_index]; - norms_B += B[b_index] * B[b_index]; + dist_t A_v; + dist_t B_v; + if constexpr (sizeof(value_t) == 2) { + A_v = __half2float(__float2half(A[a_index])); + B_v = __half2float(__float2half(B[b_index])); + } else { + A_v = A[a_index]; + B_v = B[b_index]; + } + + sum += A_v * B_v; + + norms_A += A_v * A_v; + norms_B += B_v * B_v; } vals[j] = alpha * sum + beta * vals[j]; if (params.metric == cuvs::distance::DistanceType::L2Expanded) { - vals[j] = value_t(-2.0) * vals[j] + norms_A + norms_B; + vals[j] = dist_t(-2.0) * vals[j] + norms_A + norms_B; } else if (params.metric == cuvs::distance::DistanceType::L2SqrtExpanded) { - vals[j] = std::sqrt(value_t(-2.0) * vals[j] + norms_A + norms_B); + vals[j] = std::sqrt(dist_t(-2.0) * vals[j] + norms_A + norms_B); } else if (params.metric == cuvs::distance::DistanceType::CosineExpanded) { - vals[j] = value_t(1.0) - vals[j] / std::sqrt(norms_A * norms_B); + vals[j] = dist_t(1.0) - vals[j] / std::sqrt(norms_A * norms_B); } } } @@ -282,32 +302,31 @@ class PrefilteredBruteForceTest void cpu_select_k(const std::vector& indptr_h, const std::vector& indices_h, - const std::vector& values_h, + const std::vector& values_h, std::optional>& in_idx_h, index_t n_queries, index_t n_dataset, index_t top_k, - std::vector& out_values_h, + std::vector& out_values_h, std::vector& out_indices_h, bool select_min = true) { - auto comp = [select_min](const std::pair& a, - const std::pair& b) { + auto comp = [select_min](const std::pair& a, + const std::pair& b) { return select_min ? a.first < b.first : a.first >= b.first; }; for (index_t row = 0; row < n_queries; ++row) { - std::priority_queue, - std::vector>, + std::priority_queue, + std::vector>, decltype(comp)> pq(comp); - for (index_t idx = indptr_h[row]; idx < indptr_h[row + 1]; ++idx) { pq.push({values_h[idx], (in_idx_h.has_value()) ? 
(*in_idx_h)[idx] : indices_h[idx]}); if (pq.size() > size_t(top_k)) { pq.pop(); } } - std::vector> row_pairs; + std::vector> row_pairs; while (!pq.empty()) { row_pairs.push_back(pq.top()); pq.pop(); @@ -347,40 +366,80 @@ class PrefilteredBruteForceTest index_t dataset_size = params.n_dataset * params.dim; index_t queries_size = params.n_queries * params.dim; - std::vector dataset_h(dataset_size); - std::vector queries_h(queries_size); + std::vector dataset_h(dataset_size); + std::vector queries_h(queries_size); dataset_d.resize(dataset_size, stream); queries_d.resize(queries_size, stream); auto blobs_in_val = - raft::make_device_matrix(handle, 1, dataset_size + queries_size); + raft::make_device_matrix(handle, 1, dataset_size + queries_size); auto labels = raft::make_device_vector(handle, 1); - raft::random::make_blobs(blobs_in_val.data_handle(), - labels.data_handle(), - 1, - dataset_size + queries_size, - 1, - stream, - false, - nullptr, - nullptr, - value_t(1.0), - false, - value_t(-1.0f), - value_t(1.0f), - uint64_t(2024)); + if constexpr (!std::is_same_v) { + raft::random::make_blobs(blobs_in_val.data_handle(), + labels.data_handle(), + 1, + dataset_size + queries_size, + 1, + stream, + false, + nullptr, + nullptr, + value_t(1.0), + false, + value_t(-1.0f), + value_t(1.0f), + uint64_t(2024)); + } else { + raft::random::make_blobs(blobs_in_val.data_handle(), + labels.data_handle(), + 1, + dataset_size + queries_size, + 1, + stream, + false, + nullptr, + nullptr, + dist_t(1.0), + false, + dist_t(-1.0f), + dist_t(1.0f), + uint64_t(2024)); + } raft::copy(dataset_h.data(), blobs_in_val.data_handle(), dataset_size, stream); - raft::copy(dataset_d.data(), blobs_in_val.data_handle(), dataset_size, stream); + + if constexpr (std::is_same_v) { + thrust::device_ptr d_output_ptr = + thrust::device_pointer_cast(blobs_in_val.data_handle()); + thrust::device_ptr d_value_ptr = thrust::device_pointer_cast(dataset_d.data()); + thrust::transform(thrust::cuda::par.on(stream), + 
d_output_ptr, + d_output_ptr + dataset_size, + d_value_ptr, + float_to_half()); + } else { + raft::copy(dataset_d.data(), blobs_in_val.data_handle(), dataset_size, stream); + } raft::copy(queries_h.data(), blobs_in_val.data_handle() + dataset_size, queries_size, stream); - raft::copy(queries_d.data(), blobs_in_val.data_handle() + dataset_size, queries_size, stream); + if constexpr (std::is_same_v) { + thrust::device_ptr d_output_ptr = + thrust::device_pointer_cast(blobs_in_val.data_handle() + dataset_size); + thrust::device_ptr d_value_ptr = thrust::device_pointer_cast(queries_d.data()); + thrust::transform(thrust::cuda::par.on(stream), + d_output_ptr, + d_output_ptr + queries_size, + d_value_ptr, + float_to_half()); + } else { + raft::copy(queries_d.data(), blobs_in_val.data_handle() + dataset_size, queries_size, stream); + } raft::resource::sync_stream(handle); - std::vector values_h(nnz); + std::vector values_h(nnz); std::vector indices_h(nnz); std::vector indptr_h(params.n_queries + 1); @@ -390,9 +449,9 @@ class PrefilteredBruteForceTest bool select_min = cuvs::distance::is_min_close(params.metric); - std::vector out_val_h(params.n_queries * params.top_k, - select_min ? std::numeric_limits::infinity() - : std::numeric_limits::lowest()); + std::vector out_val_h( + params.n_queries * params.top_k, + select_min ? 
std::numeric_limits::infinity() : std::numeric_limits::lowest()); std::vector out_idx_h(params.n_queries * params.top_k, static_cast(0)); out_val_d.resize(params.n_queries * params.top_k, stream); @@ -404,7 +463,6 @@ class PrefilteredBruteForceTest raft::resource::sync_stream(handle); std::optional> optional_indices_h = std::nullopt; - cpu_select_k(indptr_h, indices_h, values_h, @@ -415,10 +473,11 @@ class PrefilteredBruteForceTest out_val_h, out_idx_h, select_min); - out_val_expected_d.resize(params.n_queries * params.top_k, stream); out_idx_expected_d.resize(params.n_queries * params.top_k, stream); + // dump_vector(out_val_h.data(), out_val_h.size(), "out_val_h"); + raft::update_device(out_val_expected_d.data(), out_val_h.data(), out_val_h.size(), stream); raft::update_device(out_idx_expected_d.data(), out_idx_h.data(), out_idx_h.size(), stream); @@ -438,12 +497,17 @@ class PrefilteredBruteForceTest auto filter = cuvs::core::bitmap_view( (const bitmap_t*)filter_d.data(), params.n_queries, params.n_dataset); - auto out_val = raft::make_device_matrix_view( + auto out_val = raft::make_device_matrix_view( out_val_d.data(), params.n_queries, params.top_k); auto out_idx = raft::make_device_matrix_view( out_idx_d.data(), params.n_queries, params.top_k); brute_force::search(handle, dataset, queries, out_idx, out_val, std::make_optional(filter)); + std::vector out_val_h(params.n_queries * params.top_k, + std::numeric_limits::infinity()); + + raft::update_host(out_val_h.data(), out_val_d.data(), out_val_h.size(), stream); + raft::resource::sync_stream(handle); ASSERT_TRUE(cuvs::neighbors::devArrMatchKnnPair(out_idx_expected_d.data(), out_idx.data_handle(), @@ -468,49 +532,61 @@ class PrefilteredBruteForceTest rmm::device_uvector queries_d; rmm::device_uvector filter_d; - rmm::device_uvector out_val_d; - rmm::device_uvector out_val_expected_d; + rmm::device_uvector out_val_d; + rmm::device_uvector out_val_expected_d; rmm::device_uvector out_idx_d; rmm::device_uvector 
out_idx_expected_d; }; -using PrefilteredBruteForceTest_float_int64 = PrefilteredBruteForceTest; +using PrefilteredBruteForceTest_float_int64 = PrefilteredBruteForceTest; TEST_P(PrefilteredBruteForceTest_float_int64, Result) { Run(); } +using PrefilteredBruteForceTest_half_int64 = PrefilteredBruteForceTest; +TEST_P(PrefilteredBruteForceTest_half_int64, Result) { Run(); } + template const std::vector> selectk_inputs = { + {8, 131072, 255, 255, 0.01, cuvs::distance::DistanceType::L2Expanded}, + {8, 131072, 255, 255, 0.01, cuvs::distance::DistanceType::InnerProduct}, + {8, 131072, 255, 255, 0.01, cuvs::distance::DistanceType::L2SqrtExpanded}, + {8, 131072, 255, 255, 0.01, cuvs::distance::DistanceType::CosineExpanded}, {2, 131072, 255, 255, 0.4, cuvs::distance::DistanceType::L2Expanded}, + {8, 131072, 512, 16, 0.5, cuvs::distance::DistanceType::L2Expanded}, {16, 131072, 2052, 16, 0.2, cuvs::distance::DistanceType::L2Expanded}, + {2, 8192, 255, 16, 0.01, cuvs::distance::DistanceType::InnerProduct}, {2, 8192, 255, 16, 0.4, cuvs::distance::DistanceType::InnerProduct}, {16, 8192, 512, 16, 0.5, cuvs::distance::DistanceType::InnerProduct}, + {128, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::InnerProduct}, {1024, 8192, 1, 0, 0.1, cuvs::distance::DistanceType::L2Expanded}, {1024, 8192, 3, 0, 0.1, cuvs::distance::DistanceType::InnerProduct}, {1024, 8192, 5, 0, 0.1, cuvs::distance::DistanceType::L2SqrtExpanded}, {1024, 8192, 8, 0, 0.1, cuvs::distance::DistanceType::CosineExpanded}, - {1024, 8192, 1, 1, 0.1, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 1, 1, 0.1, cuvs::distance::DistanceType::L2Expanded}, //-- {1024, 8192, 3, 1, 0.1, cuvs::distance::DistanceType::InnerProduct}, {1024, 8192, 5, 1, 0.1, cuvs::distance::DistanceType::L2SqrtExpanded}, {1024, 8192, 8, 1, 0.1, cuvs::distance::DistanceType::CosineExpanded}, - {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::L2Expanded}, 
{1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::L2Expanded}, {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::InnerProduct}, {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::InnerProduct}, {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::L2SqrtExpanded}, {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::L2SqrtExpanded}, {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::L2SqrtExpanded}, {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::CosineExpanded}, {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::CosineExpanded}, - {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::CosineExpanded}, + {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::CosineExpanded}, {1024, 8192, 1, 16, 0.5, cuvs::distance::DistanceType::L2Expanded}, {1024, 8192, 2, 16, 0.2, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 3, 16, 0.4, cuvs::distance::DistanceType::InnerProduct}, {1024, 8192, 4, 16, 0.5, cuvs::distance::DistanceType::InnerProduct}, {1024, 8192, 5, 16, 0.2, cuvs::distance::DistanceType::L2SqrtExpanded}, @@ -522,4 +598,8 @@ INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceTest, PrefilteredBruteForceTest_float_int64, ::testing::ValuesIn(selectk_inputs)); +INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceTest, + PrefilteredBruteForceTest_half_int64, + ::testing::ValuesIn(selectk_inputs)); + } // namespace cuvs::neighbors::brute_force diff --git a/cpp/test/neighbors/naive_knn.cuh b/cpp/test/neighbors/naive_knn.cuh index 90547150f2..553e667aa1 100644 --- a/cpp/test/neighbors/naive_knn.cuh +++ b/cpp/test/neighbors/naive_knn.cuh @@ -41,7 +41,9 @@ RAFT_KERNEL naive_distance_kernel(EvalT* dist, if (midx >= m) return; IdxT grid_size = IdxT(blockDim.y) * IdxT(gridDim.y); for (IdxT nidx = threadIdx.y + blockIdx.y * blockDim.y; nidx < n; nidx += grid_size) { - EvalT acc = EvalT(0); + EvalT acc = EvalT(0); + EvalT normX = EvalT(0); + 
EvalT normY = EvalT(0); for (IdxT i = 0; i < k; ++i) { IdxT xidx = i + midx * k; IdxT yidx = i + nidx * k; @@ -51,6 +53,11 @@ RAFT_KERNEL naive_distance_kernel(EvalT* dist, case cuvs::distance::DistanceType::InnerProduct: { acc += xv * yv; } break; + case cuvs::distance::DistanceType::CosineExpanded: { + acc += xv * yv; + normX += xv * xv; + normY += yv * yv; + } break; case cuvs::distance::DistanceType::L2SqrtExpanded: case cuvs::distance::DistanceType::L2SqrtUnexpanded: case cuvs::distance::DistanceType::L2Expanded: @@ -66,6 +73,9 @@ RAFT_KERNEL naive_distance_kernel(EvalT* dist, case cuvs::distance::DistanceType::L2SqrtUnexpanded: { acc = raft::sqrt(acc); } break; + case cuvs::distance::DistanceType::CosineExpanded: { + acc = 1 - acc / (raft::sqrt(normX) * raft::sqrt(normY)); + } default: break; } dist[midx * n + nidx] = acc; @@ -118,7 +128,7 @@ void naive_knn(raft::resources const& handle, static_cast(k), dist_topk + offset * k, indices_topk + offset * k, - type != cuvs::distance::DistanceType::InnerProduct, + cuvs::distance::is_min_close(type), mr); } RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu new file mode 100644 index 0000000000..7662763fd7 --- /dev/null +++ b/cpp/test/stats/silhouette_score.cu @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "../test_utils.cuh" + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +namespace cuvs { +namespace stats { + +// parameter structure definition +struct silhouetteScoreParam { + int nRows; + int nCols; + int nLabels; + cuvs::distance::DistanceType metric; + int chunk; + double tolerance; +}; + +// test fixture class +template +class silhouetteScoreTest : public ::testing::TestWithParam { + protected: + silhouetteScoreTest() + : d_X(0, raft::resource::get_cuda_stream(handle)), + sampleSilScore(0, raft::resource::get_cuda_stream(handle)), + d_labels(0, raft::resource::get_cuda_stream(handle)) + { + } + + void host_silhouette_score() + { + // generating random value test input + std::vector h_X(nElements, 0.0); + std::vector h_labels(nRows, 0); + std::random_device rd; + std::default_random_engine dre(nElements * nLabels); + std::uniform_int_distribution intGenerator(0, nLabels - 1); + std::uniform_real_distribution realGenerator(0, 100); + + std::generate(h_X.begin(), h_X.end(), [&]() { return realGenerator(dre); }); + std::generate(h_labels.begin(), h_labels.end(), [&]() { return intGenerator(dre); }); + + // allocating and initializing memory to the GPU + auto stream = raft::resource::get_cuda_stream(handle); + d_X.resize(nElements, stream); + d_labels.resize(nElements, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_X.data(), 0, d_X.size() * sizeof(DataT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_labels.data(), 0, d_labels.size() * sizeof(LabelT), stream)); + sampleSilScore.resize(nElements, stream); + + raft::update_device(d_X.data(), &h_X[0], (int)nElements, stream); + raft::update_device(d_labels.data(), &h_labels[0], (int)nElements, stream); + + // finding the distance matrix + + rmm::device_uvector d_distanceMatrix(nRows * nRows, stream); + double* h_distanceMatrix = (double*)malloc(nRows * nRows * sizeof(double*)); + + auto d_X_view = raft::make_device_matrix_view(d_X.data(), nRows, nCols); + 
cuvs::distance::pairwise_distance( + handle, + d_X_view, + d_X_view, + raft::make_device_matrix_view(d_distanceMatrix.data(), nRows, nRows), + params.metric); + + raft::resource::sync_stream(handle, stream); + + raft::update_host(h_distanceMatrix, d_distanceMatrix.data(), nRows * nRows, stream); + + // finding the bincount array + + double* binCountArray = (double*)malloc(nLabels * sizeof(double*)); + memset(binCountArray, 0, nLabels * sizeof(double)); + + for (int i = 0; i < nRows; ++i) { + binCountArray[h_labels[i]] += 1; + } + + // finding the average intra cluster distance for every element + + double* a = (double*)malloc(nRows * sizeof(double*)); + + for (int i = 0; i < nRows; ++i) { + int myLabel = h_labels[i]; + double sumOfIntraClusterD = 0; + + for (int j = 0; j < nRows; ++j) { + if (h_labels[j] == myLabel) { sumOfIntraClusterD += h_distanceMatrix[i * nRows + j]; } + } + + if (binCountArray[myLabel] <= 1) + a[i] = -1; + else + a[i] = sumOfIntraClusterD / (binCountArray[myLabel] - 1); + } + + // finding the average inter cluster distance for every element + + double* b = (double*)malloc(nRows * sizeof(double*)); + + for (int i = 0; i < nRows; ++i) { + int myLabel = h_labels[i]; + double minAvgInterCD = ULLONG_MAX; + + for (int j = 0; j < nLabels; ++j) { + int curClLabel = j; + if (curClLabel == myLabel) continue; + double avgInterCD = 0; + + for (int k = 0; k < nRows; ++k) { + if (h_labels[k] == curClLabel) { avgInterCD += h_distanceMatrix[i * nRows + k]; } + } + + if (binCountArray[curClLabel]) + avgInterCD /= binCountArray[curClLabel]; + else + avgInterCD = ULLONG_MAX; + minAvgInterCD = min(minAvgInterCD, avgInterCD); + } + + b[i] = minAvgInterCD; + } + + // finding the silhouette score for every element + + double* truthSampleSilScore = (double*)malloc(nRows * sizeof(double*)); + for (int i = 0; i < nRows; ++i) { + if (a[i] == -1) + truthSampleSilScore[i] = 0; + else if (a[i] == 0 && b[i] == 0) + truthSampleSilScore[i] = 0; + else + 
truthSampleSilScore[i] = (b[i] - a[i]) / max(a[i], b[i]); + truthSilhouetteScore += truthSampleSilScore[i]; + } + + truthSilhouetteScore /= nRows; + } + + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nRows = params.nRows; + nCols = params.nCols; + nLabels = params.nLabels; + chunk = params.chunk; + nElements = nRows * nCols; + + host_silhouette_score(); + + // calling the silhouette_score CUDA implementation + computedSilhouetteScore = cuvs::stats::silhouette_score( + handle, + raft::make_device_matrix_view(d_X.data(), nRows, nCols), + raft::make_device_vector_view(d_labels.data(), nRows), + std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)), + nLabels, + params.metric); + + batchedSilhouetteScore = cuvs::stats::silhouette_score_batched( + handle, + raft::make_device_matrix_view(d_X.data(), nRows, nCols), + raft::make_device_vector_view(d_labels.data(), nRows), + std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)), + nLabels, + chunk, + params.metric); + } + + // declaring the data values + raft::resources handle; + silhouetteScoreParam params; + int nLabels; + rmm::device_uvector d_X; + rmm::device_uvector sampleSilScore; + rmm::device_uvector d_labels; + int nRows; + int nCols; + int nElements; + double truthSilhouetteScore = 0; + double computedSilhouetteScore = 0; + double batchedSilhouetteScore = 0; + int chunk; +}; + +// setting test parameter values +const std::vector inputs = { + {4, 2, 3, cuvs::distance::DistanceType::L2Expanded, 4, 0.00001}, + {4, 2, 2, cuvs::distance::DistanceType::L2SqrtUnexpanded, 2, 0.00001}, + {8, 8, 3, cuvs::distance::DistanceType::L2Unexpanded, 4, 0.00001}, + {11, 2, 5, cuvs::distance::DistanceType::L2Expanded, 3, 0.00001}, + {40, 2, 8, cuvs::distance::DistanceType::L2Expanded, 10, 0.00001}, + {12, 7, 3, cuvs::distance::DistanceType::CosineExpanded, 8, 0.00001}, + {7, 5, 5, 
cuvs::distance::DistanceType::L1, 2, 0.00001}}; + +// writing the test suite +typedef silhouetteScoreTest silhouetteScoreTestClass; +TEST_P(silhouetteScoreTestClass, Result) +{ + ASSERT_NEAR(computedSilhouetteScore, truthSilhouetteScore, params.tolerance); + ASSERT_NEAR(batchedSilhouetteScore, truthSilhouetteScore, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(silhouetteScore, silhouetteScoreTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace cuvs diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu new file mode 100644 index 0000000000..e2ed04a03a --- /dev/null +++ b/cpp/test/stats/trustworthiness.cu @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../test_utils.cuh" + +#include +#include +#include +#include + +#include + +#include +#include + +namespace cuvs { +namespace stats { + +class TrustworthinessScoreTest : public ::testing::Test { + public: + TrustworthinessScoreTest() + : d_X(0, raft::resource::get_cuda_stream(handle)), + d_X_embedded(0, raft::resource::get_cuda_stream(handle)) + { + } + + protected: + void basicTest() + { + std::vector X = { + 5.6142087, 8.59787, -4.382763, -3.6452143, -5.8816037, -0.6330313, 4.6920023, + -0.79210913, 0.6106314, 2.1210914, 5.919943, -8.43784, -6.4819884, 0.41001374, + -6.1052523, -4.0825715, -5.314755, -2.834671, 5.751696, -6.5012555, -0.4719201, + -7.53353, 7.6789393, -1.4959852, -5.5977287, -9.564147, 1.2902534, 3.559834, + -6.7659483, 8.265964, 4.595404, 9.133477, -6.1553917, -6.319754, -2.9039452, + 4.4150834, -3.094395, -4.426273, 9.584571, -5.64133, 6.6209483, 7.4044604, + 3.9620576, 5.639907, 10.33007, -0.8792053, 5.143776, -7.464049, 1.2448754, + -5.6300974, 5.4518576, 4.119535, 6.749645, 7.627064, -7.2298336, 1.9681473, + -6.9083176, 6.404673, 0.07186685, 9.0994835, 8.51037, -8.986389, 0.40534487, + 2.115397, 4.086756, 1.2284287, -2.6272132, 0.06527536, -9.587425, -7.206078, + 7.864875, 7.4397306, -6.9233336, -2.6643622, 3.3466153, 7.0408177, -3.6069896, + -9.971769, 4.4075623, 7.9063697, 2.559074, 4.323717, 1.6867131, -1.1576937, + -9.893141, -3.251416, -7.4889135, -4.0588717, -2.73338, -7.4852257, 3.4460473, + 9.759119, -5.4680476, -4.722435, -8.032619, -1.4598992, 4.227361, 3.135568, + 1.1950601, 1.1982028, 6.998856, -6.131138, -6.6921015, 0.5361224, -7.1213965, + -5.6104236, -7.2212887, -2.2710054, 8.544764, -6.0254574, 1.4582269, -5.5587835, + 8.031556, -0.26328218, -5.2591386, -9.262641, 2.8691363, 5.299787, -9.209455, + 8.523085, 5.180329, 10.655528, -5.7171874, -6.7739563, -3.6306462, 4.067106, + -1.5912259, -3.2345476, 8.042973, -3.6364832, 4.1242137, 9.886953, 5.4743724, + 6.3058076, 9.369645, -0.5175337, 4.9859877, -7.879498, 
1.358422, -4.147944, + 3.8984218, 5.894656, 6.4903927, 8.702036, -8.023722, 2.802145, -7.748032, + 5.8461113, -0.34215945, 11.298865, 1.4107164, -9.949621, -1.6257563, -10.655836, + 2.4528909, 1.1570255, 5.170669, 2.8398793, 7.1838694, 9.088459, 2.631155, + 3.964414, 2.8769252, 0.04198391, -0.16993195, 3.6747139, -2.8377378, 6.1782537, + 10.759618, -4.5642614, -8.522967, 0.8614642, 6.623416, -1.029324, 5.5488334, + -7.804511, 2.128833, 7.9042315, 7.789576, -2.7944536, 0.72271067, -10.511495, + -0.78634536, -10.661714, 2.9376361, 1.9148129, 6.22859, 0.26264945, 8.028384, + 6.8743043, 0.9351067, 7.0690722, 4.2846055, 1.4134506, -0.18144785, 5.2778087, + -1.7140163, 9.217541, 8.602799, -2.6537218, -7.8377395, 1.1244944, 5.4540544, + -0.38506773, 3.9885726, -10.76455, 1.4440702, 9.136163, 6.664117, -5.7046547, + 8.038592, -9.229767, -0.2799413, 3.6064725, 4.187257, 1.0516582, -2.0707326, + -0.7615968, -8.561018, -3.7831352, 10.300297, 5.332594, -6.5880876, -4.2508664, + 1.7985519, 5.7226253, -4.1223383, -9.6697855, 1.4885283, 7.524974, 1.7206005, + 4.890457, 3.7264557, 0.4428284, -9.922455, -4.250455, -6.4410596, -2.107994, + -1.4109765, -6.1325397, 0.32883006, 6.0489736, 7.7257385, -8.281174, 1.0129383, + -10.792166, 8.378851, 10.802716, 9.848448, -9.188757, 1.3151443, 1.9971865, + -2.521849, 4.3268294, -7.775683, -2.2902298, 3.0824065, -7.17559, 9.6100855, + 7.3965735, -10.476525, 5.895973, -3.6974669, -7.6688933, 1.7354839, -7.4045196, + -1.7992063, -4.0394845, 5.2471714, -2.250571, 2.528036, -8.343515, -2.2374575, + -10.019771, 0.73371273, 3.1853926, 2.7994921, 2.6637669, 7.620401, 7.515571, + 0.68636256, 5.834537, 4.650282, -1.0362619, 0.4461701, 3.7870514, -4.1340904, + 7.202998, 9.736904, -3.005512, -8.920467, 1.1228397, 6.2598724, 1.2812365, + 4.5442104, -8.791537, 0.92113096, 8.464749, 8.359035, -4.3923397, 1.2252625, + -10.1986475, -1.4409319, -10.013967, 3.9071581, 1.683064, 4.877419, 1.6570637, + 9.559105, 7.3546534, 0.36635467, 5.220211, 4.6303267, 
0.6601065, 0.16149978, + 3.8818731, -3.4438233, 8.42085, 8.659159, -3.0935583, -8.039611, 2.3060374, + 5.134666, 1.0458113, 6.0190983, -9.143728, 0.99048865, 9.210842, 6.670241, + -5.9614363, 0.8747396, 7.078824, 8.067469, -10.314754, 0.45977542, -9.28306, + 9.1838665, 9.318644, 7.189082, -11.092555, 1.0320464, 3.882163, 0.10953151, + 7.9029684, -6.9068265, -1.3526366, 5.3996363, -8.430931, 11.452577, 6.39663, + -11.090514, 4.6662245, -3.1268113, -8.357452, 2.2276728, -10.357126, -0.9291848, + -3.4193344, 3.1289792, -2.5030103, 6.772719, 11.457757, -4.2125936, -6.684548, + -4.7611327, 3.6960156, -2.3030636, -3.0591488, 10.452471, -4.1267314, 5.66614, + 7.501461, 5.072407, 6.636537, 8.990381, -0.2559256, 4.737867, -6.2149944, + 2.535682, -5.5484023, 5.7113924, 3.4742818, 7.9915137, 7.0052586, -7.156467, + 1.4354781, -8.286235, 5.7523417, -2.4175215, 9.678009, 0.05066403, -9.645226, + -2.2658763, -9.518178, 4.493372, 2.3232365, 2.1659086, 0.42507997, 8.360246, + 8.23535, 2.6878164, 5.236947, 3.4924245, -0.6089895, 0.8884741, 4.359464, + -4.6073823, 7.83441, 8.958755, -3.4690795, -9.182282, 1.2478025, 5.6311107, + -1.2408862, 3.6316886, -8.684654, 2.1078515, 7.2813864, 7.9265943, -3.6135032, + 0.4571511, 8.493568, 10.496853, -7.432897, 0.8625995, -9.607528, 7.2899456, + 8.83158, 8.908199, -10.300263, 1.1451302, 3.7871468, -0.97040755, 5.7664757, + -8.9688, -2.146672, 5.9641485, -6.2908535, 10.126465, 6.1553903, -12.066902, + 6.301596, -5.0419583, -8.228695, 2.4879954, -8.918582, -3.7434099, -4.1593685, + 3.7431836, -1.1704745, 0.5524103, 9.109399, 9.571567, -11.209955, 1.2462777, + -9.554555, 9.091726, 11.477966, 7.630937, -10.450911, 1.9205878, 5.358983, + -0.44546837, 6.7611346, -9.74753, -0.5939732, 3.8892255, -6.437991, 10.294727, + 5.6723895, -10.7883, 6.192348, -5.293862, -10.811491, 1.0194173, -7.074576, + -3.192368, -2.5231771, 4.2791643, -0.53309685, 0.501366, 9.636625, 7.710316, + -6.4219728, 1.0975566, -8.218886, 6.9011984, 9.873679, 8.903804, -9.316832, + 
1.2404599, 4.9039655, 1.2272617, 4.541515, -5.2753224, -3.2196746, 3.1303136, + -7.285681, 9.041425, 5.6417427, -9.93667, 5.7548947, -5.113397, -8.544622, + 4.182665, -7.7709813, -3.2810235, -3.312072, 3.8900535, -2.0604856, 6.709082, + -8.461194, 1.2666026, 4.8770437, 2.6955879, 3.0340345, -1.1614609, -3.536341, + -7.090382, -5.36146, 9.072544, 6.4554095, -4.4728956, -1.88395, 3.1095037, + 8.782348, -3.316743, -8.65248, 1.6802986, 8.186188, 2.1783829, 4.931278, + 4.158475, 1.4033595, -11.320101, -3.7084908, -6.740436, -2.5555193, -1.0451177, + -6.5569925, 0.82810307, 8.505919, 8.332857, -9.488569, -0.21588463, -8.056692, + 8.493993, 7.6401625, 8.812983, -9.377281, 2.4369764, 3.1766508, 0.6300803, + 5.6666765, -7.913654, -0.42301777, 4.506412, -7.8954244, 10.904591, 5.042256, + -9.626183, 8.347351, -3.605006, -7.923387, 1.1024277, -8.705793, -2.5151258, + -2.5066147, 4.0515003, -2.060757, 6.2635093, 8.286584, -6.0509276, -6.76452, + -3.1158175, 1.6578803, -1.4608748, -1.24211, 8.151246, -4.2970877, 6.093071, + 7.4911637, 4.51018, 4.8425875, 9.211085, -2.4386222, 4.5830803, -5.6079445, + 2.3713675, -4.0707507, 3.1787417, 5.462342, 6.915912, 6.3928423, -7.2970796, + 5.0112796, -9.140893, 4.9990606, 0.38391754, 7.7088532, 1.9340848, 8.18833, + 8.16617, -9.42086, -0.3388326, -9.659727, 8.243045, 8.099073, 8.439428, + -7.038694, 2.1077902, 3.3866816, -1.9975324, 7.4972878, -7.2525196, -1.553731, + 4.08758, -6.6922374, 9.50525, 4.026735, -9.243538, 7.2740564, -3.9319072, + -6.3228955, 1.6693478, -7.923119, -3.7423058, -2.2813146, 5.3469067, -1.8285407, + 3.3118162, 8.826356, -4.4641976, -6.4751124, -9.200089, -2.519147, 4.225298, + 2.4105988, -0.4344186, 0.53441775, 5.2836394, -8.2816105, -4.996147, -1.6870759, + -7.8543897, -3.9788852, -7.0346904, -3.1289773, 7.4567637, -5.6227813, 1.0709786, + -8.866012, 8.427324, -1.1755563, -5.789216, -8.197835, 5.3342214, 6.0646234, + -6.8975716, 7.717031, 3.480355, 8.312151, -3.6645212, -3.0976524, -8.090359, + -1.9176173, 
2.4257212, 1.9700835, 0.4098958, 2.1341088, 7.652741, -9.9595585, + -5.989757, 0.10119354, -7.935407, -5.792786, -5.22783, -4.318978, 5.414037, + -6.4621663, 1.670883, -6.9224787, 8.696932, -2.0214002, -6.6681314, -8.326418, + 4.9049683, 5.4442496, -6.403739, 7.5822453, 7.0972915, -9.072851, -0.23897195, + 1.7662339, 5.3096304, 1.983179, -2.222645, -0.34700772, -9.094717, -6.107907, + 9.525174, 8.1550665, -5.6940084, -4.1636486, 1.7360662, 8.528821, -3.7299833, + -9.341266, 2.608542, 9.108706, 0.7978509, 4.2488184, 2.454484, 0.9446999, + -10.106636, -3.8973773, -6.6566644, -4.5647273, -0.99837756, -6.568582, 9.324853, + -7.9020953, 2.0910501, 2.2896829, 1.6790711, 1.3159255, -3.5258796, 1.8898442, + -8.105812, -4.924962, 8.771129, 7.1202874, -5.991957, -3.4106019, 2.4450088, + 7.796387, -3.055946, -7.8971434, 1.9856719, 9.001636, 1.8511922, 3.019749, + 3.1227696, 0.4822102, -10.021213, -3.530504, -6.225959, -3.0029628, -1.7881511, + -7.3879776, 1.3925704, 9.499782, -3.7318087, -3.7074296, -7.7466836, -1.5284524, + 4.0535855, 3.112011, 0.10340207, -0.5429599, 6.67026, -9.155924, -4.924038, + 0.64248866, -10.0103655, -3.2742946, -4.850029, -3.6707063, 8.586258, -5.855605, + 4.906918, -6.7813993, 7.9938135, -2.5473144, -5.688948, -7.822478, 2.1421318, + 4.66659, -9.701272, 9.549149, 0.8998125, -8.651497, -0.56899565, -8.639817, + 2.3088377, 2.1264515, 3.2764478, 2.341989, 8.594338, 8.630639, 2.8440373, + 6.2043204, 4.433932, 0.6320018, -1.8179281, 5.09452, -1.5741565, 8.153934, + 8.744339, -3.6945698, -8.883078, 1.5329908, 5.2745943, 0.44716078, 4.8809066, + -7.9594903, 1.134374, 9.233994, 6.5528665, -4.520542, 9.477355, -8.622195, + -0.23191702, 2.0485356, 3.9379985, 1.5916302, -1.4516805, -0.0843819, -7.8554378, + -5.88308, 7.999766, 6.2572145, -5.585321, -4.0097756, 0.42382592, 6.160884, + -3.631315, -8.333449, 2.770595, 7.8495173, 3.3331623, 4.940415, 3.6207345, + -0.037517, -11.034698, -3.185103, -6.614664, -3.2177854, -2.0792234, -6.8879867, + 7.821685, 
-8.455084, 1.0784642, 4.0033927, 2.7343264, 2.6052725, -4.1224284, + -0.89305353, -6.8267674, -4.9715133, 8.880253, 5.6994023, -5.9695024, -4.9181266, + 1.3017995, 7.972617, -3.9452884, -10.424556, 2.4504194, 6.21529, 0.93840516, + 4.2070026, 6.159839, 0.91979957, -8.706724, -4.317946, -6.6823545, -3.0388, + -2.464262, -7.3716645, 1.3926703, 6.544412, -5.6251183, -5.122411, -8.622049, + -2.3905911, 3.9138813, 1.9779967, -0.05011125, 0.13310997, 7.229751, -9.742043, + -8.08724, 1.2426697, -7.9230795, -3.3162494, -7.129571, -3.5488048, 7.4701195, + -5.2357526, 0.5917681, -6.272206, 6.342328, -2.909731, -4.991607, -8.845513, + 3.3228495, 7.033246, -7.8180246, 8.214469, 6.3910093, 9.185153, -6.20472, + -7.713809, -3.8481297, 3.5579286, 0.7078448, -3.2893546, 7.384514, -4.448121, + 3.0104196, 9.492943, 8.024847, 4.9114385, 9.965594, -3.014036, 5.182494, + -5.8806014, 2.5312455, -5.9926524, 4.474469, 6.3717875, 6.993105, 6.493093, + -8.935534, 3.004074, -8.055647, 8.315765, -1.3026813, 8.250377, 0.02606229, + 6.8508425, 9.655665, -7.0116496, -0.41060972, -10.049198, 7.897801, 6.7791023, + 8.3362, -9.821014, 2.491157, 3.5160472, -1.6228812, 7.398063, -8.769123, + -3.1743705, 3.2827861, -6.497855, 10.831924, 5.2761307, -9.704417, 4.3817043, + -3.9841619, -8.111647, 1.1883026, -8.115312, -2.9240117, -5.8879666, 4.20928, + -0.3587938, 6.935672, -10.177582, 0.48819053, 3.1250648, 2.9306343, 3.082544, + -3.477687, -1.3768549, -7.4922366, -3.756631, 10.039836, 3.6670392, -5.9761434, + -4.4728765, 3.244255, 7.027899, -2.3806512, -10.4100685, 1.605716, 7.7953773, + 0.5408159, 1.7156523, 3.824097, -1.0604783, -10.142124, -5.246805, -6.5283823, + -4.579547, -2.42714, -6.709197, 2.7782338, 7.33353, -6.454507, -2.9929368, + -7.8362985, -2.695445, 2.4900775, 1.6682367, 0.4641757, -1.0495365, 6.9631333, + -9.291356, -8.23837, -0.34263706, -8.275113, -2.8454232, -5.0864096, -2.681942, + 7.5450225, -6.2517986, 0.06810654, -6.470652, 4.9042645, -1.8369255, -6.6937943, + -7.9625087, 
2.8510258, 6.180508, -8.282598, 7.919079, 1.4897474, 6.7217417, + -4.2459426, -4.114431, -8.375707, -2.143264, 5.6972933, 1.5574739, 0.39375135, + 1.7930849, 5.1737595, -7.826241, -5.160268, -0.80433255, -7.839536, -5.2620406, + -5.4643164, -3.185536, 6.620315, -7.065227, 1.0524757, -6.125088, 5.7126627, + -1.6161644, -3.852159, -9.164279, 2.7005782, 5.946544, -8.468236, 8.2145405, + 1.1035942, 6.590157, -4.0461283, -4.8090615, -7.6702685, -2.1121511, 5.1147075, + 1.6128504, 2.0064135, 1.0544407, 6.0038295, -7.8282537, -4.801278, 0.32349443, + -8.0649805, -4.372714, -5.61336, -5.21394, 8.176595, -5.4753284, 1.7800134, + -8.267283, 7.2133374, -0.16594432, -6.317046, -9.490406, 4.1261597, 5.473317, + -7.7551675, 7.007468, 7.478628, -8.801905, 0.10975724, 3.5478222, 4.797803, + 1.3825226, -3.357369, 0.99262005, -6.94877, -5.4781394, 9.632604, 5.7492557, + -5.9014316, -3.1632116, 2.340859, 8.708098, -3.1255999, -8.848661, 4.5612836, + 8.455157, 0.73460823, 4.112301, 4.392744, -0.30759293, -6.8036823, -3.0331545, + -8.269506, -2.82415, -0.9411246, -5.993506, 2.1618164, -8.716055, -0.7432543, + -10.255819, 3.095418, 2.5131428, 4.752442, 0.9907621, 7.8279433, 7.85814, + 0.50430876, 5.2840405, 4.457291, 0.03330028, -0.40692952, 3.9244103, -2.117118, + 7.6977615, 8.759009, -4.2157164, -9.136053, 3.247858, 4.668686, 0.76162136, + 5.3833632, -9.231471, 0.44309422, 8.380872, 6.7211227, -3.091507, 2.173508, + -9.038242, -1.3666698, -9.819077, 0.37825826, 2.3898845, 4.2440815, 1.9161536, + 7.24787, 6.9124637, 1.6238527, 5.1140285, 3.1935842, 1.02845, -1.1273454, + 5.638998, -2.497932, 8.342559, 8.586319, -2.9069402, -7.6387944, 3.5975037, + 4.4115705, 0.41506064, 4.9078383, -9.68327, 1.8159529, 9.744613, 8.40622, + -4.495336, 9.244892, -8.789869, 1.3158468, 4.018167, 3.3922846, 2.652022, + -2.7495477, 0.2528986, -8.268324, -6.004913, 10.428784, 6.6580734, -5.537176, + -1.7177434, 2.7504628, 6.7735, -2.4454272, -9.998361, 2.9483433, 6.8266654, + 2.3787718, 4.472637, 2.5871701, 
0.7355365, -7.7027745, -4.1879907, -7.172832, + -4.1843605, -0.03646783, -5.419406, 6.958486, 11.011111, -7.1821184, -7.956423, + -3.408451, 4.6850276, -2.348787, -4.398289, 6.9787564, -3.8324208, 5.967827, + 8.433518, 4.660108, 5.5657144, 9.964243, -1.3515275, 6.404833, -6.4805903, + 2.4379845, -6.0816774, 1.752272, 5.3771873, 6.9613523, 6.9788294, -6.3894596, + 3.7521114, -6.8034263, 6.4458385, -0.7233525, 10.512529, 4.362273, 9.231461, + -6.3382263, -7.659, -3.461823, 4.71463, 0.17817476, -3.685746, 7.2962036, + -4.6489477, 5.218017, 11.546999, 4.7218375, 6.8498397, 9.281103, -3.900459, + 6.844054, -7.0886965, -0.05019227, -8.233724, 5.5808983, 6.374517, 8.321048, + 7.969449, -7.3478637, 1.4917561, -8.003144, 4.780668, -1.1981848, 7.753739, + 2.0260844, -8.880096, -3.4258451, -7.141975, 1.9637157, 1.814725, 5.311151, + 1.4831505, 7.8483663, 7.257948, 1.395786, 6.417756, 5.376912, 0.59505713, + 0.00062552, 3.6634305, -4.159713, 7.3571978, 10.966816, -2.5419605, -8.466229, + 1.904205, 5.6338267, -0.52567476, 5.59736, -8.361799, 0.5009981, 8.460681, + 7.3891273, -3.5272243, 5.0552278, 9.921456, -7.69693, -7.286378, -1.9198836, + 3.1666567, -2.5832257, -2.2445817, 9.888111, -5.076563, 5.677401, 7.497946, + 5.662994, 5.414262, 8.566503, -2.5530663, 7.1032815, -6.0612082, 1.3419591, + -4.9595256, 4.3377542, 4.3790717, 6.793512, 8.383502, -7.1278043, 3.3240774, + -9.379446, 6.838661, -0.81241214, 8.694813, 0.79141915, 7.632467, 8.575382, + -8.533798, 0.28954387, -7.5675836, 5.8653326, 8.97235, 7.1649346, -10.575289, + 0.9359381, 5.02381, -0.5609511, 5.543464, -7.69131, -2.1792977, 2.4729247, + -6.1917787, 10.373678, 7.6549597, -8.809486, 5.5657206, -3.3169382, -8.042887, + 2.0874746, -7.079005, -3.33398, -3.6843317, 4.0172358, -2.0754814, 1.1726758, + 7.4618697, 6.9483604, -8.469206, 0.7401797, -10.318176, 8.384557, 10.5476265, + 9.146971, -9.250223, 0.6290606, 4.4941425, -0.7514017, 7.2271705, -8.309598, + -1.4761636, 4.0140634, -6.021102, 9.132852, 5.6610966, 
-11.249811, 8.359293, + -1.9445792, -7.7393436, -0.3931331, -8.824441, -2.5995944, -2.5714035, 4.140213, + -3.6863053, 5.517265, 9.020411, -4.9286127, -7.871219, -3.7446704, 2.5179656, + -1.4543481, -2.2703636, 7.010597, -3.6436229, 6.753862, 7.4129915, 7.1406755, + 5.653706, 9.5445175, 0.15698843, 4.761813, -7.698002, 1.6870106, -4.5410123, + 4.171763, 5.3747005, 6.341021, 7.456738, -8.231657, 2.763487, -9.208167, + 6.676799, -1.1957736, 10.062605, 4.0975976, 7.312957, -2.4981596, -2.9658387, + -8.150425, -2.1075552, 2.64375, 1.6636052, 1.1483809, 0.09276015, 5.8556347, + -7.8481026, -5.9913163, -0.02840613, -9.937289, -1.0486673, -5.2340155, -3.83912, + 7.7165728, -8.409944, 0.80863273, -6.9119215, 7.5712357, 0.36031485, -6.056131, + -8.470033, 1.8678337, 3.0121377, -7.3096333, 8.205484, 5.262654, 8.774514, + -4.7603083, -7.2096143, -4.437014, 3.6080024, -1.624254, -4.2787876, 8.880863, + -4.8984556, 5.1782074, 9.944454, 3.911282, 3.5396595, 8.867042, -1.2006199, + 5.393288, -5.6455317, 0.7829499, -4.0338907, 2.479272, 6.5080743, 8.582535, + 7.0097537, -6.9823785, 3.984318, -7.225381, 5.3135114, -1.0391048, 8.951443, + -0.70119005, -8.510742, -0.42949116, -10.9224825, 2.8176029, 1.6800792, 5.778404, + 1.7269998, 7.1975236, 7.7258267, 2.7632928, 5.3399253, 3.4650044, 0.01971426, + -1.6468811, 4.114996, -1.5110453, 6.8689218, 8.269899, -3.1568048, -7.0344677, + 1.2911975, 5.950357, 0.19028673, 4.657226, -8.199647, 2.246055, 8.989509, + 5.3101015, -4.2400866}; + + std::vector X_embedded = { + -0.41849962, -0.53906363, 0.46958843, -0.35832694, -0.23779503, -0.29751351, -0.01072748, + -0.21353109, -0.54769957, -0.55086273, 0.37093949, -0.12714292, -0.06639574, -0.36098689, + -0.13060696, -0.07362658, -1.01205945, -0.39285606, 0.2864089, -0.32031146, -0.19595343, + 0.08900568, -0.04813879, -0.06563424, -0.42655188, -0.69014251, 0.51459783, -0.1942696, + -0.07767916, -0.6119386, 0.04813685, -0.22557008, -0.56890118, -0.60293794, 0.43429622, + -0.09240723, -0.00624062, 
-0.25800395, -0.1886092, 0.01655941, -0.01961523, -0.14147359, + 0.41414487, -0.8512944, -0.61199242, -0.18586016, 0.14024924, -0.41635606, -0.02890144, + 0.1065347, 0.39700791, -1.14060664, -0.95313865, 0.14416681, 0.17306046, -0.53189689, + -0.98987544, -0.67918193, 0.41787854, -0.20878236, -0.06612862, 0.03502904, -0.03765266, + -0.0980606, -0.00971657, 0.29432917, 0.36575687, -1.1645509, -0.89094597, 0.03718805, + 0.2310573, -0.38345811, -0.10401925, -0.10653082, 0.38469055, -0.88302094, -0.80197543, + 0.03548668, 0.02775662, -0.54374295, 0.03379983, 0.00923623, 0.29320273, -1.05263519, + -0.93360096, 0.03778313, 0.12360487, -0.56437284, 0.0644429, 0.33432651, 0.36450726, + -1.22978747, -0.83822101, -0.18796451, 0.34888434, -0.3801491, -0.45327303, -0.59747899, + 0.39697698, -0.15616602, -0.06159166, -0.40301991, -0.11725303, -0.11913263, -0.12406619, + -0.11227967, 0.43083835, -0.90535849, -0.81646025, 0.10012121, -0.0141237, -0.63747931, + 0.04805023, 0.34190539, 0.50725192, -1.17861414, -0.74641538, -0.09333111, 0.27992678, + -0.56214809, 0.04970971, 0.36249384, 0.57705611, -1.16913795, -0.69849908, 0.10957897, + 0.27983218, -0.62088525, 0.0410459, 0.23973398, 0.40960434, -1.14183664, -0.83321381, + 0.02149482, 0.21720445, -0.49869928, -0.95655465, -0.51680422, 0.45761383, -0.08351214, + -0.12151554, 0.00819737, -0.20813803, -0.01055793, 0.25319234, 0.36154974, 0.1822421, + -1.15837133, -0.92209691, -0.0501582, 0.08535917, -0.54003763, -1.08675635, -1.04009593, + 0.09408128, 0.07009826, -0.01762833, -0.19180447, -0.18029785, -0.20342001, 0.04034991, + 0.1814747, 0.36906669, -1.13532007, -0.8852452, 0.0782818, 0.16825101, -0.50301319, + -0.29128098, -0.65341312, 0.51484352, -0.38758236, -0.22531103, -0.55021971, 0.10804344, + -0.3521522, -0.38849035, -0.74110794, 0.53761131, -0.25142813, -0.1118066, -0.47453368, + 0.06347904, -0.23796193, -1.02682328, -0.47594091, 0.39515916, -0.2782529, -0.16566519, + 0.08063579, 0.00810116, -0.06213913, -1.059654, 
-0.62496334, 0.53698546, -0.11806234, + 0.00356161, 0.11513405, -0.14213292, 0.04102662, -0.36622161, -0.73686272, 0.48323864, + -0.27338892, -0.14203401, -0.41736352, 0.03332564, -0.21907479, -0.06396769, 0.01831361, + 0.46263444, -1.01878166, -0.86486858, 0.17622118, -0.01249686, -0.74530888, -0.9354887, + -0.5027945, 0.38170099, -0.15547098, 0.00677824, -0.04677663, -0.13541745, 0.07253501, + -0.97933143, -0.58001202, 0.48235369, -0.18836913, -0.02430783, 0.07572441, -0.08101331, + 0.00630076, -0.16881248, -0.67989182, 0.46083611, -0.43910736, -0.29321918, -0.38735861, + 0.07669903, -0.29749861, -0.40047669, -0.56722462, 0.33168188, -0.13118173, -0.06672747, + -0.56856316, -0.26269144, -0.14236671, 0.10651901, 0.4962585, 0.38848072, -1.06653547, + -0.64079332, -0.47378591, 0.43195483, -0.04856951, -0.9840439, -0.70610428, 0.34028092, + -0.2089237, -0.05382041, 0.01625874, -0.02080803, -0.12535211, -0.04146428, -1.24533033, + 0.48944879, 0.0578458, 0.26708388, -0.90321028, 0.35377088, -0.36791429, -0.35382384, + -0.52748734, 0.42854419, -0.31744713, -0.19174226, -0.39073724, -0.03258846, -0.19978228, + -0.36185205, -0.57412046, 0.43681973, -0.25414538, -0.12904905, -0.46334973, -0.03123853, + -0.11303604, -0.87073672, -0.45441297, 0.41825858, -0.25303507, -0.21845073, 0.10248682, + -0.11045569, -0.10002795, -0.00572806, 0.16519061, 0.42651513, -1.11417019, -0.83789682, + 0.02995787, 0.16843079, -0.53874511, 0.03056994, 0.17877036, 0.49632853, -1.03276777, + -0.74778616, -0.03971953, 0.10907949, -0.67385727, -0.9523471, -0.56550741, 0.40409449, + -0.2703723, -0.10175014, 0.13605487, -0.06306008, -0.01768126, -0.4749442, -0.56964815, + 0.39389887, -0.19248079, -0.04161081, -0.38728487, -0.20341556, -0.12656988, -0.35949609, + -0.46137866, 0.28798422, -0.06603147, -0.04363992, -0.60343552, -0.23565227, -0.10242701, + -0.06792886, 0.09689897, 0.33259571, -0.98854214, -0.84444433, 0.00673901, 0.13457057, + -0.43145794, -0.51500046, -0.50821936, 0.38000089, 0.0132636, 
0.0580942, -0.40157595, + -0.11967677, 0.02549113, -0.10350953, 0.22918226, 0.40411913, -1.05619383, -0.71218503, + -0.02197581, 0.26422262, -0.34765676, 0.06601537, 0.21712676, 0.34723559, -1.20982027, + -0.95646334, 0.00793948, 0.27620381, -0.43475035, -0.67326003, -0.6137197, 0.43724492, + -0.17666136, -0.06591748, -0.18937394, -0.07400128, -0.06881691, -0.5201112, -0.61088628, + 0.4225319, -0.18969463, -0.06921366, -0.33993208, -0.06990873, -0.10288513, -0.70659858, + -0.56003648, 0.46628812, -0.16090363, -0.0185108, -0.1431348, -0.1128775, -0.0078648, + -0.02323332, 0.04292452, 0.39291084, -0.94897962, -0.63863206, -0.16546988, 0.23698957, + -0.30633628}; + + auto stream = raft::resource::get_cuda_stream(handle); + + d_X.resize(X.size(), stream); + d_X_embedded.resize(X_embedded.size(), stream); + raft::update_device(d_X.data(), X.data(), X.size(), stream); + raft::update_device(d_X_embedded.data(), X_embedded.data(), X_embedded.size(), stream); + auto n_sample = 50; + auto n_features_origin = 30; + auto n_features_embedded = 8; + + // euclidean test + score = cuvs::stats::trustworthiness_score( + handle, + raft::make_device_matrix_view(d_X.data(), n_sample, n_features_origin), + raft::make_device_matrix_view( + d_X_embedded.data(), n_sample, n_features_embedded), + 5, + cuvs::distance::DistanceType::L2SqrtUnexpanded); + } + + void SetUp() override { basicTest(); } + + void TearDown() override {} + + protected: + raft::resources handle; + + rmm::device_uvector d_X; + rmm::device_uvector d_X_embedded; + + double score; +}; + +typedef TrustworthinessScoreTest TrustworthinessScoreTestF; +TEST_F(TrustworthinessScoreTestF, Result) { ASSERT_TRUE(0.9375 < score && score < 0.9379); } +}; // namespace stats +}; // namespace cuvs diff --git a/dependencies.yaml b/dependencies.yaml index 75bd89b8ab..9fcbeaae2f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -11,6 +11,8 @@ files: - build_py_cuvs - cuda - cuda_version + - depends_on_pylibraft + - depends_on_librmm 
- develop - checks - build_wheels @@ -31,6 +33,8 @@ files: - build_py_cuvs - cuda - cuda_version + - depends_on_pylibraft + - depends_on_librmm - develop - bench - bench_python @@ -91,7 +95,9 @@ files: extras: table: project includes: + - cuda_wheels - run_py_cuvs + - depends_on_pylibraft py_test_py_cuvs: output: pyproject pyproject_dir: python/cuvs @@ -192,15 +198,7 @@ dependencies: common: - output_types: [conda] packages: - - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 - - &pylibraft_unsuffixed pylibraft==24.10.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - - output_types: requirements - packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for rmm-cu{11,12}. - - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [conda, requirements, pyproject] matrices: @@ -215,21 +213,6 @@ dependencies: - matrix: packages: - &cuda_python cuda-python - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - &rmm_cu12 rmm-cu12==24.10.*,>=0.0.0a0 - - &pylibraft_cu12 pylibraft-cu12==24.10.*,>=0.0.0a0 - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - &rmm_cu11 rmm-cu11==24.10.*,>=0.0.0a0 - - &pylibraft_cu11 pylibraft-cu11==24.10.*,>=0.0.0a0 - - {matrix: null, packages: [*rmm_unsuffixed, *pylibraft_unsuffixed] } checks: common: - output_types: [conda, requirements] @@ -345,6 +328,36 @@ dependencies: - *libcusolver114 - *libcusparse_dev114 - *libcusparse114 + cuda_wheels: + specific: + - output_types: pyproject + matrices: + - matrix: + cuda: "12.*" + use_cuda_wheels: "true" + packages: + - nvidia-cublas-cu12 + - nvidia-curand-cu12 + - nvidia-cusolver-cu12 + - nvidia-cusparse-cu12 + # CUDA 11 does not provide wheels, so use the system libraries instead + - matrix: + cuda: "11.*" + use_cuda_wheels: "true" + packages: + # if use_cuda_wheels=false is provided, do not add 
dependencies on any CUDA wheels + # (e.g. for DLFW and pip devcontainers) + - matrix: + use_cuda_wheels: "false" + packages: + # if no matching matrix selectors passed, list the unsuffixed packages + # (just as a source of documentation, as this populates pyproject.toml in source control) + - matrix: + packages: + - nvidia-cublas + - nvidia-curand + - nvidia-cusolver + - nvidia-cusparse cupy: common: @@ -400,10 +413,6 @@ dependencies: specific: - output_types: conda matrices: - - matrix: - py: "3.9" - packages: - - python=3.9 - matrix: py: "3.10" packages: @@ -413,23 +422,17 @@ dependencies: packages: - python=3.11 - matrix: + py: "3.12" packages: - - python>=3.9,<3.12 + - python=3.12 + - matrix: + packages: + - python>=3.10,<3.13 run_py_cuvs: common: - - output_types: [conda, pyproject] - packages: - - &numpy numpy>=1.23,<2.0a0 - - output_types: [conda] - packages: - - *rmm_unsuffixed - - *pylibraft_unsuffixed - - output_types: requirements + - output_types: [conda, requirements, pyproject] packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for cudf and rmm. 
- - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + - &numpy numpy>=1.23,<3.0a0 specific: - output_types: [conda, requirements, pyproject] matrices: @@ -444,15 +447,6 @@ dependencies: - matrix: packages: - *cuda_python - - output_types: [requirements, pyproject] - matrices: - - matrix: {cuda: "12.*"} - packages: - - *pylibraft_cu12 - - matrix: {cuda: "11.*"} - packages: - - *pylibraft_cu11 - - {matrix: null, packages: [*pylibraft_unsuffixed]} test_python_common: common: - output_types: [conda, requirements, pyproject] @@ -468,13 +462,12 @@ dependencies: common: - output_types: [conda, pyproject, requirements] packages: - - hnswlib=0.7.0 + - hnswlib=0.6.2 - nlohmann_json>=3.11.2 - glog>=0.6.0 - h5py>=3.8.0 - benchmark>=1.8.2 - openblas - - *rmm_unsuffixed bench_python: common: - output_types: [conda] @@ -484,3 +477,51 @@ dependencies: - pyyaml - pandas - click + depends_on_librmm: + common: + - output_types: conda + packages: + - &librmm_unsuffixed librmm==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - librmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - librmm-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*librmm_unsuffixed]} + depends_on_pylibraft: + common: + - output_types: conda + packages: + - &pylibraft_unsuffixed pylibraft==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + 
specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - pylibraft-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - pylibraft-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*pylibraft_unsuffixed]} diff --git a/docs/source/c_api/neighbors_cagra_c.rst b/docs/source/c_api/neighbors_cagra_c.rst index eb40d55782..a5ffc45b9c 100644 --- a/docs/source/c_api/neighbors_cagra_c.rst +++ b/docs/source/c_api/neighbors_cagra_c.rst @@ -50,4 +50,10 @@ Index search :members: :content-only: +Index serialize +--------------- +.. doxygengroup:: cagra_c_index_serialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/neighbors_hnsw_c.rst b/docs/source/c_api/neighbors_hnsw_c.rst new file mode 100644 index 0000000000..4d83cd3e38 --- /dev/null +++ b/docs/source/c_api/neighbors_hnsw_c.rst @@ -0,0 +1,43 @@ +HNSW +==== + +This is a wrapper for hnswlib, to load a CAGRA index as an immutable HNSW index. The loaded HNSW index is only compatible with cuVS, and can be searched using wrapper functions. + + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +Index search parameters +----------------------- + +.. doxygengroup:: hnsw_c_search_params + :project: cuvs + :members: + :content-only: + +Index +----- + +.. doxygengroup:: hnsw_c_index + :project: cuvs + :members: + :content-only: + +Index search +------------ + +.. doxygengroup:: hnsw_c_index_search + :project: cuvs + :members: + :content-only: + +Index serialize +--------------- + +.. 
doxygengroup:: hnsw_c_index_serialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api.rst b/docs/source/cpp_api.rst index 5b8b4841af..49732dc92f 100644 --- a/docs/source/cpp_api.rst +++ b/docs/source/cpp_api.rst @@ -11,3 +11,4 @@ C++ API Documentation cpp_api/distance.rst cpp_api/neighbors.rst cpp_api/selection.rst + cpp_api/stats.rst diff --git a/docs/source/cpp_api/neighbors_cagra.rst b/docs/source/cpp_api/neighbors_cagra.rst index 0e07406d1a..d9f5038718 100644 --- a/docs/source/cpp_api/neighbors_cagra.rst +++ b/docs/source/cpp_api/neighbors_cagra.rst @@ -28,7 +28,7 @@ Index search parameters :content-only: Index extend parameters ----------------------- +----------------------- .. doxygengroup:: cagra_cpp_extend_params :project: cuvs @@ -36,7 +36,7 @@ Index extend parameters :content-only: Index extend memory buffers ----------------------- +--------------------------- .. doxygengroup:: cagra_cpp_extend_memory_buffers :project: cuvs @@ -68,10 +68,17 @@ Index search :content-only: Index extend ------------ +------------ .. doxygengroup:: cagra_cpp_index_extend :project: cuvs :members: :content-only: +Index serialize +--------------- + +.. doxygengroup:: cagra_cpp_serialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/neighbors_hnsw.rst b/docs/source/cpp_api/neighbors_hnsw.rst new file mode 100644 index 0000000000..b0af88af00 --- /dev/null +++ b/docs/source/cpp_api/neighbors_hnsw.rst @@ -0,0 +1,52 @@ +HNSW +==== + +This is a wrapper for hnswlib, to load a CAGRA index as an immutable HNSW index. The loaded HNSW index is only compatible with cuVS, and can be searched using wrapper functions. + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors::hnsw* + +Index search parameters +----------------------- + +.. doxygengroup:: hnsw_cpp_search_params + :project: cuvs + :members: + :content-only: + +Index +----- + +.. 
doxygengroup:: hnsw_cpp_index + :project: cuvs + :members: + :content-only: + +Index load +------------ + +.. doxygengroup:: hnsw_cpp_index_load + :project: cuvs + :members: + :content-only: + +Index search +------------ + +.. doxygengroup:: hnsw_cpp_index_search + :project: cuvs + :members: + :content-only: + +Index deserialize +----------------- + +.. doxygengroup:: hnsw_cpp_index_deserialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/stats.rst b/docs/source/cpp_api/stats.rst new file mode 100644 index 0000000000..80d6c65fc9 --- /dev/null +++ b/docs/source/cpp_api/stats.rst @@ -0,0 +1,35 @@ +Stats +===== + + +This page provides C++ class references for the publicly-exposed elements of the `cuvs/stats` +package. + +.. role:: py(code) + :language: c++ + :class: highlight + +Silhouette Score +---------------- + +``#include `` + +namespace *cuvs::stats* + +.. doxygengroup:: stats_silhouette_score + :project: cuvs + :members: + :content-only: + +Trustworthiness Score +--------------------- + +``#include `` + +namespace *cuvs::stats* + +.. doxygengroup:: stats_trustworthiness + :project: cuvs + :members: + :content-only: + diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index fb508728f0..d47cd4f1cb 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -35,3 +35,7 @@ include(../cmake/thirdparty/get_cuvs.cmake) add_executable(CAGRA_C_EXAMPLE src/cagra_c_example.c) target_include_directories(CAGRA_C_EXAMPLE PUBLIC "$") target_link_libraries(CAGRA_C_EXAMPLE PRIVATE cuvs::c_api $) + +add_executable(L2_C_EXAMPLE src/L2_c_example.c) +target_include_directories(L2_C_EXAMPLE PUBLIC "$") +target_link_libraries(L2_C_EXAMPLE PRIVATE cuvs::c_api $) diff --git a/examples/c/src/L2_c_example.c b/examples/c/src/L2_c_example.c new file mode 100644 index 0000000000..73ddf61039 --- /dev/null +++ b/examples/c/src/L2_c_example.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include +#include +#include + +#define DIM 4 +#define N_ROWS 1 + +float PointA[N_ROWS][DIM] = {1.0,2.0,3.0,4.0}; +float PointB[N_ROWS][DIM] = {2.0,3.0,4.0,5.0}; + +cuvsResources_t res; + +void outputVector(float * Vec) { + printf("Vector is "); + for (int i = 0; i < DIM; ++i){ + printf(" %f",Vec[i]); + } + printf("\n"); +} + +/** + * @brief Initialize Tensor. + * + * @param[in] x_d Pointer to a vector + * @param[in] x_shape[] Two-dimensional array, which stores the number of rows and columns of vectors. + * @param[out] x_tensor Stores the initialized DLManagedTensor. + */ +void tensor_initialize(float* x_d, int64_t x_shape[2], DLManagedTensor* x_tensor) { + x_tensor->dl_tensor.data = x_d; + x_tensor->dl_tensor.device.device_type = kDLCUDA; + x_tensor->dl_tensor.ndim = 2; + x_tensor->dl_tensor.dtype.code = kDLFloat; + x_tensor->dl_tensor.dtype.bits = 32; + x_tensor->dl_tensor.dtype.lanes = 1; + x_tensor->dl_tensor.shape = x_shape; + x_tensor->dl_tensor.strides = NULL; +} + +/** + * @brief Calculate the euclidean distance between two arrays. 
+ * + * @param[in] n_cols array length,also the dimension of the vector + * @param[in] x[] Pointer to a vector + * @param[in] y[] Pointer to another vector + * @param[out] ret will store the result about the euclidean distance + */ +void l2_distance_calc(int64_t n_cols,float x[], float y[], float *ret) { + float *x_d, *y_d; + float *distance_d; + cuvsRMMAlloc(res, (void**) &x_d, sizeof(float) * N_ROWS * n_cols); + cuvsRMMAlloc(res, (void**) &y_d, sizeof(float) * N_ROWS * n_cols); + cuvsRMMAlloc(res, (void**) &distance_d, sizeof(float) * N_ROWS * N_ROWS); + + // Use DLPack to represent x[] and y[] as tensors + cudaMemcpy(x_d, x, sizeof(float) * N_ROWS * n_cols, cudaMemcpyDefault); + cudaMemcpy(y_d, y, sizeof(float) * N_ROWS * n_cols, cudaMemcpyDefault); + + DLManagedTensor x_tensor; + int64_t x_shape[2] = {N_ROWS, n_cols}; + tensor_initialize(x_d, x_shape, &x_tensor); + + DLManagedTensor y_tensor; + int64_t y_shape[2] = {N_ROWS, n_cols}; + tensor_initialize(y_d, y_shape, &y_tensor); + + DLManagedTensor dist_tensor; + int64_t distances_shape[2] = {N_ROWS, N_ROWS}; + tensor_initialize(distance_d, distances_shape, &dist_tensor); + + // metric_arg default value is 2.0,used for Minkowski distance + cuvsPairwiseDistance(res, &x_tensor, &y_tensor, &dist_tensor, L2SqrtUnexpanded, 2.0); + + cudaMemcpy(ret, distance_d, sizeof(float) * N_ROWS * N_ROWS, cudaMemcpyDefault); + + cuvsRMMFree(res, distance_d, sizeof(float) * N_ROWS * N_ROWS); + cuvsRMMFree(res, x_d, sizeof(float) * N_ROWS * n_cols); + cuvsRMMFree(res, y_d, sizeof(float) * N_ROWS * n_cols); + +} + +int euclidean_distance_calculation_example() { + // Create a cuvsResources_t object + cuvsResourcesCreate(&res); + + outputVector((float *)PointA); + outputVector((float *)PointB); + + float ret; + + l2_distance_calc(DIM, (float *)PointA, (float *)PointB, &ret); + printf("L2 distance is %f.\n", ret); + + cuvsResourcesDestroy(res); + + return 0; +} + +int main() { + euclidean_distance_calculation_example(); + return 0; +} 
diff --git a/notebooks/VectorSearch_QuestionRetrieval.ipynb b/notebooks/VectorSearch_QuestionRetrieval.ipynb index 4023a1821b..21d59975bb 100644 --- a/notebooks/VectorSearch_QuestionRetrieval.ipynb +++ b/notebooks/VectorSearch_QuestionRetrieval.ipynb @@ -344,7 +344,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/ivf_flat_example.ipynb b/notebooks/ivf_flat_example.ipynb index 2d9c5fb58e..ce35866833 100644 --- a/notebooks/ivf_flat_example.ipynb +++ b/notebooks/ivf_flat_example.ipynb @@ -520,6 +520,30 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23010fbc-8f5a-4403-a112-33f190a85498", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "774848e8-fa45-4223-bd2a-e8585650531e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6309b8a7-f4eb-4976-a824-cd4499a0000d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -538,7 +562,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/tutorial_ivf_pq.ipynb b/notebooks/tutorial_ivf_pq.ipynb index cc0fe41428..9d59daea23 100644 --- a/notebooks/tutorial_ivf_pq.ipynb +++ b/notebooks/tutorial_ivf_pq.ipynb @@ -124,6 +124,7 @@ "outputs": [], "source": [ "DATASET_URL = \"http://ann-benchmarks.com/sift-128-euclidean.hdf5\"\n", + "DATASET_NAME = \"SIFT-128\"\n", "f = load_dataset(DATASET_URL)" ] }, @@ -206,7 +207,7 @@ "# This function takes a row-major either numpy or cupy (GPU) array.\n", "# Generally, it's a bit faster with GPU inputs, but the CPU version may come in handy\n", "# if the whole dataset cannot fit into GPU memory.\n", - "index = ivf_pq.build(index_params, 
dataset, handle=resources)\n", + "index = ivf_pq.build(index_params, dataset, resources=resources)\n", "# This function is asynchronous so we need to explicitly synchronize the GPU before we can measure the execution time\n", "resources.sync()\n", "index" @@ -262,7 +263,7 @@ "outputs": [], "source": [ "%%time\n", - "distances, neighbors = ivf_pq.search(search_params, index, queries, k, handle=resources)\n", + "distances, neighbors = ivf_pq.search(search_params, index, queries, k, resources=resources)\n", "# Sync the GPU to make sure we've got the timing right\n", "resources.sync()" ] @@ -303,8 +304,8 @@ "source": [ "%%time\n", "\n", - "candidates = ivf_pq.search(search_params, index, queries, k * 2, handle=resources)[1]\n", - "distances, neighbors = refine(dataset, queries, candidates, k, handle=resources)\n", + "candidates = ivf_pq.search(search_params, index, queries, k * 2, resources=resources)[1]\n", + "distances, neighbors = refine(dataset, queries, candidates, k, resources=resources)\n", "resources.sync()" ] }, @@ -349,7 +350,7 @@ "bench_avg = np.zeros_like(bench_k, dtype=np.float32)\n", "bench_std = np.zeros_like(bench_k, dtype=np.float32)\n", "for i, k in enumerate(bench_k):\n", - " r = %timeit -o ivf_pq.search(search_params, index, queries, k, handle=resources); resources.sync()\n", + " r = %timeit -o ivf_pq.search(search_params, index, queries, k, resources=resources); resources.sync()\n", " bench_avg[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", " bench_std[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).std()\n", "\n", @@ -387,9 +388,9 @@ "k = 100\n", "for i, n_probes in enumerate(bench_probes):\n", " sp = ivf_pq.SearchParams(n_probes=n_probes)\n", - " r = %timeit -o ivf_pq.search(sp, index, queries, k, handle=resources); resources.sync()\n", + " r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n", " bench_qps[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", - " 
bench_recall[i] = calc_recall(ivf_pq.search(sp, index, queries, k, handle=resources)[1], gt_neighbors)\n", + " bench_recall[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)\n", " " ] }, @@ -492,9 +493,9 @@ "bench_names = ['32/32', '32/16', '32/8', '16/16', '16/8']\n", "\n", "for i, sp in enumerate(search_ps):\n", - " r = %timeit -o ivf_pq.search(sp, index, queries, k, handle=resources); resources.sync()\n", + " r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n", " bench_qps_s1[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", - " bench_recall_s1[i] = calc_recall(ivf_pq.search(sp, index, queries, k, handle=resources)[1], gt_neighbors)" + " bench_recall_s1[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)" ] }, { @@ -505,7 +506,7 @@ "source": [ "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n", "fig.suptitle(\n", - " f'Effects of search parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n", + " f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", " f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n", "ax.plot(bench_recall_s1, bench_qps_s1, 'o')\n", "ax.set_xlabel('recall')\n", @@ -553,8 +554,8 @@ "source": [ "def search_refine(ps, ratio):\n", " k_search = k * ratio\n", - " candidates = ivf_pq.search(ps, index, queries, k_search, handle=resources)[1]\n", - " return candidates if ratio == 1 else refine(dataset, queries, candidates, k, handle=resources)[1]\n", + " candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n", + " return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n", "\n", "ratios = [1, 2, 4]\n", "bench_qps_sr = np.zeros((len(ratios), len(search_ps)), dtype=np.float32)\n", @@ -575,7 +576,7 @@ "source": [ "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n", 
"fig.suptitle(\n", - " f'Effects of search parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n", + " f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", " f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n", "labels = []\n", "for j, ratio in enumerate(ratios):\n", @@ -629,8 +630,8 @@ " n_probes=n_probes,\n", " internal_distance_dtype=internal_distance_dtype,\n", " lut_dtype=lut_dtype)\n", - " candidates = ivf_pq.search(ps, index, queries, k_search, handle=resources)[1]\n", - " return candidates if ratio == 1 else refine(dataset, queries, candidates, k, handle=resources)[1]\n", + " candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n", + " return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n", "\n", "search_configs = [\n", " lambda n_probes: search_refine(np.float16, np.float16, 1, n_probes),\n", @@ -703,12 +704,13 @@ "\n", "for i, n_lists in enumerate(n_list_variants):\n", " index_params = ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=pq_dim)\n", - " index = ivf_pq.build(index_params, dataset, handle=resources)\n", + " index = ivf_pq.build(index_params, dataset, resources=resources)\n", " for j, pl_ratio in enumerate(pl_ratio_variants):\n", " n_probes = max(1, n_lists // pl_ratio)\n", " r = %timeit -o search_fun(n_probes); resources.sync()\n", " bench_qps_nl[i, j] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n", - " bench_recall_nl[i, j] = calc_recall(search_fun(n_probes), gt_neighbors)" + " bench_recall_nl[i, j] = calc_recall(search_fun(n_probes), gt_neighbors)\n", + " del index" ] }, { @@ -719,7 +721,7 @@ "source": [ "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n", "fig.suptitle(\n", - " f'Effects of n_list on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n", + " f'Effects of n_list on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", " f'k = {k}, pq_dim = {pq_dim}, search = 
{search_label}')\n", "labels = []\n", "for i, n_lists in enumerate(n_list_variants):\n", @@ -875,7 +877,7 @@ "bench_recall_ip = np.zeros_like(bench_qps_ip, dtype=np.float32)\n", "\n", "for i, index_params in enumerate(build_configs.values()):\n", - " index = ivf_pq.build(index_params, dataset, handle=resources)\n", + " index = ivf_pq.build(index_params, dataset, resources=resources)\n", " for l, search_fun in enumerate(search_configs):\n", " for j, n_probes in enumerate(n_probes_variants):\n", " r = %timeit -o search_fun(n_probes); resources.sync()\n", @@ -891,7 +893,7 @@ "source": [ "fig, ax = plt.subplots(len(search_config_names), 1, figsize=(16, len(search_config_names)*8))\n", "fig.suptitle(\n", - " f'Effects of index parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n", + " f'Effects of index parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n", " f'k = {k}, n_lists = {n_lists}')\n", "\n", "for j, search_label in enumerate(search_config_names):\n", @@ -932,7 +934,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.9" }, "vscode": { "interpreter": { diff --git a/pyproject.toml b/pyproject.toml index 2982db2a23..fbf4cf41fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.black] line-length = 79 -target-version = ["py39"] +target-version = ["py310"] include = '\.py?$' force-exclude = ''' /( diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 4d24682cfb..7d2f8dcf90 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -37,6 +37,7 @@ project( option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulting to local files" OFF ) +option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) message( "CUVS_PY: Searching for existing cuVS C/C++ installations before defaulting to local files: ${FIND_CUVS_CPP}" @@ -62,6 +63,8 @@ else() endif() if(NOT 
cuvs_FOUND) + find_package(CUDAToolkit REQUIRED) + set(BUILD_TESTS OFF) set(BUILD_C_LIBRARY ON) @@ -70,8 +73,26 @@ if(NOT cuvs_FOUND) set(CUDA_STATIC_MATH_LIBRARIES ON) set(CUVS_USE_RAFT_STATIC ON) + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) + set(CUDA_STATIC_MATH_LIBRARIES OFF) + elseif(USE_CUDA_MATH_WHEELS) + message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") + endif() + add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) + if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) + set(rpaths + "$ORIGIN/../nvidia/cublas/lib" + "$ORIGIN/../nvidia/curand/lib" + "$ORIGIN/../nvidia/cusolver/lib" + "$ORIGIN/../nvidia/cusparse/lib" + "$ORIGIN/../nvidia/nvjitlink/lib" + ) + set_property(TARGET cuvs PROPERTY INSTALL_RPATH ${rpaths} APPEND) + set_property(TARGET cuvs_c PROPERTY INSTALL_RPATH ${rpaths} APPEND) + endif() + set(cython_lib_dir cuvs) install(TARGETS cuvs cuvs_c DESTINATION ${cython_lib_dir}) endif() diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx index a3799144c7..25b9b2aeeb 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx @@ -58,12 +58,14 @@ cdef class IndexParams: metric : str, default = "sqeuclidean" String denoting the metric type. Valid values for metric: ["sqeuclidean", "inner_product", - "euclidean"], where + "euclidean", "cosine"], where - sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2, - euclidean is the euclidean distance - inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i. + - cosine distance is defined as + distance(a, b) = 1 - \\sum_i a_i * b_i / ( ||a||_2 * ||b||_2). kmeans_n_iters : int, default = 20 The number of iterations searching for kmeans centers during index building. 
diff --git a/python/cuvs/cuvs/test/test_ivf_flat.py b/python/cuvs/cuvs/test/test_ivf_flat.py index bb50d35734..9dd4097dcb 100644 --- a/python/cuvs/cuvs/test/test_ivf_flat.py +++ b/python/cuvs/cuvs/test/test_ivf_flat.py @@ -92,6 +92,7 @@ def run_ivf_flat_build_search_test( skl_metric = { "sqeuclidean": "sqeuclidean", "inner_product": "cosine", + "cosine": "cosine", "euclidean": "euclidean", }[metric] nn_skl = NearestNeighbors( @@ -107,7 +108,7 @@ def run_ivf_flat_build_search_test( @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize("dtype", [np.float32]) @pytest.mark.parametrize( - "metric", ["sqeuclidean", "inner_product", "euclidean"] + "metric", ["sqeuclidean", "inner_product", "euclidean", "cosine"] ) def test_ivf_flat(inplace, dtype, metric): run_ivf_flat_build_search_test( diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index cc2aa6a7fe..68bd9a8688 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -29,17 +29,22 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "cuda-python", - "numpy>=1.23,<2.0a0", + "numpy>=1.23,<3.0a0", + "nvidia-cublas", + "nvidia-curand", + "nvidia-cusolver", + "nvidia-cusparse", "pylibraft==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] [project.optional-dependencies] @@ -123,12 +128,10 @@ requires = [ "cuda-python", "cython>=3.0.0", "ninja", - "pylibraft==24.10.*,>=0.0.0a0", - "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true" +matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" [tool.pytest.ini_options] filterwarnings = [ diff --git a/python/cuvs_bench/LICENSE b/python/cuvs_bench/LICENSE deleted file mode 100644 index 1a89b9054d..0000000000 --- a/python/cuvs_bench/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright 2020 NVIDIA Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/python/cuvs_bench/LICENSE b/python/cuvs_bench/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/python/cuvs_bench/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/cuvs_bench/cuvs_bench/VERSION b/python/cuvs_bench/cuvs_bench/VERSION deleted file mode 100644 index 7c7ba04436..0000000000 --- a/python/cuvs_bench/cuvs_bench/VERSION +++ /dev/null @@ -1 +0,0 @@ -24.10.00 diff --git a/python/cuvs_bench/cuvs_bench/VERSION b/python/cuvs_bench/cuvs_bench/VERSION new file mode 120000 index 0000000000..d62dc733ef --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/cuvs_bench/cuvs_bench/run/__main__.py b/python/cuvs_bench/cuvs_bench/run/__main__.py index 5590105b63..96a8d55039 100644 --- a/python/cuvs_bench/cuvs_bench/run/__main__.py +++ b/python/cuvs_bench/cuvs_bench/run/__main__.py @@ -27,7 +27,6 @@ @click.option( "--subset-size", type=click.IntRange(min=1), - prompt='Enter the subset size', help="The number of subset rows of the dataset to build the index" ) @click.option( @@ -49,12 +48,12 @@ ) @click.option( "--dataset-configuration", - prompt='Enter the path to YAML configuration file for datasets', + default=None, + show_default=True, help="Path to YAML configuration file for datasets" ) @click.option( "--configuration", - prompt='Enter the path 
to YAML configuration file or directory for algorithms', help="Path to YAML configuration file or directory for algorithms. " "Any run groups found in the specified file/directory will " "automatically override groups of the same name present in the " @@ -88,7 +87,7 @@ ) @click.option( "--algorithms", - default=None, + default="cuvs_cagra", show_default=True, prompt='Enter the comma separated list of named algorithms to run', help="Run only comma separated list of named algorithms. If parameters `groups` and `algo-groups` are both undefined, " @@ -103,7 +102,6 @@ ) @click.option( "--algo-groups", - prompt='Enter the comma separated . to run', help='Add comma separated . to run. Example usage: "--algo-groups=raft_cagra.large,hnswlib.large".', ) @click.option( @@ -125,7 +123,6 @@ "--search-threads", default=None, show_default=True, - prompt='Enter the number of threads to use for throughput benchmark', help="Specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. " "Example: --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. " "If only 'min' is specified, then a single test is run with 'min' threads. By default min=1, max=." diff --git a/python/cuvs_bench/cuvs_bench/run/run.py b/python/cuvs_bench/cuvs_bench/run/run.py index db9628b80c..2908b9deaf 100644 --- a/python/cuvs_bench/cuvs_bench/run/run.py +++ b/python/cuvs_bench/cuvs_bench/run/run.py @@ -24,6 +24,21 @@ from .runners import cuvs_bench_cpp +def rmm_present() -> bool: + """ + Check if RMM (RAPIDS Memory Manager) is present. + Returns + ------- + bool + True if RMM is present, False otherwise. + """ + try: + import rmm # noqa: F401 + return True + except ImportError: + return False + + def load_yaml_file(file_path: str) -> dict: """ Load a YAML file and return its contents as a dictionary. 
@@ -188,101 +203,31 @@ def prepare_executables(algos_conf: dict, algos_yaml: dict, gpu_present: bool, c executable = find_executable(algos_yaml, algo, group, count, batch_size) if executable not in executables_to_run: executables_to_run[executable] = {"index": []} - indexes = prepare_indexes(group_conf, algo, group, conf_file, dataset_path, dataset, count, batch_size) + indexes = prepare_indexes(group_conf, algo, group, conf_file, algos_conf, dataset_path, dataset, count, batch_size) executables_to_run[executable]["index"].extend(indexes) return executables_to_run -def prepare_indexes(group_conf: dict, algo: str, group: str, conf_file: dict, dataset_path: str, dataset: str, count: int, batch_size: int) -> list: - """ - Prepare the index configurations for the given algorithm and group. - Parameters - ---------- - group_conf : dict - The configuration for the algorithm group. - algo : str - The name of the algorithm. - group : str - The name of the group. - conf_file : dict - The main configuration file. - dataset_path : str - The path to the dataset directory. - dataset : str - The name of the dataset. - count : int - The number of nearest neighbors to search for. - batch_size : int - The size of each batch for processing. - Returns - ------- - list - A list of index configurations. 
- """ - indexes = [] - build_params = group_conf.get("build", {}) - search_params = group_conf.get("search", {}) - all_build_params = itertools.product(*build_params.values()) - search_param_names, search_param_lists = zip(*search_params.items()) if search_params else ([], []) - for params in all_build_params: - index = {"algo": algo, "build_param": dict(zip(build_params.keys(), params))} - index_name = f"{algo}_{group}" if group != "base" else f"{algo}" - index_filename = index_name if len(index_name) < 128 else str(hash(index_name)) - index["name"] = index_name - index["file"] = os.path.join(dataset_path, dataset, "index", index_filename) - index["search_params"] = validate_search_params( - itertools.product(*search_param_lists), search_param_names, algo, group_conf, conf_file, count, batch_size - ) - if index["search_params"]: - indexes.append(index) - return indexes - - -def validate_search_params(all_search_params, search_param_names, algo, group_conf, conf_file, count, batch_size) -> list: +def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: """ - Validate and prepare the search parameters for the given algorithm and group. + Validate the algorithm based on the available hardware (GPU presence). Parameters ---------- - all_search_params : itertools.product - The Cartesian product of search parameter values. - search_param_names : list - The names of the search parameters. + algos_conf : dict + The configuration dictionary for the algorithms. algo : str The name of the algorithm. - group_conf : dict - The configuration for the algorithm group. - conf_file : dict - The main configuration file. - count : int - The number of nearest neighbors to search for. - batch_size : int - The size of each batch for processing. - Returns - ------- - list - A list of validated search parameters. 
- """ - search_params_list = [] - for search_params in all_search_params: - search_dict = dict(zip(search_param_names, search_params)) - if validate_constraints(group_conf, algo, "search", search_dict, conf_file["dataset"].get("dims"), count, batch_size): - search_params_list.append(search_dict) - return search_params_list - - -def rmm_present() -> bool: - """ - Check if RMM (RAPIDS Memory Manager) is present. + gpu_present : bool + Whether a GPU is present. Returns ------- bool - True if RMM is present, False otherwise. + True if the algorithm is valid for the current hardware configuration, False otherwise. """ - try: - import rmm # noqa: F401 - return True - except ImportError: - return False + algos_conf_keys = set(algos_conf.keys()) + if gpu_present: + return algo in algos_conf_keys + return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: int) -> Tuple[str, str, Tuple[str, str]]: @@ -348,26 +293,81 @@ def get_build_path(executable: str) -> Optional[str]: return None -def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: +def prepare_indexes(group_conf: dict, algo: str, group: str, conf_file: dict, algos_conf: dict, dataset_path: str, dataset: str, count: int, batch_size: int) -> list: """ - Validate the algorithm based on the available hardware (GPU presence). + Prepare the index configurations for the given algorithm and group. Parameters ---------- - algos_conf : dict - The configuration dictionary for the algorithms. + group_conf : dict + The configuration for the algorithm group. algo : str The name of the algorithm. - gpu_present : bool - Whether a GPU is present. + group : str + The name of the group. + conf_file : dict + The main configuration file. + dataset_path : str + The path to the dataset directory. + dataset : str + The name of the dataset. + count : int + The number of nearest neighbors to search for. 
+ batch_size : int + The size of each batch for processing. Returns ------- - bool - True if the algorithm is valid for the current hardware configuration, False otherwise. + list + A list of index configurations. """ - algos_conf_keys = set(algos_conf.keys()) - if gpu_present: - return algo in algos_conf_keys - return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False + indexes = [] + build_params = group_conf.get("build", {}) + search_params = group_conf.get("search", {}) + all_build_params = itertools.product(*build_params.values()) + search_param_names, search_param_lists = zip(*search_params.items()) if search_params else ([], []) + for params in all_build_params: + index = {"algo": algo, "build_param": dict(zip(build_params.keys(), params))} + index_name = f"{algo}_{group}" if group != "base" else f"{algo}" + index_filename = index_name if len(index_name) < 128 else str(hash(index_name)) + index["name"] = index_name + index["file"] = os.path.join(dataset_path, dataset, "index", index_filename) + index["search_params"] = validate_search_params( + itertools.product(*search_param_lists), search_param_names, index["build_param"], algo, group_conf, algos_conf, conf_file, count, batch_size + ) + if index["search_params"]: + indexes.append(index) + return indexes + + +def validate_search_params(all_search_params, search_param_names, build_params, algo, group_conf, algos_conf, conf_file, count, batch_size) -> list: + """ + Validate and prepare the search parameters for the given algorithm and group. + Parameters + ---------- + all_search_params : itertools.product + The Cartesian product of search parameter values. + search_param_names : list + The names of the search parameters. + algo : str + The name of the algorithm. + group_conf : dict + The configuration for the algorithm group. + conf_file : dict + The main configuration file. + count : int + The number of nearest neighbors to search for. 
+ batch_size : int + The size of each batch for processing. + Returns + ------- + list + A list of validated search parameters. + """ + search_params_list = [] + for search_params in all_search_params: + search_dict = dict(zip(search_param_names, search_params)) + if validate_constraints(algos_conf, algo, "search", search_dict, build_params, conf_file["dataset"].get("dims"), count, batch_size): + search_params_list.append(search_dict) + return search_params_list def validate_constraints( @@ -375,6 +375,7 @@ def validate_constraints( algo: str, constraint_type: str, param: Dict[str, Any], + build_param: dict, dims: Any, k: Optional[int], batch_size: Optional[int] @@ -411,9 +412,13 @@ def validate_constraints( module, func = ".".join(importable.split(".")[:-1]), importable.split(".")[-1] validator = import_module(module) constraints_func = getattr(validator, func) - if constraint_type == "build" and "dims" not in conf_file["dataset"]: - raise ValueError("`dims` needed for build constraints but not specified in datasets.yaml") - return constraints_func(param, dims) + if constraint_type == "build": + if "dims" not in conf_file["dataset"]: + raise ValueError("`dims` needed for build constraints but not specified in datasets.yaml") + else: + return constraints_func(param, dims) + else: + return constraints_func(param, build_param, k, batch_size) return True diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 9894dfc171..468cd47bfb 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. [build-system] +build-backend = "rapids_build_backend.build" requires = [ "rapids-build-backend>=0.3.0,<0.4.0.dev0", "setuptools", @@ -8,14 +9,14 @@ requires = [ ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project] -name = "cuvs_bench" +name = "cuvs-bench" dynamic = ["version"] -description = "cuVS benchmarks" +description = "RAFT ANN benchmarks" authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -24,21 +25,17 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] -[tool.rapids-build-backend] -build-backend = "setuptools.build_meta" -dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true" - [project.urls] -Homepage = "https://github.com/rapidsai/cuvs" +Homepage = "https://github.com/rapidsai/raft" -[tool.setuptools] -license-files = ["LICENSE"] + +[tool.setuptools.package-data] +"*" = ["*.*", "VERSION"] [tool.isort] line_length = 79 @@ -64,3 +61,9 @@ skip = [ [tool.setuptools.dynamic] version = { file = "cuvs_bench/VERSION" } +[tool.rapids-build-backend] +build-backend = "setuptools.build_meta" +requires = [] +dependencies-file = "../../dependencies.yaml" +commit-files = ["src/cuvs_bench/GIT_COMMIT"] +matrix-entry = "cuda_suffixed=true" \ No newline at end of file From 742e7c5d547812a074db2e7c0e25e0c0278e18a7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 18 Sep 2024 14:22:49 -0700 Subject: [PATCH 10/18] fix runners --- build.sh | 5 + dependencies.yaml | 1 + .../cuvs_bench/config/algos/cuvs_cagra.yaml | 2 +- python/cuvs_bench/cuvs_bench/run/__main__.py | 104 +++---- python/cuvs_bench/cuvs_bench/run/run.py | 253 +++++++++++++++--- 5 files changed, 271 insertions(+), 94 deletions(-) diff --git a/build.sh 
b/build.sh index a283bcd070..e70f26f32a 100755 --- a/build.sh +++ b/build.sh @@ -419,6 +419,11 @@ if (( ${NUMARGS} == 0 )) || hasArg python; then python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuvs fi +# Build and (optionally) install the cuvs_bench Python package +if (( ${NUMARGS} == 0 )) || hasArg bench-ann; then + python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuvs_bench -vvv +fi + # Build the cuvs Rust bindings if (( ${NUMARGS} == 0 )) || hasArg rust; then cd ${REPODIR}/rust diff --git a/dependencies.yaml b/dependencies.yaml index 9fcbeaae2f..8fac79d951 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -38,6 +38,7 @@ files: - develop - bench - bench_python + - rapids_build_setuptools test_cpp: output: none includes: diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml index 4b0e0289b2..edacb25b5f 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml @@ -5,7 +5,7 @@ constraints: groups: base: build: - graph_degree: [32, 64, 128, 256] + graph_degree: [32, 64, 96, 128] intermediate_graph_degree: [32, 64, 96, 128] graph_build_algo: ["NN_DESCENT"] search: diff --git a/python/cuvs_bench/cuvs_bench/run/__main__.py b/python/cuvs_bench/cuvs_bench/run/__main__.py index 96a8d55039..b26e1293fb 100644 --- a/python/cuvs_bench/cuvs_bench/run/__main__.py +++ b/python/cuvs_bench/cuvs_bench/run/__main__.py @@ -14,20 +14,19 @@ # limitations under the License. # -import click import os - from pathlib import Path from typing import Optional -from . 
import run_benchmark +import click +from run import run_benchmark @click.command() @click.option( "--subset-size", type=click.IntRange(min=1), - help="The number of subset rows of the dataset to build the index" + help="The number of subset rows of the dataset to build the index", ) @click.option( "-k", @@ -35,80 +34,78 @@ default=10, show_default=True, type=click.IntRange(min=1), - prompt='Enter the number of neighbors to search for', - help="The number of nearest neighbors to search for" + prompt="Enter the number of neighbors to search for", + help="The number of nearest neighbors to search for", ) @click.option( - "-bs", "--batch-size", + "-bs", + "--batch-size", default=10000, show_default=True, type=click.IntRange(min=1), - prompt='Enter the batch size', - help="Number of query vectors to use in each query trial" + prompt="Enter the batch size", + help="Number of query vectors to use in each query trial", ) @click.option( "--dataset-configuration", default=None, show_default=True, - help="Path to YAML configuration file for datasets" + help="Path to YAML configuration file for datasets", ) @click.option( "--configuration", help="Path to YAML configuration file or directory for algorithms. " - "Any run groups found in the specified file/directory will " - "automatically override groups of the same name present in the " - "default configurations, including `base`." 
+ "Any run groups found in the specified file/directory will " + "automatically override groups of the same name present in the " + "default configurations, including `base`.", ) @click.option( "--dataset", default="glove-100-inner", show_default=True, - prompt='Enter the name of dataset', - help="Name of dataset" + prompt="Enter the name of dataset", + help="Name of dataset", ) @click.option( "--dataset-path", - default=lambda: os.environ.get("RAPIDS_DATASET_ROOT_DIR", - os.path.join(Path(__file__).parent, "datasets/")), + default=lambda: os.environ.get( + "RAPIDS_DATASET_ROOT_DIR", + os.path.join(Path(__file__).parent, "datasets/"), + ), show_default=True, - prompt='Enter the path to dataset folder', - help="Path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, " - "otherwise a datasets subdirectory from the calling directory." -) -@click.option( - "--build", - is_flag=True, - help="Build the index" -) -@click.option( - "--search", - is_flag=True, - help="Perform the search" + prompt="Enter the path to dataset folder", + help="Path to dataset folder, by default will look in " + "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " + "subdirectory from the calling directory.", ) +@click.option("--build", is_flag=True, help="Build the index") +@click.option("--search", is_flag=True, help="Perform the search") @click.option( "--algorithms", default="cuvs_cagra", show_default=True, - prompt='Enter the comma separated list of named algorithms to run', - help="Run only comma separated list of named algorithms. If parameters `groups` and `algo-groups` are both undefined, " - "then group `base` is run by default." + prompt="Enter the comma separated list of named algorithms to run", + help="Run only comma separated list of named algorithms. 
If parameters " + "`groups` and `algo-groups` are both undefined, then group `base` " + "is run by default.", ) @click.option( "--groups", default="base", show_default=True, - prompt='Enter the comma separated groups of parameters', - help="Run only comma separated groups of parameters" + prompt="Enter the comma separated groups of parameters", + help="Run only comma separated groups of parameters", ) @click.option( "--algo-groups", - help='Add comma separated . to run. Example usage: "--algo-groups=raft_cagra.large,hnswlib.large".', + help="Add comma separated . to run. Example usage: " + ' "--algo-groups=raft_cagra.large,hnswlib.large".', ) @click.option( "-f", "--force", is_flag=True, - help="Re-run algorithms even if their results already exist" + help="Re-run algorithms even if their results already exist", ) @click.option( "-m", @@ -116,33 +113,40 @@ default="latency", show_default=True, prompt='Enter the search mode ("latency" or "throughput")', - help="Run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode." + help="Run search in 'latency' (measure individual batches) or " + "'throughput' (pipeline batches and measure end-to-end) mode.", ) @click.option( "-t", "--search-threads", default=None, show_default=True, - help="Specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. " - "Example: --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. " - "If only 'min' is specified, then a single test is run with 'min' threads. By default min=1, max=." + help="Specify the number threads to use for throughput benchmark. " + "Single value or a pair of min and max separated by ':'. " + "Example: --search-threads=1:4. Power of 2 values between 'min' " + "and 'max' will be used. If only 'min' is specified, then a single " + "test is run with 'min' threads. 
By default min=1, " + "max=.", ) @click.option( "-r", "--dry-run", is_flag=True, - help="Dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that’s consumed " - "by the lower-level c++ binaries and then print the command to run execute the benchmarks but will not actually execute " - "the command." + help="Dry-run mode will convert the yaml config for the specified " + "algorithms and datasets to the json format that’s consumed " + "by the lower-level c++ binaries and then print the command to " + "run execute the benchmarks but will not actually execute " + "the command.", ) @click.option( "--raft-log-level", default="info", show_default=True, - prompt='Enter the log level', - help="Log level, possible values are [off, error, warn, info, debug, trace]. Default: 'info'. " - "Note that 'debug' or more detailed logging level requires that the library is compiled with " - "-DRAFT_ACTIVE_LEVEL= where >= ." + prompt="Enter the log level", + help="Log level, possible values are [off, error, warn, info, debug, " + "trace]. Default: 'info'. Note that 'debug' or more detailed " + "logging level requires that the library is compiled with " + "-DRAFT_ACTIVE_LEVEL= where >= .", ) def main( subset_size: Optional[int], @@ -161,7 +165,7 @@ def main( search_mode: str, search_threads: Optional[str], dry_run: bool, - raft_log_level: str + raft_log_level: str, ) -> None: """ Main function to run the benchmark with the provided options. @@ -208,5 +212,5 @@ def main( run_benchmark(**locals()) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/python/cuvs_bench/cuvs_bench/run/run.py b/python/cuvs_bench/cuvs_bench/run/run.py index 2908b9deaf..dbedcc183e 100644 --- a/python/cuvs_bench/cuvs_bench/run/run.py +++ b/python/cuvs_bench/cuvs_bench/run/run.py @@ -14,19 +14,20 @@ # limitations under the License. 
# -import importlib import itertools import os import warnings -import yaml from importlib import import_module -from typing import Optional, Dict, Any, Tuple -from .runners import cuvs_bench_cpp +from typing import Any, Dict, Optional, Tuple + +import yaml +from runners import cuvs_bench_cpp def rmm_present() -> bool: """ Check if RMM (RAPIDS Memory Manager) is present. + Returns ------- bool @@ -34,18 +35,21 @@ def rmm_present() -> bool: """ try: import rmm # noqa: F401 + return True except ImportError: return False - + def load_yaml_file(file_path: str) -> dict: """ Load a YAML file and return its contents as a dictionary. + Parameters ---------- file_path : str The path to the YAML file. + Returns ------- dict @@ -58,16 +62,19 @@ def load_yaml_file(file_path: str) -> dict: def get_dataset_configuration(dataset: str, dataset_conf_all: list) -> dict: """ Retrieve the configuration for a specific dataset. + Parameters ---------- dataset : str The name of the dataset to retrieve the configuration for. dataset_conf_all : list A list of dataset configurations. + Returns ------- dict The configuration for the specified dataset. + Raises ------ ValueError @@ -79,9 +86,12 @@ def get_dataset_configuration(dataset: str, dataset_conf_all: list) -> dict: raise ValueError("Could not find a dataset configuration") -def prepare_conf_file(dataset_conf: dict, subset_size: Optional[int], count: int, batch_size: int) -> dict: +def prepare_conf_file( + dataset_conf: dict, subset_size: Optional[int], count: int, batch_size: int +) -> dict: """ Prepare the main configuration file for the benchmark. + Parameters ---------- dataset_conf : dict @@ -92,6 +102,7 @@ def prepare_conf_file(dataset_conf: dict, subset_size: Optional[int], count: int The number of nearest neighbors to search for. batch_size : int The size of each batch for processing. 
+ Returns ------- dict @@ -104,21 +115,27 @@ def prepare_conf_file(dataset_conf: dict, subset_size: Optional[int], count: int return conf_file -def gather_algorithm_configs(scripts_path: str, configuration: Optional[str]) -> list: +def gather_algorithm_configs( + scripts_path: str, configuration: Optional[str] +) -> list: """ Gather the list of algorithm configuration files. + Parameters ---------- scripts_path : str The path to the script directory. configuration : Optional[str] The path to the algorithm configuration directory or file. + Returns ------- list A list of paths to the algorithm configuration files. """ - algos_conf_fs = os.listdir(os.path.join(scripts_path, "../config", "algos")) + algos_conf_fs = os.listdir( + os.path.join(scripts_path, "../config", "algos") + ) algos_conf_fs = [ os.path.join(scripts_path, "../config", "algos", f) for f in algos_conf_fs @@ -137,9 +154,14 @@ def gather_algorithm_configs(scripts_path: str, configuration: Optional[str]) -> return algos_conf_fs -def load_algorithms_conf(algos_conf_fs: list, allowed_algos: Optional[list], allowed_algo_groups: Optional[tuple]) -> dict: +def load_algorithms_conf( + algos_conf_fs: list, + allowed_algos: Optional[list], + allowed_algo_groups: Optional[tuple], +) -> dict: """ Load and filter the algorithm configurations. + Parameters ---------- algos_conf_fs : list @@ -148,10 +170,12 @@ def load_algorithms_conf(algos_conf_fs: list, allowed_algos: Optional[list], all A list of allowed algorithm names to filter by. allowed_algo_groups : Optional[tuple] A tuple of allowed algorithm groups to filter by. + Returns ------- dict - A dictionary containing the loaded and filtered algorithm configurations. + A dictionary containing the loaded and filtered algorithm + configurations. 
""" algos_conf = {} for algo_f in algos_conf_fs: @@ -162,17 +186,34 @@ def load_algorithms_conf(algos_conf_fs: list, allowed_algos: Optional[list], all continue if allowed_algos and algo["name"] not in allowed_algos: continue - algos_conf[algo["name"]] = {"groups": algo.get("groups", {}), "constraints": algo.get("constraints", {})} + algos_conf[algo["name"]] = { + "groups": algo.get("groups", {}), + "constraints": algo.get("constraints", {}), + } if allowed_algo_groups and algo["name"] in allowed_algo_groups[0]: algos_conf[algo["name"]]["groups"].update( - {group: algo["groups"][group] for group in allowed_algo_groups[1] if group in algo["groups"]} + { + group: algo["groups"][group] + for group in allowed_algo_groups[1] + if group in algo["groups"] + } ) return algos_conf -def prepare_executables(algos_conf: dict, algos_yaml: dict, gpu_present: bool, conf_file: dict, dataset_path: str, dataset: str, count: int, batch_size: int) -> dict: +def prepare_executables( + algos_conf: dict, + algos_yaml: dict, + gpu_present: bool, + conf_file: dict, + dataset_path: str, + dataset: str, + count: int, + batch_size: int, +) -> dict: """ Prepare the list of executables to run based on the configurations. + Parameters ---------- algos_conf : dict @@ -191,19 +232,33 @@ def prepare_executables(algos_conf: dict, algos_yaml: dict, gpu_present: bool, c The number of nearest neighbors to search for. batch_size : int The size of each batch for processing. + Returns ------- dict - A dictionary of executables to run with their associated configurations. + A dictionary of executables to run with their associated + configurations. 
""" executables_to_run = {} for algo, algo_conf in algos_conf.items(): validate_algorithm(algos_yaml, algo, gpu_present) for group, group_conf in algo_conf["groups"].items(): - executable = find_executable(algos_yaml, algo, group, count, batch_size) + executable = find_executable( + algos_yaml, algo, group, count, batch_size + ) if executable not in executables_to_run: executables_to_run[executable] = {"index": []} - indexes = prepare_indexes(group_conf, algo, group, conf_file, algos_conf, dataset_path, dataset, count, batch_size) + indexes = prepare_indexes( + group_conf, + algo, + group, + conf_file, + algos_conf, + dataset_path, + dataset, + count, + batch_size, + ) executables_to_run[executable]["index"].extend(indexes) return executables_to_run @@ -211,6 +266,7 @@ def prepare_executables(algos_conf: dict, algos_yaml: dict, gpu_present: bool, c def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: """ Validate the algorithm based on the available hardware (GPU presence). + Parameters ---------- algos_conf : dict @@ -219,20 +275,27 @@ def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: The name of the algorithm. gpu_present : bool Whether a GPU is present. + Returns ------- bool - True if the algorithm is valid for the current hardware configuration, False otherwise. + True if the algorithm is valid for the current hardware + configuration, False otherwise. 
""" algos_conf_keys = set(algos_conf.keys()) if gpu_present: return algo in algos_conf_keys - return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False + return ( + algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False + ) -def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: int) -> Tuple[str, str, Tuple[str, str]]: +def find_executable( + algos_conf: dict, algo: str, group: str, k: int, batch_size: int +) -> Tuple[str, str, Tuple[str, str]]: """ Find the executable for the given algorithm and group. + Parameters ---------- algos_conf : dict @@ -245,10 +308,12 @@ def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: The number of nearest neighbors to search for. batch_size : int The size of each batch for processing. + Returns ------- Tuple[str, str, Tuple[str, str]] - A tuple containing the executable name, the path to the executable, and the file name. + A tuple containing the executable name, the path to the executable, + and the file name. """ executable = algos_conf[algo]["executable"] file_name = (f"{algo},{group}", f"{algo},{group},k{k},bs{batch_size}") @@ -261,10 +326,12 @@ def find_executable(algos_conf: dict, algo: str, group: str, k: int, batch_size: def get_build_path(executable: str) -> Optional[str]: """ Get the build path for the given executable. + Parameters ---------- executable : str The name of the executable. 
+ Returns ------- Optional[str] @@ -278,7 +345,9 @@ def get_build_path(executable: str) -> Optional[str]: build_path = os.getenv("CUVS_HOME") if build_path: - build_path = os.path.join(build_path, "cpp", "build", "release", executable) + build_path = os.path.join( + build_path, "cpp", "build", "release", executable + ) if os.path.exists(build_path): print(f"-- Using RAFT bench from repository in {build_path}.") return build_path @@ -293,9 +362,20 @@ def get_build_path(executable: str) -> Optional[str]: return None -def prepare_indexes(group_conf: dict, algo: str, group: str, conf_file: dict, algos_conf: dict, dataset_path: str, dataset: str, count: int, batch_size: int) -> list: +def prepare_indexes( + group_conf: dict, + algo: str, + group: str, + conf_file: dict, + algos_conf: dict, + dataset_path: str, + dataset: str, + count: int, + batch_size: int, +) -> list: """ Prepare the index configurations for the given algorithm and group. + Parameters ---------- group_conf : dict @@ -314,6 +394,7 @@ def prepare_indexes(group_conf: dict, algo: str, group: str, conf_file: dict, al The number of nearest neighbors to search for. batch_size : int The size of each batch for processing. 
+ Returns ------- list @@ -323,24 +404,70 @@ def prepare_indexes(group_conf: dict, algo: str, group: str, conf_file: dict, al build_params = group_conf.get("build", {}) search_params = group_conf.get("search", {}) all_build_params = itertools.product(*build_params.values()) - search_param_names, search_param_lists = zip(*search_params.items()) if search_params else ([], []) + search_param_names, search_param_lists = ( + zip(*search_params.items()) if search_params else ([], []) + ) + param_names = list(build_params.keys()) for params in all_build_params: - index = {"algo": algo, "build_param": dict(zip(build_params.keys(), params))} + index = { + "algo": algo, + "build_param": dict(zip(build_params.keys(), params)), + } index_name = f"{algo}_{group}" if group != "base" else f"{algo}" - index_filename = index_name if len(index_name) < 128 else str(hash(index_name)) + for i in range(len(params)): + index["build_param"][param_names[i]] = params[i] + index_name += "." + f"{param_names[i]}{params[i]}" + + if not validate_constraints( + algos_conf, + algo, + "build", + index["build_param"], + None, + conf_file["dataset"].get("dims"), + count, + batch_size, + ): + continue + + index_filename = ( + index_name if len(index_name) < 128 else str(hash(index_name)) + ) index["name"] = index_name - index["file"] = os.path.join(dataset_path, dataset, "index", index_filename) + index["file"] = os.path.join( + dataset_path, dataset, "index", index_filename + ) index["search_params"] = validate_search_params( - itertools.product(*search_param_lists), search_param_names, index["build_param"], algo, group_conf, algos_conf, conf_file, count, batch_size + itertools.product(*search_param_lists), + search_param_names, + index["build_param"], + algo, + group_conf, + algos_conf, + conf_file, + count, + batch_size, ) if index["search_params"]: indexes.append(index) return indexes -def validate_search_params(all_search_params, search_param_names, build_params, algo, group_conf, algos_conf, 
conf_file, count, batch_size) -> list: +def validate_search_params( + all_search_params, + search_param_names, + build_params, + algo, + group_conf, + algos_conf, + conf_file, + count, + batch_size, +) -> list: """ - Validate and prepare the search parameters for the given algorithm and group. + Validate and prepare the search parameters for the given algorithm + and group. + Parameters ---------- all_search_params : itertools.product @@ -357,6 +484,7 @@ def validate_search_params(all_search_params, search_param_names, build_params, The number of nearest neighbors to search for. batch_size : int The size of each batch for processing. + Returns ------- list @@ -365,7 +493,16 @@ def validate_search_params(all_search_params, search_param_names, build_params, search_params_list = [] for search_params in all_search_params: search_dict = dict(zip(search_param_names, search_params)) - if validate_constraints(algos_conf, algo, "search", search_dict, build_params, conf_file["dataset"].get("dims"), count, batch_size): + if validate_constraints( + algos_conf, + algo, + "search", + search_dict, + build_params, + conf_file["dataset"].get("dims"), + count, + batch_size, + ): search_params_list.append(search_dict) return search_params_list @@ -378,10 +515,11 @@ def validate_constraints( build_param: dict, dims: Any, k: Optional[int], - batch_size: Optional[int] + batch_size: Optional[int], ) -> bool: """ Validate the constraints for the given algorithm and constraint type. + Parameters ---------- algos_conf : Dict[str, Any] @@ -398,25 +536,28 @@ def validate_constraints( The number of nearest neighbors to search for. batch_size : Optional[int] The size of each batch for processing. + Returns ------- bool True if the constraints are valid, False otherwise. + Raises ------ ValueError - If `dims` are needed for build constraints but not specified in the dataset configuration. + If `dims` are needed for build constraints but not specified in the + dataset configuration. 
""" if constraint_type in algos_conf[algo]["constraints"]: importable = algos_conf[algo]["constraints"][constraint_type] - module, func = ".".join(importable.split(".")[:-1]), importable.split(".")[-1] + module, func = ( + ".".join(importable.split(".")[:-1]), + importable.split(".")[-1], + ) validator = import_module(module) constraints_func = getattr(validator, func) if constraint_type == "build": - if "dims" not in conf_file["dataset"]: - raise ValueError("`dims` needed for build constraints but not specified in datasets.yaml") - else: - return constraints_func(param, dims) + return constraints_func(param, dims) else: return constraints_func(param, build_param, k, batch_size) return True @@ -439,10 +580,11 @@ def run_benchmark( search_mode: str, search_threads: int, dry_run: bool, - raft_log_level: int + raft_log_level: int, ) -> None: """ Runs a benchmarking process based on the provided configurations. + Parameters ---------- subset_size : int @@ -479,6 +621,7 @@ def run_benchmark( Whether to perform a dry run without actual execution. raft_log_level : int The logging level for the RAFT library. 
+ Returns ------- None @@ -489,21 +632,45 @@ def run_benchmark( if not build and not search: build, search = True, True - dataset_conf_all = load_yaml_file(dataset_configuration or os.path.join(scripts_path, "../config/datasets", "datasets.yaml")) + dataset_conf_all = load_yaml_file( + dataset_configuration + or os.path.join(scripts_path, "../config/datasets", "datasets.yaml") + ) dataset_conf = get_dataset_configuration(dataset, dataset_conf_all) conf_file = prepare_conf_file(dataset_conf, subset_size, count, batch_size) algos_conf_fs = gather_algorithm_configs(scripts_path, configuration) allowed_algos = algorithms.split(",") if algorithms else None - allowed_algo_groups = [algo_group.split(".") for algo_group in algo_groups.split(",")] if algo_groups else None - algos_conf = load_algorithms_conf(algos_conf_fs, allowed_algos, list(zip(*allowed_algo_groups)) if allowed_algo_groups else None) + allowed_algo_groups = ( + [algo_group.split(".") for algo_group in algo_groups.split(",")] + if algo_groups + else None + ) + algos_conf = load_algorithms_conf( + algos_conf_fs, + allowed_algos, + list(zip(*allowed_algo_groups)) if allowed_algo_groups else None, + ) - executables_to_run = prepare_executables(algos_conf, load_yaml_file(os.path.join(scripts_path, "../config", "algorithms.yaml")), gpu_present, conf_file, dataset_path, dataset, count, batch_size) + executables_to_run = prepare_executables( + algos_conf, + load_yaml_file( + os.path.join(scripts_path, "../config", "algorithms.yaml") + ), + gpu_present, + conf_file, + dataset_path, + dataset, + count, + batch_size, + ) cuvs_bench_cpp( conf_file, dataset, - os.path.dirname(configuration) if configuration and os.path.isfile(configuration) else os.path.join(scripts_path, "conf", "algos"), + os.path.dirname(configuration) + if configuration and os.path.isfile(configuration) + else os.path.join(scripts_path, "conf", "algos"), executables_to_run, dataset_path, force, From 9b69460db4a7b691c67b785cc4333f538b788a7b Mon Sep 
17 00:00:00 2001 From: divyegala Date: Tue, 24 Sep 2024 12:57:37 -0700 Subject: [PATCH 11/18] pre-commit --- .gitignore | 3 + cpp/bench/ann/CMakeLists.txt | 52 +++------ python/cuvs/CMakeLists.txt | 22 ++-- .../cuvs/neighbors/filters/CMakeLists.txt | 3 +- python/cuvs_bench/cuvs_bench/run/runners.py | 65 +++++++---- .../cuvs_bench/cuvs_bench/tests/test_run.py | 106 +++++++++++------- 6 files changed, 142 insertions(+), 109 deletions(-) diff --git a/.gitignore b/.gitignore index fcbe0fa3a2..7a4a39caf2 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,6 @@ cagra_index ivf_flat_index ivf_pq_index +# cuvs_bench +datasets/ +*.json diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 6fe23483e0..3224587e44 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -199,30 +199,19 @@ if(NOT TARGET CUVS_ANN_BENCH_ALL) endif() if(CUVS_ANN_BENCH_USE_HNSWLIB) - ConfigureAnnBench( - NAME HNSWLIB PATH src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib - ) + ConfigureAnnBench(NAME HNSWLIB PATH src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib) endif() if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) ConfigureAnnBench( - NAME CUVS_IVF_PQ - PATH - src/cuvs/cuvs_benchmark.cu - src/cuvs/cuvs_ivf_pq.cu - LINKS cuvs + NAME CUVS_IVF_PQ PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_pq.cu LINKS cuvs ) endif() if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) ConfigureAnnBench( - NAME CUVS_IVF_FLAT - PATH - src/cuvs/cuvs_benchmark.cu - src/cuvs/cuvs_ivf_flat.cu - LINKS - cuvs + NAME CUVS_IVF_FLAT PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_flat.cu LINKS cuvs ) endif() @@ -232,12 +221,8 @@ endif() if(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE) ConfigureAnnBench( - NAME - CUVS_KNN_BRUTE_FORCE - PATH - $<$:src/cuvs/cuvs_brute_force_knn.cu> - LINKS - cuvs + NAME CUVS_KNN_BRUTE_FORCE PATH + $<$:src/cuvs/cuvs_brute_force_knn.cu> LINKS cuvs ) endif() @@ -258,8 +243,7 @@ endif() if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) ConfigureAnnBench( - NAME 
CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs - hnswlib::hnswlib + NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs hnswlib::hnswlib ) endif() @@ -267,36 +251,31 @@ message("CUVS_FAISS_TARGETS: ${CUVS_FAISS_TARGETS}") message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") if(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( - NAME FAISS_CPU_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS - ${CUVS_FAISS_TARGETS} + NAME FAISS_CPU_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS} ) endif() if(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_CPU_IVF_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS - ${CUVS_FAISS_TARGETS} + NAME FAISS_CPU_IVF_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS} ) endif() if(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ConfigureAnnBench( - NAME FAISS_CPU_IVF_PQ PATH src/faiss/faiss_cpu_benchmark.cpp LINKS - ${CUVS_FAISS_TARGETS} + NAME FAISS_CPU_IVF_PQ PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS} ) endif() if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT AND CUVS_FAISS_ENABLE_GPU) ConfigureAnnBench( - NAME FAISS_GPU_IVF_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS - ${CUVS_FAISS_TARGETS} + NAME FAISS_GPU_IVF_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} ) endif() if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ AND CUVS_FAISS_ENABLE_GPU) ConfigureAnnBench( - NAME FAISS_GPU_IVF_PQ PATH src/faiss/faiss_gpu_benchmark.cu LINKS - ${CUVS_FAISS_TARGETS} + NAME FAISS_GPU_IVF_PQ PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} ) endif() @@ -322,13 +301,8 @@ if(CUVS_ANN_BENCH_SINGLE_EXE) target_link_libraries( ANN_BENCH - PRIVATE raft::raft - nlohmann_json::nlohmann_json - benchmark::benchmark - dl - fmt::fmt-header-only - spdlog::spdlog_header_only - $<$:CUDA::nvtx3> + PRIVATE raft::raft nlohmann_json::nlohmann_json benchmark::benchmark dl fmt::fmt-header-only + 
spdlog::spdlog_header_only $<$:CUDA::nvtx3> ) set_target_properties( ANN_BENCH diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 7d2f8dcf90..feb3bd58c4 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -83,14 +83,22 @@ if(NOT cuvs_FOUND) if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) set(rpaths - "$ORIGIN/../nvidia/cublas/lib" - "$ORIGIN/../nvidia/curand/lib" - "$ORIGIN/../nvidia/cusolver/lib" - "$ORIGIN/../nvidia/cusparse/lib" - "$ORIGIN/../nvidia/nvjitlink/lib" + "$ORIGIN/../nvidia/cublas/lib" + "$ORIGIN/../nvidia/curand/lib" + "$ORIGIN/../nvidia/cusolver/lib" + "$ORIGIN/../nvidia/cusparse/lib" + "$ORIGIN/../nvidia/nvjitlink/lib" + ) + set_property( + TARGET cuvs + PROPERTY INSTALL_RPATH ${rpaths} + APPEND + ) + set_property( + TARGET cuvs_c + PROPERTY INSTALL_RPATH ${rpaths} + APPEND ) - set_property(TARGET cuvs PROPERTY INSTALL_RPATH ${rpaths} APPEND) - set_property(TARGET cuvs_c PROPERTY INSTALL_RPATH ${rpaths} APPEND) endif() set(cython_lib_dir cuvs) diff --git a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt index 8f281d1c84..c90615feb4 100644 --- a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt @@ -20,6 +20,5 @@ set(linked_libraries cuvs::cuvs cuvs::c_api) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX - neighbors_prefilter_ + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_prefilter_ ) diff --git a/python/cuvs_bench/cuvs_bench/run/runners.py b/python/cuvs_bench/cuvs_bench/run/runners.py index 54d32c77d2..5a540d2e5e 100644 --- a/python/cuvs_bench/cuvs_bench/run/runners.py +++ b/python/cuvs_bench/cuvs_bench/run/runners.py @@ -14,19 +14,20 @@ # limitations under the License. 
# -import os import json +import os import subprocess import uuid - -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple def cuvs_bench_cpp( conf_file: Dict, conf_filename: str, conf_filedir: str, - executables_to_run: Dict[Tuple[str, str, Tuple[str, str]], Dict[str, List[Dict]]], + executables_to_run: Dict[ + Tuple[str, str, Tuple[str, str]], Dict[str, List[Dict]] + ], dataset_path: str, force: bool, build: bool, @@ -36,7 +37,7 @@ def cuvs_bench_cpp( batch_size: int, search_threads: Optional[int], mode: str = "throughput", - raft_log_level: str = "info" + raft_log_level: str = "info", ) -> None: """ Run the CUVS benchmarking tool with the provided configuration. @@ -49,7 +50,8 @@ def cuvs_bench_cpp( The name of the configuration file. conf_filedir : str The directory of the configuration file. - executables_to_run : Dict[Tuple[str, str, Tuple[str, str]], Dict[str, List[Dict]]] + executables_to_run : Dict[Tuple[str, str, Tuple[str, str]], + Dict[str, List[Dict]]] Dictionary of executables to run and their configurations. dataset_path : str The path to the dataset. @@ -68,7 +70,8 @@ def cuvs_bench_cpp( search_threads : Optional[int] The number of threads to use for searching. mode : str, optional - The mode of search to perform ('latency' or 'throughput'), by default 'throughput'. + The mode of search to perform ('latency' or 'throughput'), + by default 'throughput'. raft_log_level : str, optional The logging level for the RAFT library, by default 'info'. 
@@ -76,19 +79,29 @@ def cuvs_bench_cpp( ------- None """ - for executable, ann_executable_path, output_filename in executables_to_run.keys(): + for ( + executable, + ann_executable_path, + output_filename, + ) in executables_to_run.keys(): # Need to write temporary configuration - temp_conf_filename = f"{conf_filename}_{output_filename[1]}_{uuid.uuid1()}.json" + temp_conf_filename = ( + f"{conf_filename}_{output_filename[1]}_{uuid.uuid1()}.json" + ) with open(temp_conf_filename, "w") as f: temp_conf = { "dataset": conf_file["dataset"], "search_basic_param": conf_file["search_basic_param"], - "index": executables_to_run[(executable, ann_executable_path, output_filename)]["index"] + "index": executables_to_run[ + (executable, ann_executable_path, output_filename) + ]["index"], } json_str = json.dumps(temp_conf, indent=2) f.write(json_str) - legacy_result_folder = os.path.join(dataset_path, conf_file["dataset"]["name"], "result") + legacy_result_folder = os.path.join( + dataset_path, conf_file["dataset"]["name"], "result" + ) os.makedirs(legacy_result_folder, exist_ok=True) if build: @@ -96,25 +109,31 @@ def cuvs_bench_cpp( os.makedirs(build_folder, exist_ok=True) build_file = f"{output_filename[0]}.json" temp_build_file = f"{build_file}.lock" + benchmark_out = os.path.join(build_folder, temp_build_file) cmd = [ ann_executable_path, "--build", f"--data_prefix={dataset_path}", "--benchmark_out_format=json", "--benchmark_counters_tabular=true", - f"--benchmark_out={os.path.join(build_folder, temp_build_file)}", - f"--raft_log_level={parse_log_level(raft_log_level)}" + f"--benchmark_out={os.path.join(benchmark_out)}", + f"--raft_log_level={parse_log_level(raft_log_level)}", ] if force: cmd.append("--force") cmd.append(temp_conf_filename) if dry_run: - print(f"Benchmark command for {output_filename[0]}:\n{' '.join(cmd)}\n") + print( + f"Benchmark command for {output_filename[0]}:\n" + f"{' '.join(cmd)}\n" + ) else: try: subprocess.run(cmd, check=True) - 
merge_build_files(build_folder, build_file, temp_build_file) + merge_build_files( + build_folder, build_file, temp_build_file + ) except Exception as e: print(f"Error occurred running benchmark: {e}") finally: @@ -137,7 +156,7 @@ def cuvs_bench_cpp( "--benchmark_out_format=json", f"--mode={mode}", f"--benchmark_out={os.path.join(search_folder, search_file)}", - f"--raft_log_level={parse_log_level(raft_log_level)}" + f"--raft_log_level={parse_log_level(raft_log_level)}", ] if force: cmd.append("--force") @@ -146,7 +165,10 @@ def cuvs_bench_cpp( cmd.append(temp_conf_filename) if dry_run: - print(f"Benchmark command for {output_filename[1]}:\n{' '.join(cmd)}\n") + print( + f"Benchmark command for {output_filename[1]}:\n" + f"{' '.join(cmd)}\n" + ) else: try: subprocess.run(cmd, check=True) @@ -165,6 +187,7 @@ def cuvs_bench_cpp( "trace": 5, } + def parse_log_level(level_str: str) -> int: """ Parse the log level from string to integer. @@ -189,7 +212,9 @@ def parse_log_level(level_str: str) -> int: return log_levels[level_str.lower()] -def merge_build_files(build_dir: str, build_file: str, temp_build_file: str) -> None: +def merge_build_files( + build_dir: str, build_file: str, temp_build_file: str +) -> None: """ Merge temporary build files into the main build file. 
@@ -221,7 +246,9 @@ def merge_build_files(build_dir: str, build_file: str, temp_build_file: str) -> with open(build_json_path, "r") as f: build_dict = json.load(f) except Exception as e: - print(f"Error loading existing build file: {build_json_path} ({e})") + print( + f"Error loading existing build file: {build_json_path} ({e})" + ) temp_build_dict = {} if os.path.isfile(tmp_build_json_path): diff --git a/python/cuvs_bench/cuvs_bench/tests/test_run.py b/python/cuvs_bench/cuvs_bench/tests/test_run.py index 41ed5316b5..7b7a481a07 100644 --- a/python/cuvs_bench/cuvs_bench/tests/test_run.py +++ b/python/cuvs_bench/cuvs_bench/tests/test_run.py @@ -15,23 +15,23 @@ # -import os -import pytest +import itertools +from unittest.mock import MagicMock, mock_open, patch -from unittest.mock import patch, mock_open, MagicMock +import pytest from benchmark import ( - load_yaml_file, - get_dataset_configuration, - prepare_conf_file, + find_executable, gather_algorithm_configs, + get_dataset_configuration, load_algorithms_conf, + load_yaml_file, + prepare_conf_file, prepare_executables, prepare_indexes, - validate_search_params, rmm_present, - find_executable, validate_algorithm, validate_constraints, + validate_search_params, ) @@ -72,13 +72,11 @@ def test_gather_algorithm_configs(tmpdir): result = gather_algorithm_configs(str(scripts_path), None) assert len(result) == 2 - custom_conf_dir = tmpdir.mkdir("custom_conf") custom_conf_dir.join("custom_algo.yaml").write("key: value") result = gather_algorithm_configs(str(scripts_path), str(custom_conf_dir)) assert len(result) == 3 - custom_conf_file = custom_conf_dir.join("custom_algo_file.yaml") custom_conf_file.write("key: value") result = gather_algorithm_configs(str(scripts_path), str(custom_conf_file)) @@ -96,7 +94,6 @@ def test_load_algorithms_conf(): result = load_algorithms_conf(algos_conf_fs, None, None) assert "algo1" in result - with patch("builtins.open", mock_open(read_data=yaml_content)): result = 
load_algorithms_conf(algos_conf_fs, ["algo1"], None) assert "algo1" in result @@ -104,20 +101,18 @@ def test_load_algorithms_conf(): assert "algo1" not in result -@patch("benchmark.find_executable", return_value=("executable", "path", "filename")) +@patch( + "benchmark.find_executable", + return_value=("executable", "path", "filename"), +) @patch("benchmark.validate_algorithm", return_value=True) -@patch("benchmark.prepare_indexes", return_value=[{"index_key": "index_value"}]) -def test_prepare_executables(mock_prepare_indexes, mock_validate_algorithm, mock_find_executable): - algos_conf = { - "algo1": { - "groups": { - "group1": { - "build": {}, - "search": {} - } - } - } - } +@patch( + "benchmark.prepare_indexes", return_value=[{"index_key": "index_value"}] +) +def test_prepare_executables( + mock_prepare_indexes, mock_validate_algorithm, mock_find_executable +): + algos_conf = {"algo1": {"groups": {"group1": {"build": {}, "search": {}}}}} algos_yaml = {"algo1": {}} gpu_present = True conf_file = {} @@ -125,18 +120,33 @@ def test_prepare_executables(mock_prepare_indexes, mock_validate_algorithm, mock dataset = "dataset" count = 10 batch_size = 128 - result = prepare_executables(algos_conf, algos_yaml, gpu_present, conf_file, dataset_path, dataset, count, batch_size) + result = prepare_executables( + algos_conf, + algos_yaml, + gpu_present, + conf_file, + dataset_path, + dataset, + count, + batch_size, + ) assert "executable" in result assert len(result["executable"]["index"]) == 1 def test_prepare_indexes(): - group_conf = { - "build": {"param1": [1, 2]}, - "search": {"param2": [3, 4]} - } + group_conf = {"build": {"param1": [1, 2]}, "search": {"param2": [3, 4]}} conf_file = {"dataset": {"dims": 128}} - result = prepare_indexes(group_conf, "algo", "group", conf_file, "dataset_path", "dataset", 10, 128) + result = prepare_indexes( + group_conf, + "algo", + "group", + conf_file, + "dataset_path", + "dataset", + 10, + 128, + ) assert len(result) == 2 assert "param1" 
in result[0]["build_param"] @@ -146,7 +156,15 @@ def test_validate_search_params(): search_param_names = ["param1", "param2"] group_conf = {} conf_file = {"dataset": {"dims": 128}} - result = validate_search_params(all_search_params, search_param_names, "algo", group_conf, conf_file, 10, 128) + result = validate_search_params( + all_search_params, + search_param_names, + "algo", + group_conf, + conf_file, + 10, + 128, + ) assert len(result) == 4 @@ -161,7 +179,11 @@ def test_rmm_present(): def test_find_executable(mock_get_build_path): algos_conf = {"algo1": {"executable": "executable1"}} result = find_executable(algos_conf, "algo1", "group1", 10, 128) - assert result == ("executable1", "build_path", ("algo1,group1", "algo1,group1,k10,bs128")) + assert result == ( + "executable1", + "build_path", + ("algo1,group1", "algo1,group1,k10,bs128"), + ) mock_get_build_path.return_value = None with pytest.raises(FileNotFoundError): find_executable(algos_conf, "algo1", "group1", 10, 128) @@ -184,22 +206,22 @@ def test_validate_constraints(mock_import_module): mock_import_module.return_value = mock_validator mock_validator.constraint_func.return_value = True algos_conf = { - "algo1": { - "constraints": { - "build": "module.constraint_func" - } - } + "algo1": {"constraints": {"build": "module.constraint_func"}} } - result = validate_constraints(algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None) + result = validate_constraints( + algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None + ) assert result is True - algos_conf = {"algo1": {"constraints": {}}} - result = validate_constraints(algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None) + result = validate_constraints( + algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None + ) assert result is True - mock_validator.constraint_func.return_value = False algos_conf["algo1"]["constraints"]["build"] = "module.constraint_func" - result = validate_constraints(algos_conf, 
"algo1", "build", {"param1": "value1"}, 128, None, None) + result = validate_constraints( + algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None + ) assert result is False From 2b4cf8c1471d515e291025245fa5e585cba6917b Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 25 Sep 2024 14:05:03 -0700 Subject: [PATCH 12/18] remove bench-ann build from cpp conda recipe --- conda/recipes/libcuvs/build_libcuvs_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/libcuvs/build_libcuvs_tests.sh b/conda/recipes/libcuvs/build_libcuvs_tests.sh index 5d77ae2d1b..fb116a6920 100644 --- a/conda/recipes/libcuvs/build_libcuvs_tests.sh +++ b/conda/recipes/libcuvs/build_libcuvs_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2022-2024, NVIDIA CORPORATION. -./build.sh tests bench-ann --allgpuarch --no-nvtx --build-metrics=tests_bench --incl-cache-stats +./build.sh tests --allgpuarch --no-nvtx --build-metrics=tests_bench --incl-cache-stats cmake --install cpp/build --component testing From 611f1904b061e99ff25bb4f27353184747c3c37b Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 25 Sep 2024 15:52:03 -0700 Subject: [PATCH 13/18] address review --- build.sh | 4 ++-- cpp/CMakeLists.txt | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index e70f26f32a..9c9daddb3e 100755 --- a/build.sh +++ b/build.sh @@ -275,7 +275,7 @@ if hasArg tests || (( ${NUMARGS} == 0 )); then fi if hasArg bench-ann || (( ${NUMARGS} == 0 )); then - BUILD_ANN_BENCH=ON + BUILD_CUVS_BENCH=ON CMAKE_TARGET="${CMAKE_TARGET};${ANN_BENCH_TARGETS}" fi @@ -351,7 +351,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libcuvs || hasArg docs || hasArg tests || has -DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \ -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_C_TESTS=${BUILD_TESTS} \ - -DBUILD_ANN_BENCH=${BUILD_ANN_BENCH} \ + -DBUILD_CUVS_BENCH=${BUILD_CUVS_BENCH} \ -DBUILD_CPU_ONLY=${BUILD_CPU_ONLY} \ 
-DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \ ${CACHE_ARGS} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d8d554648b..b72d7f1658 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -55,7 +55,7 @@ option(BUILD_SHARED_LIBS "Build cuvs shared libraries" ON) option(BUILD_TESTS "Build cuvs unit-tests" ON) option(BUILD_C_LIBRARY "Build cuVS C API library" OFF) option(BUILD_C_TESTS "Build cuVS C API tests" OFF) -option(BUILD_ANN_BENCH "Build cuVS ann benchmarks" OFF) +option(BUILD_CUVS_BENCH "Build cuVS ann benchmarks" OFF) option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO @@ -96,7 +96,7 @@ include(CMakeDependentOption) message(VERBOSE "cuVS: Build cuVS unit-tests: ${BUILD_TESTS}") message(VERBOSE "cuVS: Build CPU only components: ${BUILD_CPU_ONLY}") -message(VERBOSE "cuVS: Build ANN benchmarks: ${BUILD_ANN_BENCH}") +message(VERBOSE "cuVS: Build ANN benchmarks: ${BUILD_CUVS_BENCH}") message(VERBOSE "cuVS: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "cuVS: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "cuVS: Disable OpenMP: ${DISABLE_OPENMP}") @@ -188,7 +188,7 @@ endif() include(cmake/thirdparty/get_cutlass.cmake) -if(BUILD_ANN_BENCH) +if(BUILD_CUVS_BENCH) include(${rapids-cmake-dir}/cpm/gbench.cmake) rapids_cpm_gbench(BUILD_STATIC) endif() @@ -651,6 +651,6 @@ endif() # ################################################################################################## # * build ann benchmark executable ----------------------------------------------- -if(BUILD_ANN_BENCH) +if(BUILD_CUVS_BENCH) add_subdirectory(bench/ann/) endif() From 2f868bb628fcbb02e742f15c09024792e5ba7738 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 26 Sep 2024 12:13:52 -0500 Subject: [PATCH 14/18] Manage rapids-build-backend dependencies with dependencies.yaml. 
--- dependencies.yaml | 7 +++++++ python/cuvs_bench/pyproject.toml | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index 9450537c3b..aeea2238ea 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -116,6 +116,13 @@ files: table: build-system includes: - rapids_build_setuptools + py_rapids_build_py_cuvs_bench: + output: pyproject + pyproject_dir: python/cuvs_bench + extras: + table: tool.rapids-build-backend + key: requires + includes: [] py_run_cuvs_bench: output: pyproject pyproject_dir: python/cuvs_bench diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 48f799743b..37236a0832 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -62,6 +62,7 @@ version = { file = "cuvs_bench/VERSION" } [tool.rapids-build-backend] build-backend = "setuptools.build_meta" -requires = [] +requires = [ +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" From f460b3b75e6062ad1122e1cfa2827b817d139897 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 26 Sep 2024 13:04:21 -0500 Subject: [PATCH 15/18] Remove extraneous file listing. --- dependencies.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index aeea2238ea..cf8aa9919e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -38,7 +38,6 @@ files: - develop - bench - bench_python - - rapids_build_setuptools test_cpp: output: none includes: From 8a82802768bd76e69eeed8fdb4315f004a793c23 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 26 Sep 2024 13:08:31 -0500 Subject: [PATCH 16/18] Fixes discussed with Divye. 
--- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 3 +++ conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 3 +++ conda/environments/bench_ann_cuda-125_arch-aarch64.yaml | 3 +++ conda/environments/bench_ann_cuda-125_arch-x86_64.yaml | 3 +++ dependencies.yaml | 8 ++++---- python/cuvs_bench/pyproject.toml | 4 ++++ 6 files changed, 20 insertions(+), 4 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 7e1014f250..73c42ca71c 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -42,5 +42,8 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml +- rapids-build-backend>=0.3.0,<0.4.0.dev0 +- setuptools - sysroot_linux-aarch64==2.17 +- wheel name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 120b7afca0..473e50bc6c 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -42,5 +42,8 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml +- rapids-build-backend>=0.3.0,<0.4.0.dev0 +- setuptools - sysroot_linux-64==2.17 +- wheel name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index ac0ea97e6a..8a877c4c0e 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -38,5 +38,8 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml +- rapids-build-backend>=0.3.0,<0.4.0.dev0 +- setuptools - sysroot_linux-aarch64==2.17 +- wheel name: bench_ann_cuda-125_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml 
b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index e593c240d1..54859a77f1 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -38,5 +38,8 @@ dependencies: - pandas - pylibraft==24.10.*,>=0.0.0a0 - pyyaml +- rapids-build-backend>=0.3.0,<0.4.0.dev0 +- setuptools - sysroot_linux-64==2.17 +- wheel name: bench_ann_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index cf8aa9919e..c18f53305d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -38,6 +38,7 @@ files: - develop - bench - bench_python + - rapids_build_setuptools test_cpp: output: none includes: @@ -194,7 +195,7 @@ dependencies: rapids_build_setuptools: common: - - output_types: [requirements, pyproject] + - output_types: [conda, requirements, pyproject] packages: - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools @@ -476,13 +477,12 @@ dependencies: - openblas bench_python: common: - - output_types: [conda] + - output_types: [conda, pyproject, requirements] packages: + - click - matplotlib - pandas - pyyaml - - pandas - - click depends_on_librmm: common: - output_types: conda diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 37236a0832..6a48c7a58d 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -18,6 +18,10 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ + "click", + "matplotlib", + "pandas", + "pyyaml", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", From ac81e2f4b3b573c911ac6237230b6959b19b1fac Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 26 Sep 2024 15:55:01 -0400 Subject: [PATCH 17/18] Apply suggestions from code review Co-authored-by: Bradley Dice --- build.sh | 2 +- conda/recipes/libcuvs/build_libcuvs_tests.sh | 2 +- python/cuvs_bench/cuvs_bench/run/__main__.py | 2 +- python/cuvs_bench/pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index 9c9daddb3e..b463f0f0d9 100755 --- a/build.sh +++ b/build.sh @@ -421,7 +421,7 @@ fi # Build and (optionally) install the cuvs_bench Python package if (( ${NUMARGS} == 0 )) || hasArg bench-ann; then - python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuvs_bench -vvv + python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuvs_bench fi # Build the cuvs Rust bindings diff --git a/conda/recipes/libcuvs/build_libcuvs_tests.sh b/conda/recipes/libcuvs/build_libcuvs_tests.sh index fb116a6920..b077dbe609 100644 --- a/conda/recipes/libcuvs/build_libcuvs_tests.sh +++ b/conda/recipes/libcuvs/build_libcuvs_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2022-2024, NVIDIA CORPORATION. -./build.sh tests --allgpuarch --no-nvtx --build-metrics=tests_bench --incl-cache-stats +./build.sh tests --allgpuarch --no-nvtx --build-metrics=tests --incl-cache-stats cmake --install cpp/build --component testing diff --git a/python/cuvs_bench/cuvs_bench/run/__main__.py b/python/cuvs_bench/cuvs_bench/run/__main__.py index b26e1293fb..b5d99a4bfe 100644 --- a/python/cuvs_bench/cuvs_bench/run/__main__.py +++ b/python/cuvs_bench/cuvs_bench/run/__main__.py @@ -99,7 +99,7 @@ @click.option( "--algo-groups", help="Add comma separated <algorithm>.<group> to run. 
Example usage: " - ' "--algo-groups=raft_cagra.large,hnswlib.large".', + ' "--algo-groups=cuvs_cagra.large,hnswlib.large".', ) @click.option( "-f", diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 6a48c7a58d..41ebad1165 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -35,7 +35,7 @@ classifiers = [ ] [project.urls] -Homepage = "https://github.com/rapidsai/raft" +Homepage = "https://github.com/rapidsai/cuvs" [tool.setuptools.package-data] "*" = ["*.*", "VERSION"] From b2e5c489e3f2c2313af21a31e18a311a6b57b1b0 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 26 Sep 2024 12:57:54 -0700 Subject: [PATCH 18/18] json ignore only in root --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7a4a39caf2..17258e3dee 100644 --- a/.gitignore +++ b/.gitignore @@ -81,4 +81,4 @@ ivf_pq_index # cuvs_bench datasets/ -*.json +/*.json \ No newline at end of file