-
Notifications
You must be signed in to change notification settings - Fork 638
feat!: support hamming distance & binary vector #3198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cd4651e
a2af388
aa206a2
f3211b0
4f279ea
085db49
1a2893b
844fa3c
b52af6d
d951bed
6e428fc
1c9561d
4ace2d0
07e2197
bd27747
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ use lance_index::vector::v3::subindex::IvfSubIndex; | |
| use pprof::criterion::{Output, PProfProfiler}; | ||
|
|
||
| use lance_index::vector::{ | ||
| flat::storage::FlatStorage, | ||
| flat::storage::FlatFloatStorage, | ||
| hnsw::builder::{HnswBuildParams, HNSW}, | ||
| }; | ||
| use lance_linalg::distance::DistanceType; | ||
|
|
@@ -31,7 +31,7 @@ fn bench_hnsw(c: &mut Criterion) { | |
|
|
||
| let data = generate_random_array_with_seed::<Float32Type>(TOTAL * DIMENSION, SEED); | ||
| let fsl = FixedSizeListArray::try_new_from_values(data, DIMENSION as i32).unwrap(); | ||
| let vectors = Arc::new(FlatStorage::new(fsl.clone(), DistanceType::L2)); | ||
| let vectors = Arc::new(FlatFloatStorage::new(fsl.clone(), DistanceType::L2)); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, do we need `FlatStorage` to be typed? This is sad.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah... unfortunately, we now have to do this.
|
||
|
|
||
| let query = fsl.value(0); | ||
| c.bench_function( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,7 @@ use crate::{ | |
| }, | ||
| }; | ||
|
|
||
| use super::storage::{FlatStorage, FLAT_COLUMN}; | ||
| use super::storage::{FlatBinStorage, FlatFloatStorage, FLAT_COLUMN}; | ||
|
|
||
| /// A Flat index is any index that stores no metadata, and | ||
| /// during query, it simply scans over the storage and returns the top k results | ||
|
|
@@ -166,7 +166,7 @@ impl FlatQuantizer { | |
| impl Quantization for FlatQuantizer { | ||
| type BuildParams = (); | ||
| type Metadata = FlatMetadata; | ||
| type Storage = FlatStorage; | ||
| type Storage = FlatFloatStorage; | ||
|
|
||
| fn build(data: &dyn Array, distance_type: DistanceType, _: &Self::BuildParams) -> Result<Self> { | ||
| let dim = data.as_fixed_size_list().value_length(); | ||
|
|
@@ -228,3 +228,81 @@ impl TryFrom<Quantizer> for FlatQuantizer { | |
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, DeepSizeOf)] | ||
| pub struct FlatBinQuantizer { | ||
|
Contributor
Author
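There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We have to add this rather than reusing `FlatQuantizer`, since `FlatQuantizer::Storage` is now `FlatFloatStorage` while this quantizer needs `FlatBinStorage`. |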
||
| dim: usize, | ||
| distance_type: DistanceType, | ||
| } | ||
|
|
||
| impl FlatBinQuantizer { | ||
| pub fn new(dim: usize, distance_type: DistanceType) -> Self { | ||
| Self { dim, distance_type } | ||
| } | ||
| } | ||
|
|
||
| impl Quantization for FlatBinQuantizer { | ||
| type BuildParams = (); | ||
| type Metadata = FlatMetadata; | ||
| type Storage = FlatBinStorage; | ||
|
|
||
| fn build(data: &dyn Array, distance_type: DistanceType, _: &Self::BuildParams) -> Result<Self> { | ||
| let dim = data.as_fixed_size_list().value_length(); | ||
| Ok(Self::new(dim as usize, distance_type)) | ||
| } | ||
|
|
||
| fn code_dim(&self) -> usize { | ||
| self.dim | ||
| } | ||
|
|
||
| fn column(&self) -> &'static str { | ||
| FLAT_COLUMN | ||
| } | ||
|
|
||
| fn from_metadata(metadata: &Self::Metadata, distance_type: DistanceType) -> Result<Quantizer> { | ||
| Ok(Quantizer::FlatBin(Self { | ||
| dim: metadata.dim, | ||
| distance_type, | ||
| })) | ||
| } | ||
|
|
||
| fn metadata( | ||
| &self, | ||
| _: Option<crate::vector::quantizer::QuantizationMetadata>, | ||
| ) -> Result<serde_json::Value> { | ||
| let metadata = FlatMetadata { dim: self.dim }; | ||
| Ok(serde_json::to_value(metadata)?) | ||
| } | ||
|
|
||
| fn metadata_key() -> &'static str { | ||
| "flat" | ||
| } | ||
|
|
||
| fn quantization_type() -> QuantizationType { | ||
| QuantizationType::Flat | ||
| } | ||
|
|
||
| fn quantize(&self, vectors: &dyn Array) -> Result<ArrayRef> { | ||
| Ok(vectors.slice(0, vectors.len())) | ||
| } | ||
| } | ||
|
|
||
| impl From<FlatBinQuantizer> for Quantizer { | ||
| fn from(value: FlatBinQuantizer) -> Self { | ||
| Self::FlatBin(value) | ||
| } | ||
| } | ||
|
|
||
| impl TryFrom<Quantizer> for FlatBinQuantizer { | ||
| type Error = Error; | ||
|
|
||
| fn try_from(value: Quantizer) -> Result<Self> { | ||
| match value { | ||
| Quantizer::FlatBin(quantizer) => Ok(quantizer), | ||
| _ => Err(Error::invalid_input( | ||
| "quantizer is not FlatBinQuantizer", | ||
| location!(), | ||
| )), | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not relevant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR introduces a breaking change, so we bump the version.