From 7cfcb6b4e2b4ead98fae4c8f90148da4bdf2fba6 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 25 Jul 2025 16:57:55 +0200 Subject: [PATCH 1/6] refactor: replace Float[] with float[] We started off using Float[] and Float[][] because v5 did so and it was easier to reuse (read: copy-paste) some bits of code. Thing with object arrays is that they make N allocations (1 for each element) compared to 1 that a primitive array makes. That creates unnecessary work for GC and we aren't gaining anything from using boxed values. They're still useful for request parameters like distance/certainty/force, because they can be null. But vectors are not sparse and should never contain nulls in principle. --- src/it/java/io/weaviate/ConcurrentTest.java | 8 +- .../io/weaviate/integration/DataITest.java | 8 +- .../io/weaviate/integration/SearchITest.java | 6 +- .../client6/v1/api/collections/Vectors.java | 90 ++++++++++++++----- .../aggregate/AbstractAggregateClient.java | 8 +- .../collections/data/InsertManyRequest.java | 4 +- .../query/AbstractQueryClient.java | 8 +- .../v1/api/collections/query/NearVector.java | 10 +-- .../v1/internal/grpc/ByteStringUtil.java | 56 ++++++------ ...{GRPCTest.java => ByteStringUtilTest.java} | 16 ++-- .../client6/v1/internal/json/JSONTest.java | 18 ++-- 11 files changed, 143 insertions(+), 89 deletions(-) rename src/test/java/io/weaviate/client6/v1/internal/grpc/{GRPCTest.java => ByteStringUtilTest.java} (81%) diff --git a/src/it/java/io/weaviate/ConcurrentTest.java b/src/it/java/io/weaviate/ConcurrentTest.java index 2e2036d18..11f8211cb 100644 --- a/src/it/java/io/weaviate/ConcurrentTest.java +++ b/src/it/java/io/weaviate/ConcurrentTest.java @@ -8,6 +8,8 @@ import org.junit.Rule; import org.junit.rules.TestName; +import com.google.common.primitives.Floats; + /** * ConcurrentTest is the base class for integration tests, which provides * utility methods to uniqualize collections and objects created in the @@ -56,9 +58,9 @@ protected static String 
randomUUID() { * @param bound Value range upper bound. * @return */ - protected static Float[] randomVector(int length, float origin, float bound) { - return IntStream.range(0, length) + protected static float[] randomVector(int length, float origin, float bound) { + return Floats.toArray(IntStream.range(0, length) .mapToObj(f -> rand.nextFloat(origin, bound)) - .toArray(Float[]::new); + .toList()); } } diff --git a/src/it/java/io/weaviate/integration/DataITest.java b/src/it/java/io/weaviate/integration/DataITest.java index 827236163..712a0957d 100644 --- a/src/it/java/io/weaviate/integration/DataITest.java +++ b/src/it/java/io/weaviate/integration/DataITest.java @@ -37,7 +37,7 @@ public static void beforeAll() throws IOException { public void testCreateGetDelete() throws IOException { var artists = client.collections.use(COLLECTION); var id = randomUUID(); - Float[] vector = { 1f, 2f, 3f }; + float[] vector = { 1, 2, 3 }; artists.data.insert(Map.of("name", "john doe"), metadata -> metadata @@ -56,8 +56,8 @@ public void testCreateGetDelete() throws IOException { Assertions.assertThat(obj.metadata().uuid()) .as("object id").isEqualTo(id); - Assertions.assertThat(obj.metadata().vectors()).extracting(v -> v.getSingle(VECTOR_INDEX)) - .asInstanceOf(InstanceOfAssertFactories.array(Float[].class)).containsExactly(vector); + Assertions.assertThat(obj.metadata().vectors().getSingle(VECTOR_INDEX)) + .containsExactly(vector); Assertions.assertThat(obj.properties()) .as("has expected properties") @@ -227,7 +227,7 @@ public void testUpdate() throws IOException { var authors = client.collections.use(nsAuthors); var walter = authors.data.insert(Map.of("name", "walter scott")); - var vector = new Float[] { 1f, 2f, 3f }; + var vector = new float[] { 1, 2, 3 }; var books = client.collections.use(nsBooks); diff --git a/src/it/java/io/weaviate/integration/SearchITest.java b/src/it/java/io/weaviate/integration/SearchITest.java index 3f67240a5..1ddf588cc 100644 --- 
a/src/it/java/io/weaviate/integration/SearchITest.java +++ b/src/it/java/io/weaviate/integration/SearchITest.java @@ -53,7 +53,7 @@ public class SearchITest extends ConcurrentTest { /** * One of the inserted vectors which will be used as target vector for search. */ - private static Float[] searchVector; + private static float[] searchVector; @BeforeClass public static void beforeAll() throws IOException { @@ -104,8 +104,8 @@ public void testNearVector_groupBy() { * * @returns IDs of inserted objects and their corresponding vectors. */ - private static Map populateTest(int n) throws IOException { - var created = new HashMap(); + private static Map populateTest(int n) throws IOException { + var created = new HashMap(); var things = client.collections.use(COLLECTION); for (int i = 0; i < n; i++) { diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java b/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java index 9638bed49..3549a7b7b 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java @@ -25,21 +25,22 @@ */ @ToString public class Vectors { + /** Elements of this map must only be {@code float[]} or {@code float[][]}. 
*/ private final Map namedVectors; - public static Vectors of(Float[] vector) { - return new Vectors(VectorIndex.DEFAULT_VECTOR_NAME, vector); + public static Vectors of(float[] vector) { + return of(VectorIndex.DEFAULT_VECTOR_NAME, vector); } - public static Vectors of(String name, Float[] vector) { + public static Vectors of(String name, float[] vector) { return new Vectors(name, vector); } - public static Vectors of(Float[][] vector) { - return new Vectors(VectorIndex.DEFAULT_VECTOR_NAME, vector); + public static Vectors of(float[][] vector) { + return of(VectorIndex.DEFAULT_VECTOR_NAME, vector); } - public static Vectors of(String name, Float[][] vector) { + public static Vectors of(String name, float[][] vector) { return new Vectors(name, vector); } @@ -51,20 +52,30 @@ public Vectors(Builder builder) { this.namedVectors = builder.namedVectors; } - /* + /** * Create a single named vector. - * Intended to be used by factory methods, which can statically restrict - * vector's type to {@code Float[]} and {@code Float[][]}. * - * @param name Vector name. - * - * @param vector {@code Float[]} or {@code Float[][]} vector. + *

+ * Callers must ensure that vectors are either + * {@code float[]} or {@code float[][]}. * + * @param name Vector name. + * @param vector {@code float[]} or {@code float[][]} vector. */ private Vectors(String name, Object vector) { this.namedVectors = Collections.singletonMap(name, vector); } + /** + * Create a Vectors from a map. + * + *

+ * Callers must ensure that vectors are either + * {@code float[]} or {@code float[][]}. + * + * @param name Vector name. + * @param vector Map of named vectors. + */ private Vectors(Map namedVectors) { this.namedVectors = namedVectors; } @@ -72,12 +83,12 @@ private Vectors(Map namedVectors) { public static class Builder implements ObjectBuilder { private final Map namedVectors = new HashMap<>(); - public Builder vector(String name, Float[] vector) { + public Builder vector(String name, float[] vector) { this.namedVectors.put(name, vector); return this; } - public Builder vector(String name, Float[][] vector) { + public Builder vector(String name, float[][] vector) { this.namedVectors.put(name, vector); return this; } @@ -88,22 +99,55 @@ public Vectors build() { } } - public Float[] getSingle(String name) { - return (Float[]) namedVectors.get(name); + /** + * Get 1-dimensional vector by name. + * + * @returns Vector as {@code float[]} or {@code null}. + * @throws ClassCastException The underlying vector is not a {@code float[]}. + */ + public float[] getSingle(String name) { + return (float[]) namedVectors.get(name); } - public Float[] getDefaultSingle() { + /** + * Get default 1-dimensional vector. + * + * @returns Vector as {@code float[]} or {@code null}. + * @throws ClassCastException if the underlying object is not a {@code float[]}. + */ + public float[] getDefaultSingle() { return getSingle(VectorIndex.DEFAULT_VECTOR_NAME); } - public Float[][] getMulti(String name) { - return (Float[][]) namedVectors.get(name); + /** + * Get 2-dimensional vector by name. + * + * @returns Vector as {@code float[][]} or {@code null}. + * @throws ClassCastException if the underlying object is not a + * {@code float[][]}. + */ + public float[][] getMulti(String name) { + return (float[][]) namedVectors.get(name); } - public Float[][] getDefaultMulti() { + /** + * Get default 2-dimensional vector. + * + * @returns Vector as {@code float[][]} or {@code null}. 
+ * @throws ClassCastException if the underlying object is not a + * {@code float[][]}. + */ + public float[][] getDefaultMulti() { return getMulti(VectorIndex.DEFAULT_VECTOR_NAME); } + /** + * Get all vectors. + * Each element is either a {@code float[]} or a {@code float[][]}. + * + * + * @returns Map of name-vector pairs. The returned map is immutable. + */ public Map asMap() { return Map.copyOf(namedVectors); } @@ -119,8 +163,8 @@ public TypeAdapter create(Gson gson, TypeToken type) { } final var mapAdapter = gson.getDelegateAdapter(this, new TypeToken>() { }); - final var float_1d = gson.getDelegateAdapter(this, TypeToken.get(Float[].class)); - final var float_2d = gson.getDelegateAdapter(this, TypeToken.get(Float[][].class)); + final var float_1d = gson.getDelegateAdapter(this, TypeToken.get(float[].class)); + final var float_2d = gson.getDelegateAdapter(this, TypeToken.get(float[][].class)); return (TypeAdapter) new TypeAdapter() { @Override @@ -144,6 +188,8 @@ public Vectors read(JsonReader in) throws IOException { } else { vector = float_1d.fromJsonTree(array); } + + assert (vector instanceof float[]) || (vector instanceof float[][]) : "invalid vector type"; namedVectors.put(vectorName, vector); } } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/AbstractAggregateClient.java b/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/AbstractAggregateClient.java index 23fce1bc5..4258947bd 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/AbstractAggregateClient.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/AbstractAggregateClient.java @@ -72,11 +72,11 @@ public GroupedResponseT hybrid(Hybrid filter, Function> fn) { + public ResponseT nearVector(float[] vector, Function> fn) { return nearVector(NearVector.of(vector), fn); } - public ResponseT nearVector(Float[] vector, Function> nv, + public ResponseT nearVector(float[] vector, Function> nv, Function> fn) { return 
nearVector(NearVector.of(vector, nv), fn); } @@ -85,12 +85,12 @@ public ResponseT nearVector(NearVector filter, Function> fn, + public GroupedResponseT nearVector(float[] vector, Function> fn, GroupBy groupBy) { return nearVector(NearVector.of(vector), fn, groupBy); } - public GroupedResponseT nearVector(Float[] vector, Function> nv, + public GroupedResponseT nearVector(float[] vector, Function> nv, Function> fn, GroupBy groupBy) { return nearVector(NearVector.of(vector, nv), fn, groupBy); } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertManyRequest.java b/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertManyRequest.java index 6c6f42748..48c41ebec 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertManyRequest.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertManyRequest.java @@ -101,10 +101,10 @@ public static void buildObject(WeaviateProtoBatch.BatchObject.Builder object var vector = WeaviateProtoBase.Vectors.newBuilder() .setName(entry.getKey()); - if (value instanceof Float[] single) { + if (value instanceof float[] single) { vector.setType(VectorType.VECTOR_TYPE_SINGLE_FP32); vector.setVectorBytes(ByteStringUtil.encodeVectorSingle(single)); - } else if (value instanceof Float[][] multi) { + } else if (value instanceof float[][] multi) { vector.setVectorBytes(ByteStringUtil.encodeVectorMulti(multi)); vector.setType(VectorType.VECTOR_TYPE_MULTI_FP32); } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/query/AbstractQueryClient.java b/src/main/java/io/weaviate/client6/v1/api/collections/query/AbstractQueryClient.java index cc0017527..0db66bd6b 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/query/AbstractQueryClient.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/query/AbstractQueryClient.java @@ -110,11 +110,11 @@ public GroupedResponseT hybrid(Hybrid query, GroupBy groupBy) { // NearVector queries 
------------------------------------------------------- - public ResponseT nearVector(Float[] vector) { + public ResponseT nearVector(float[] vector) { return nearVector(NearVector.of(vector)); } - public ResponseT nearVector(Float[] vector, Function> fn) { + public ResponseT nearVector(float[] vector, Function> fn) { return nearVector(NearVector.of(vector, fn)); } @@ -122,11 +122,11 @@ public ResponseT nearVector(NearVector query) { return performRequest(query); } - public GroupedResponseT nearVector(Float[] vector, GroupBy groupBy) { + public GroupedResponseT nearVector(float[] vector, GroupBy groupBy) { return nearVector(NearVector.of(vector), groupBy); } - public GroupedResponseT nearVector(Float[] vector, Function> fn, + public GroupedResponseT nearVector(float[] vector, Function> fn, GroupBy groupBy) { return nearVector(NearVector.of(vector, fn), groupBy); } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/query/NearVector.java b/src/main/java/io/weaviate/client6/v1/api/collections/query/NearVector.java index 266e9ddbf..303729879 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/query/NearVector.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/query/NearVector.java @@ -10,14 +10,14 @@ import io.weaviate.client6.v1.internal.grpc.protocol.WeaviateProtoBaseSearch; import io.weaviate.client6.v1.internal.grpc.protocol.WeaviateProtoSearchGet; -public record NearVector(Float[] vector, Float distance, Float certainty, BaseQueryOptions common) +public record NearVector(float[] vector, Float distance, Float certainty, BaseQueryOptions common) implements QueryOperator, AggregateObjectFilter { - public static final NearVector of(Float[] vector) { + public static final NearVector of(float[] vector) { return of(vector, ObjectBuilder.identity()); } - public static final NearVector of(Float[] vector, Function> fn) { + public static final NearVector of(float[] vector, Function> fn) { return fn.apply(new Builder(vector)).build(); 
} @@ -27,9 +27,9 @@ public NearVector(Builder builder) { public static class Builder extends BaseVectorSearchBuilder { // Required query parameters. - private final Float[] vector; + private final float[] vector; - public Builder(Float[] vector) { + public Builder(float[] vector) { this.vector = vector; } diff --git a/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java b/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java index c4dbd7785..9b9a207dd 100644 --- a/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java +++ b/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java @@ -6,8 +6,6 @@ import java.util.Arrays; import java.util.UUID; -import org.apache.commons.lang3.ArrayUtils; - import com.google.protobuf.ByteString; public class ByteStringUtil { @@ -21,32 +19,25 @@ public static UUID decodeUuid(ByteString bs) { return new UUID(most, least); } - /** Encode Float[] to ByteString. */ - public static ByteString encodeVectorSingle(Float[] vector) { + /** Encode float[] to ByteString. */ + public static ByteString encodeVectorSingle(float[] vector) { if (vector == null || vector.length == 0) { return ByteString.EMPTY; } ByteBuffer buffer = ByteBuffer.allocate(vector.length * Float.BYTES).order(BYTE_ORDER); - Arrays.stream(vector).forEach(buffer::putFloat); - return ByteString.copyFrom(buffer.array()); - } - - /** Encode float[] to ByteString. */ - public static ByteString encodeVectorSingle(float[] vector) { - ByteBuffer buffer = ByteBuffer.allocate(vector.length * Float.BYTES).order(BYTE_ORDER); - for (float f : vector) { + for (final var f : vector) { buffer.putFloat(f); } return ByteString.copyFrom(buffer.array()); } /** - * Encode Float[][] to ByteString. + * Encode float[][] to ByteString. *

* The first 2 bytes of the resulting ByteString encode the number of dimensions * (uint16 / short) followed by concatenated vectors (4 bytes per element). */ - public static ByteString encodeVectorMulti(Float[][] vectors) { + public static ByteString encodeVectorMulti(float[][] vectors) { if (vectors == null || vectors.length == 0 || vectors[0].length == 0) { return ByteString.EMPTY; } @@ -57,28 +48,39 @@ public static ByteString encodeVectorMulti(Float[][] vectors) { /* concatenated elements */ (n * dimensions * Float.BYTES); ByteBuffer buffer = ByteBuffer.allocate(capacity).order(BYTE_ORDER) .putShort(dimensions); - Arrays.stream(vectors).forEach(v -> Arrays.stream(v).forEach(buffer::putFloat)); + Arrays.stream(vectors).forEach(vector -> { + for (final var f : vector) { + buffer.putFloat(f); + } + }); return ByteString.copyFrom(buffer.array()); } /** - * Decode ByteString into a Float[]. ByteString size must be a multiple of - * {@link Float#BYTES}, throws {@link IllegalArgumentException} otherwise. + * Decode ByteString to float[]. + * + * @throws IllegalArgumentException if ByteString size is not + * a multiple of {@link Float#BYTES}. */ - public static Float[] decodeVectorSingle(ByteString bs) { + public static float[] decodeVectorSingle(ByteString bs) { if (bs.size() % Float.BYTES != 0) { throw new IllegalArgumentException( - "byte string size not a multiple of " + String.valueOf(Float.BYTES) + " (Float.BYTES)"); + "ByteString is size " + bs.size() + ", not a multiple of " + String.valueOf(Float.BYTES) + " (Float.BYTES)"); } float[] vector = new float[bs.size() / Float.BYTES]; bs.asReadOnlyByteBuffer().order(BYTE_ORDER).asFloatBuffer().get(vector); - return ArrayUtils.toObject(vector); + return vector; } - /** Decode ByteString to Float[][]. */ - public static Float[][] decodeVectorMulti(ByteString bs) { + /** + * Decode ByteString to float[][]. + * + * @throws IllegalArgumentException if ByteString size is not + * a multiple of {@link Float#BYTES}. 
+ */ + public static float[][] decodeVectorMulti(ByteString bs) { if (bs == null || bs.size() == 0) { - return new Float[0][0]; + return new float[0][0]; } ByteBuffer buf = bs.asReadOnlyByteBuffer().order(BYTE_ORDER); @@ -86,16 +88,20 @@ public static Float[][] decodeVectorMulti(ByteString bs) { // Dimensions are encoded in the first 2 bytes. short dimensions = buf.getShort(); // advances current position + // TODO: throw IllegalArgumentException if fbuf.remaining not a multile of + // Float.BYTES FloatBuffer fbuf = buf.asFloatBuffer(); int n = fbuf.remaining() / dimensions; // fbuf size is buf / Float.BYTES // Reading from buffer advances current position, // so we always read from offset=0. - Float[][] vectors = new Float[n][dimensions]; + float[][] vectors = new float[n][dimensions]; for (int i = 0; i < n; i++) { float[] v = new float[dimensions]; + // TODO: use pre-allocated array rather than creating a new one + // fbuf.get(vectors[i], 0, dimensions); fbuf.get(v, 0, dimensions); - vectors[i] = ArrayUtils.toObject(v); + vectors[i] = v; } return vectors; } diff --git a/src/test/java/io/weaviate/client6/v1/internal/grpc/GRPCTest.java b/src/test/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtilTest.java similarity index 81% rename from src/test/java/io/weaviate/client6/v1/internal/grpc/GRPCTest.java rename to src/test/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtilTest.java index 1bc5d76a4..44f7a1c61 100644 --- a/src/test/java/io/weaviate/client6/v1/internal/grpc/GRPCTest.java +++ b/src/test/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtilTest.java @@ -14,10 +14,10 @@ * For this tests purposes the distinction is immaterial, as "want" arrays * are "golden values" meant to be a readable respresentation for the test. 
*/ -public class GRPCTest { +public class ByteStringUtilTest { @Test public void test_encodeVector_1d() { - Float[] vector = { 1f, 2f, 3f }; + float[] vector = { 1f, 2f, 3f }; byte[] want = { 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64 }; byte[] got = ByteStringUtil.encodeVectorSingle(vector).toByteArray(); assertArrayEquals(want, got); @@ -26,14 +26,14 @@ public void test_encodeVector_1d() { @Test public void test_decodeVector_1d() { byte[] bytes = { 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64 }; - Float[] want = { 1f, 2f, 3f }; - Float[] got = ByteStringUtil.decodeVectorSingle(ByteString.copyFrom(bytes)); - assertArrayEquals(want, got); + float[] want = { 1f, 2f, 3f }; + float[] got = ByteStringUtil.decodeVectorSingle(ByteString.copyFrom(bytes)); + assertArrayEquals(want, got, 0); } @Test public void test_encodeVector_2d() { - Float[][] vector = { { 1f, 2f, 3f }, { 4f, 5f, 6f } }; + float[][] vector = { { 1f, 2f, 3f }, { 4f, 5f, 6f } }; byte[] want = { 3, 0, 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, -128, 64, 0, 0, -96, 64, 0, 0, -64, 64 }; byte[] got = ByteStringUtil.encodeVectorMulti(vector).toByteArray(); assertArrayEquals(want, got); @@ -42,8 +42,8 @@ public void test_encodeVector_2d() { @Test public void test_decodeVector_2d() { byte[] bytes = { 3, 0, 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, -128, 64, 0, 0, -96, 64, 0, 0, -64, 64 }; - Float[][] want = { { 1f, 2f, 3f }, { 4f, 5f, 6f } }; - Float[][] got = ByteStringUtil.decodeVectorMulti(ByteString.copyFrom(bytes)); + float[][] want = { { 1f, 2f, 3f }, { 4f, 5f, 6f } }; + float[][] got = ByteStringUtil.decodeVectorMulti(ByteString.copyFrom(bytes)); assertArrayEquals(want, got); } diff --git a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java index 82a205ae9..b931db440 100644 --- a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java +++ b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java @@ 
-188,33 +188,33 @@ public static Object[][] testCases() { // Vectors.CustomTypeAdapterFactory { Vectors.class, - Vectors.of(new Float[] { 1f, 2f }), + Vectors.of(new float[] { 1f, 2f }), "{\"default\": [1.0, 2.0]}", (CustomAssert) JSONTest::compareVectors, }, { Vectors.class, - Vectors.of(new Float[][] { { 1f, 2f }, { 3f, 4f } }), + Vectors.of(new float[][] { { 1f, 2f }, { 3f, 4f } }), "{\"default\": [[1.0, 2.0], [3.0, 4.0]]}", (CustomAssert) JSONTest::compareVectors, }, { Vectors.class, - Vectors.of("custom", new Float[] { 1f, 2f }), + Vectors.of("custom", new float[] { 1f, 2f }), "{\"custom\": [1.0, 2.0]}", (CustomAssert) JSONTest::compareVectors, }, { Vectors.class, - Vectors.of("custom", new Float[][] { { 1f, 2f }, { 3f, 4f } }), + Vectors.of("custom", new float[][] { { 1f, 2f }, { 3f, 4f } }), "{\"custom\": [[1.0, 2.0], [3.0, 4.0]]}", (CustomAssert) JSONTest::compareVectors, }, { Vectors.class, Vectors.of(named -> named - .vector("1d", new Float[] { 1f, 2f }) - .vector("2d", new Float[][] { { 1f, 2f }, { 3f, 4f } })), + .vector("1d", new float[] { 1f, 2f }) + .vector("2d", new float[][] { { 1f, 2f }, { 3f, 4f } })), "{\"1d\": [1.0, 2.0], \"2d\": [[1.0, 2.0], [3.0, 4.0]]}", (CustomAssert) JSONTest::compareVectors, }, @@ -382,13 +382,13 @@ private static void assertEqualJson(String want, String got) { /** * Custom assert function that uses deep array equality - * to correctly compare Float[] and Float[][] nested in the object. + * to correctly compare float[] and float[][] nested in the object. 
*/ private static void compareVectors(Object got, Object want) { Assertions.assertThat(got) .usingRecursiveComparison() - .withEqualsForType(Arrays::equals, Float[].class) - .withEqualsForType(Arrays::deepEquals, Float[][].class) + .withEqualsForType(Arrays::equals, float[].class) + .withEqualsForType(Arrays::deepEquals, float[][].class) .isEqualTo(want); } From 56b525ea647e871d1c393abe0913d2c3b6db53b3 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 25 Jul 2025 17:00:37 +0200 Subject: [PATCH 2/6] refactor: avoid redundant array allocation --- .../weaviate/client6/v1/internal/grpc/ByteStringUtil.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java b/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java index 9b9a207dd..78f909bda 100644 --- a/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java +++ b/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java @@ -97,11 +97,7 @@ public static float[][] decodeVectorMulti(ByteString bs) { // so we always read from offset=0. 
float[][] vectors = new float[n][dimensions]; for (int i = 0; i < n; i++) { - float[] v = new float[dimensions]; - // TODO: use pre-allocated array rather than creating a new one - // fbuf.get(vectors[i], 0, dimensions); - fbuf.get(v, 0, dimensions); - vectors[i] = v; + fbuf.get(vectors[i], 0, dimensions); } return vectors; } From 5b0e6d669b6592d79d9aa1d2d54e800e5110df49 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 25 Jul 2025 17:45:29 +0200 Subject: [PATCH 3/6] test: add edge cases for ByteStringUtil --- .../v1/internal/grpc/ByteStringUtil.java | 39 ++++++++++++------- .../v1/internal/grpc/ByteStringUtilTest.java | 39 +++++++++++++++++++ 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java b/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java index 78f909bda..1d45bed0f 100644 --- a/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java +++ b/src/main/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtil.java @@ -57,7 +57,7 @@ public static ByteString encodeVectorMulti(float[][] vectors) { } /** - * Decode ByteString to float[]. + * Decode ByteString to {@code float[]}. * * @throws IllegalArgumentException if ByteString size is not * a multiple of {@link Float#BYTES}. @@ -65,7 +65,7 @@ public static ByteString encodeVectorMulti(float[][] vectors) { public static float[] decodeVectorSingle(ByteString bs) { if (bs.size() % Float.BYTES != 0) { throw new IllegalArgumentException( - "ByteString is size " + bs.size() + ", not a multiple of " + String.valueOf(Float.BYTES) + " (Float.BYTES)"); + "ByteString size " + bs.size() + " is not a multiple of " + String.valueOf(Float.BYTES) + " (Float.BYTES)"); } float[] vector = new float[bs.size() / Float.BYTES]; bs.asReadOnlyByteBuffer().order(BYTE_ORDER).asFloatBuffer().get(vector); @@ -73,10 +73,17 @@ public static float[] decodeVectorSingle(ByteString bs) { } /** - * Decode ByteString to float[][]. 
+ * Decode ByteString to {@code float[][]}. * - * @throws IllegalArgumentException if ByteString size is not - * a multiple of {@link Float#BYTES}. + *

+ * The expected structure of the byte string of total size N is: + *

    + *
  • [2 bytes]: dimensionality of the inner vector ({@code dim}) + *
  • [N-2 bytes]: concatenated inner vectors. N-2 must be a multiple of + * {@code Float.BYTES * dim} + *
+ * + * @throws IllegalArgumentException if ByteString is not of a valid size. */ public static float[][] decodeVectorMulti(ByteString bs) { if (bs == null || bs.size() == 0) { @@ -84,20 +91,24 @@ public static float[][] decodeVectorMulti(ByteString bs) { } ByteBuffer buf = bs.asReadOnlyByteBuffer().order(BYTE_ORDER); + short dim = buf.getShort(); // advances current position + if (dim == 0) { + return new float[0][0]; + } - // Dimensions are encoded in the first 2 bytes. - short dimensions = buf.getShort(); // advances current position - - // TODO: throw IllegalArgumentException if fbuf.remaining not a multile of - // Float.BYTES - FloatBuffer fbuf = buf.asFloatBuffer(); - int n = fbuf.remaining() / dimensions; // fbuf size is buf / Float.BYTES + FloatBuffer fbuf = buf.asFloatBuffer(); // fbuf size is buf / Float.BYTES + if (fbuf.remaining() % dim != 0) { + throw new IllegalArgumentException( + "Remaing ByteString size " + fbuf.remaining() + " is not a multiple of " + dim + + " (dim)"); + } + int n = fbuf.remaining() / dim; // Reading from buffer advances current position, // so we always read from offset=0. 
- float[][] vectors = new float[n][dimensions]; + float[][] vectors = new float[n][dim]; for (int i = 0; i < n; i++) { - fbuf.get(vectors[i], 0, dimensions); + fbuf.get(vectors[i], 0, dim); } return vectors; } diff --git a/src/test/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtilTest.java b/src/test/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtilTest.java index 44f7a1c61..f9c6d1f71 100644 --- a/src/test/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtilTest.java +++ b/src/test/java/io/weaviate/client6/v1/internal/grpc/ByteStringUtilTest.java @@ -54,4 +54,43 @@ public void test_decodeUuid() { String got = ByteStringUtil.decodeUuid(ByteString.copyFrom(bytes)).toString(); assertEquals(want, got); } + + @Test + public void test_decodeVector_1d_empty() { + byte[] bytes = new byte[0]; + float[] got = ByteStringUtil.decodeVectorSingle(ByteString.copyFrom(bytes)); + assertEquals(0, got.length); + } + + @Test + public void test_decodeVector_2d_empty() { + byte[] bytes = new byte[0]; + float[][] got = ByteStringUtil.decodeVectorMulti(ByteString.copyFrom(bytes)); + assertEquals(0, got.length); + } + + @Test + public void test_decodeVector_2d_dim_zero() { + byte[] bytes = new byte[] { 0, 0 }; + float[][] got = ByteStringUtil.decodeVectorMulti(ByteString.copyFrom(bytes)); + assertEquals(0, got.length); + } + + @Test(expected = IllegalArgumentException.class) + public void test_decodeVector_1d_illegal() { + byte[] bytes = new byte[Float.BYTES - 1]; // must be a multiple of Float.BYTES + ByteStringUtil.decodeVectorSingle(ByteString.copyFrom(bytes)); + } + + @Test(expected = IllegalArgumentException.class) + public void test_decodeVector_2d_illegal() { + // The first Short.BYTES is the dimensionality of each array. + // The size of the rest must be a multiple of Float.BYTES * dimensionality. 
+ var dimensionality = 5; + byte[] bytes = new byte[Short.BYTES + (Float.BYTES * dimensionality - 1)]; + bytes[0] = 0; + bytes[1] = (byte) dimensionality; + + ByteStringUtil.decodeVectorMulti(ByteString.copyFrom(bytes)); + } } From bb7f0dd5474ce4df2af191d36cccd8586fa56336 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 25 Jul 2025 17:56:14 +0200 Subject: [PATCH 4/6] chore: replace usages of boxed primitives where possible --- .../client6/v1/api/collections/aggregate/GroupedBy.java | 8 ++++---- .../v1/api/collections/query/BaseQueryOptions.java | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/GroupedBy.java b/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/GroupedBy.java index d3db6e971..f853780c7 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/GroupedBy.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/aggregate/GroupedBy.java @@ -13,12 +13,12 @@ public String text() { } public boolean isInteger() { - return value instanceof String; + return value instanceof Long; } - public Integer integer() { - checkPropertyType(this::isInteger, "Integer"); - return (Integer) value; + public Long integer() { + checkPropertyType(this::isInteger, "Long"); + return (Long) value; } private void checkPropertyType(Supplier check, String expected) { diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/query/BaseQueryOptions.java b/src/main/java/io/weaviate/client6/v1/api/collections/query/BaseQueryOptions.java index 3a2815864..e709ee069 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/query/BaseQueryOptions.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/query/BaseQueryOptions.java @@ -47,17 +47,17 @@ public static abstract class Builder, T extends Ob private List returnReferences = new ArrayList<>(); private List returnMetadata = new ArrayList<>(); - public final SELF limit(Integer limit) { + 
public final SELF limit(int limit) { this.limit = limit; return (SELF) this; } - public final SELF offset(Integer offset) { + public final SELF offset(int offset) { this.offset = offset; return (SELF) this; } - public final SELF autocut(Integer autocut) { + public final SELF autocut(int autocut) { this.autocut = autocut; return (SELF) this; } From f419efe50c88a1581ec5bf635683241469f5b57f Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 25 Jul 2025 18:05:43 +0200 Subject: [PATCH 5/6] chore: fix javadoc --- src/it/java/io/weaviate/integration/SearchITest.java | 2 +- .../weaviate/client6/v1/api/collections/Vectors.java | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/it/java/io/weaviate/integration/SearchITest.java b/src/it/java/io/weaviate/integration/SearchITest.java index 1ddf588cc..480e18c55 100644 --- a/src/it/java/io/weaviate/integration/SearchITest.java +++ b/src/it/java/io/weaviate/integration/SearchITest.java @@ -102,7 +102,7 @@ public void testNearVector_groupBy() { /** * Insert 10 objects with random vectors. * - * @returns IDs of inserted objects and their corresponding vectors. + * @return IDs of inserted objects and their corresponding vectors. */ private static Map populateTest(int n) throws IOException { var created = new HashMap(); diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java b/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java index 3549a7b7b..5c3a6a778 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java @@ -102,7 +102,7 @@ public Vectors build() { /** * Get 1-dimensional vector by name. * - * @returns Vector as {@code float[]} or {@code null}. + * @return Vector as {@code float[]} or {@code null}. * @throws ClassCastException The underlying vector is not a {@code float[]}. 
*/ public float[] getSingle(String name) { @@ -112,7 +112,7 @@ public float[] getSingle(String name) { /** * Get default 1-dimensional vector. * - * @returns Vector as {@code float[]} or {@code null}. + * @return Vector as {@code float[]} or {@code null}. * @throws ClassCastException if the underlying object is not a {@code float[]}. */ public float[] getDefaultSingle() { @@ -122,7 +122,7 @@ public float[] getDefaultSingle() { /** * Get 2-dimensional vector by name. * - * @returns Vector as {@code float[][]} or {@code null}. + * @return Vector as {@code float[][]} or {@code null}. * @throws ClassCastException if the underlying object is not a * {@code float[][]}. */ @@ -133,7 +133,7 @@ public float[][] getMulti(String name) { /** * Get default 2-dimensional vector. * - * @returns Vector as {@code float[][]} or {@code null}. + * @return Vector as {@code float[][]} or {@code null}. * @throws ClassCastException if the underlying object is not a * {@code float[][]}. */ @@ -146,7 +146,7 @@ public float[][] getDefaultMulti() { * Each element is either a {@code float[]} or a {@code float[][]}. * * - * @returns Map of name-vector pairs. The returned map is immutable. + * @return Map of name-vector pairs. The returned map is immutable. 
*/ public Map asMap() { return Map.copyOf(namedVectors); From 59cfad17444aa2df702b9835faeffdbe16a1d755 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Mon, 28 Jul 2025 14:12:45 +0200 Subject: [PATCH 6/6] test: simplify test fixture --- src/it/java/io/weaviate/ConcurrentTest.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/it/java/io/weaviate/ConcurrentTest.java b/src/it/java/io/weaviate/ConcurrentTest.java index 11f8211cb..f3a70cc37 100644 --- a/src/it/java/io/weaviate/ConcurrentTest.java +++ b/src/it/java/io/weaviate/ConcurrentTest.java @@ -2,14 +2,11 @@ import java.util.Random; import java.util.UUID; -import java.util.stream.IntStream; import org.apache.commons.lang3.RandomStringUtils; import org.junit.Rule; import org.junit.rules.TestName; -import com.google.common.primitives.Floats; - /** * ConcurrentTest is the base class for integration tests, which provides * utility methods to uniqualize collections and objects created in the @@ -59,8 +56,10 @@ protected static String randomUUID() { * @return */ protected static float[] randomVector(int length, float origin, float bound) { - return Floats.toArray(IntStream.range(0, length) - .mapToObj(f -> rand.nextFloat(origin, bound)) - .toList()); + var vector = new float[length]; + for (var i = 0; i < length; i++) { + vector[i] = rand.nextFloat(origin, bound); + } + return vector; } }