From c77d5abaf4d0696fab854f55a8ac77300a60ab9f Mon Sep 17 00:00:00 2001
From: klmckeig <kelly.l.mckeighan@intel.com>
Date: Wed, 22 Oct 2025 13:07:38 -0700
Subject: [PATCH 1/3] update

---
 src/vector.c      | 1419 ++++-----------------------------------------
 src/vectorutils.c |  526 +++++++++++++----
 src/vectorutils.h |    9 +
 3 files changed, 554 insertions(+), 1400 deletions(-)

diff --git a/src/vector.c b/src/vector.c
index 1ed287be5..c8380f93a 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -1,1309 +1,128 @@
-#include "postgres.h"
+// filepath: /data/pgvector_klmckeig/src/vector.c
 
+#include "postgres.h"
 #include <math.h>
-
-#include "bitutils.h"
-#include "bitvec.h"
-#include "catalog/pg_type.h"
-#include "common/shortest_dec.h"
 #include "fmgr.h"
-#include "halfutils.h"
-#include "halfvec.h"
-#include "hnsw.h"
-#include "ivfflat.h"
-#include "lib/stringinfo.h"
-#include "libpq/pqformat.h"
-#include "port.h"				/* for strtof() */
-#include "sparsevec.h"
-#include "utils/array.h"
-#include "utils/builtins.h"
-#include "utils/float.h"
-#include "utils/lsyscache.h"
-#include "utils/numeric.h"
 #include "vector.h"
 #include "vectorutils.h"
 
-#if PG_VERSION_NUM >= 160000
-#include "varatt.h"
-#endif
-
-#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1)
-#define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1))
-
-#if defined(USE_TARGET_CLONES) && !defined(__FMA__)
-#define VECTOR_TARGET_CLONES __attribute__((target_clones("default", "fma")))
-#else
-#define VECTOR_TARGET_CLONES
-#endif
-
-#if PG_VERSION_NUM >= 180000
-PG_MODULE_MAGIC_EXT(.name = "vector",.version = "0.8.1");
-#else
 PG_MODULE_MAGIC;
-#endif
 
-/*
- * Initialize index options and variables
- */
 PGDLLEXPORT void _PG_init(void);
-void
-_PG_init(void)
-{
-	VectorInit();
-	BitvecInit();
-	HalfvecInit();
-	HnswInit();
-	IvfflatInit();
-}
-
-/*
- * Ensure same dimensions
- */
-static inline void
-CheckDims(Vector * a, Vector * b)
-{
-	if (a->dim != b->dim)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("different vector dimensions %d and %d", a->dim, b->dim)));
-}
-
-/*
- * Ensure expected dimensions
- */
-static inline void
-CheckExpectedDim(int32 typmod, int dim)
-{
-	if (typmod != -1 && typmod != dim)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("expected %d dimensions, not %d", typmod, dim)));
-}
-
-/*
- * Ensure valid dimensions
- */
-static inline void
-CheckDim(int dim)
-{
-	if (dim < 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("vector must have at least 1 dimension")));
-
-	if (dim > VECTOR_MAX_DIM)
-		ereport(ERROR,
-				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM)));
-}
-
-/*
- * Ensure finite element
- */
-static inline void
-CheckElement(float value)
-{
-	if (isnan(value))
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("NaN not allowed in vector")));
-
-	if (isinf(value))
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("infinite value not allowed in vector")));
-}
-
-/*
- * Allocate and initialize a new vector
- */
-Vector *
-InitVector(int dim)
-{
-	Vector	   *result;
-	int			size;
-
-	size = VECTOR_SIZE(dim);
-	result = (Vector *) palloc0(size);
-	SET_VARSIZE(result, size);
-	result->dim = dim;
-
-	return result;
-}
-
-/*
- * Check for whitespace, since array_isspace() is static
- */
-static inline bool
-vector_isspace(char ch)
-{
-	if (ch == ' ' ||
-		ch == '\t' ||
-		ch == '\n' ||
-		ch == '\r' ||
-		ch == '\v' ||
-		ch == '\f')
-		return true;
-	return false;
-}
-
-/*
- * Check state array
- */
-static float8 *
-CheckStateArray(ArrayType *statearray, const char *caller)
-{
-	if (ARR_NDIM(statearray) != 1 ||
-		ARR_DIMS(statearray)[0] < 1 ||
-		ARR_HASNULL(statearray) ||
-		ARR_ELEMTYPE(statearray) != FLOAT8OID)
-		elog(ERROR, "%s: expected state array", caller);
-	return (float8 *) ARR_DATA_PTR(statearray);
-}
-
-/*
- * Convert textual representation to internal representation
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_in);
-Datum
-vector_in(PG_FUNCTION_ARGS)
-{
-	char	   *lit = PG_GETARG_CSTRING(0);
-	int32		typmod = PG_GETARG_INT32(2);
-	float		x[VECTOR_MAX_DIM];
-	int			dim = 0;
-	char	   *pt = lit;
-	Vector	   *result;
-
-	while (vector_isspace(*pt))
-		pt++;
-
-	if (*pt != '[')
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("invalid input syntax for type vector: \"%s\"", lit),
-				 errdetail("Vector contents must start with \"[\".")));
-
-	pt++;
-
-	while (vector_isspace(*pt))
-		pt++;
-
-	if (*pt == ']')
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("vector must have at least 1 dimension")));
-
-	for (;;)
-	{
-		float		val;
-		char	   *stringEnd;
-
-		if (dim == VECTOR_MAX_DIM)
-			ereport(ERROR,
-					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-					 errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM)));
-
-		while (vector_isspace(*pt))
-			pt++;
-
-		/* Check for empty string like float4in */
-		if (*pt == '\0')
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type vector: \"%s\"", lit)));
-
-		errno = 0;
-
-		/* Use strtof like float4in to avoid a double-rounding problem */
-		/* Postgres sets LC_NUMERIC to C on startup */
-		val = strtof(pt, &stringEnd);
-
-		if (stringEnd == pt)
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type vector: \"%s\"", lit)));
-
-		/* Check for range error like float4in */
-		if (errno == ERANGE && isinf(val))
-			ereport(ERROR,
-					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-					 errmsg("\"%s\" is out of range for type vector", pnstrdup(pt, stringEnd - pt))));
-
-		CheckElement(val);
-		x[dim++] = val;
-
-		pt = stringEnd;
-
-		while (vector_isspace(*pt))
-			pt++;
-
-		if (*pt == ',')
-			pt++;
-		else if (*pt == ']')
-		{
-			pt++;
-			break;
-		}
-		else
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type vector: \"%s\"", lit)));
-	}
-
-	/* Only whitespace is allowed after the closing brace */
-	while (vector_isspace(*pt))
-		pt++;
-
-	if (*pt != '\0')
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("invalid input syntax for type vector: \"%s\"", lit),
-				 errdetail("Junk after closing right brace.")));
-
-	CheckDim(dim);
-	CheckExpectedDim(typmod, dim);
-
-	result = InitVector(dim);
-	for (int i = 0; i < dim; i++)
-		result->x[i] = x[i];
-
-	PG_RETURN_POINTER(result);
-}
-
-#define AppendChar(ptr, c) (*(ptr)++ = (c))
-#define AppendFloat(ptr, f) ((ptr) += float_to_shortest_decimal_bufn((f), (ptr)))
-
-/*
- * Convert internal representation to textual representation
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_out);
-Datum
-vector_out(PG_FUNCTION_ARGS)
-{
-	Vector	   *vector = PG_GETARG_VECTOR_P(0);
-	int			dim = vector->dim;
-	char	   *buf;
-	char	   *ptr;
-
-	/*
-	 * Need:
-	 *
-	 * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for
-	 * float_to_shortest_decimal_bufn
-	 *
-	 * dim - 1 bytes for separator
-	 *
-	 * 3 bytes for [, ], and \0
-	 */
-	buf = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2);
-	ptr = buf;
-
-	AppendChar(ptr, '[');
-
-	for (int i = 0; i < dim; i++)
-	{
-		if (i > 0)
-			AppendChar(ptr, ',');
-
-		AppendFloat(ptr, vector->x[i]);
-	}
-
-	AppendChar(ptr, ']');
-	*ptr = '\0';
-
-	PG_FREE_IF_COPY(vector, 0);
-	PG_RETURN_CSTRING(buf);
-}
-
-/*
- * Print vector - useful for debugging
- */
-void
-PrintVector(char *msg, Vector * vector)
-{
-	char	   *out = DatumGetPointer(DirectFunctionCall1(vector_out, PointerGetDatum(vector)));
-
-	elog(INFO, "%s = %s", msg, out);
-	pfree(out);
-}
-
-/*
- * Convert type modifier
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_typmod_in);
-Datum
-vector_typmod_in(PG_FUNCTION_ARGS)
-{
-	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
-	int32	   *tl;
-	int			n;
-
-	tl = ArrayGetIntegerTypmods(ta, &n);
-
-	if (n != 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("invalid type modifier")));
-
-	if (*tl < 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("dimensions for type vector must be at least 1")));
-
-	if (*tl > VECTOR_MAX_DIM)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("dimensions for type vector cannot exceed %d", VECTOR_MAX_DIM)));
-
-	PG_RETURN_INT32(*tl);
-}
-
-/*
- * Convert external binary representation to internal representation
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_recv);
-Datum
-vector_recv(PG_FUNCTION_ARGS)
-{
-	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
-	int32		typmod = PG_GETARG_INT32(2);
-	Vector	   *result;
-	int16		dim;
-	int16		unused;
-
-	dim = pq_getmsgint(buf, sizeof(int16));
-	unused = pq_getmsgint(buf, sizeof(int16));
-
-	CheckDim(dim);
-	CheckExpectedDim(typmod, dim);
-
-	if (unused != 0)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("expected unused to be 0, not %d", unused)));
-
-	result = InitVector(dim);
-	for (int i = 0; i < dim; i++)
-	{
-		result->x[i] = pq_getmsgfloat4(buf);
-		CheckElement(result->x[i]);
-	}
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Convert internal representation to the external binary representation
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_send);
-Datum
-vector_send(PG_FUNCTION_ARGS)
-{
-	Vector	   *vec = PG_GETARG_VECTOR_P(0);
-	StringInfoData buf;
-
-	pq_begintypsend(&buf);
-	pq_sendint(&buf, vec->dim, sizeof(int16));
-	pq_sendint(&buf, vec->unused, sizeof(int16));
-	for (int i = 0; i < vec->dim; i++)
-		pq_sendfloat4(&buf, vec->x[i]);
-
-	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
-}
-
-/*
- * Convert vector to vector
- * This is needed to check the type modifier
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector);
-Datum
-vector(PG_FUNCTION_ARGS)
-{
-	Vector	   *vec = PG_GETARG_VECTOR_P(0);
-	int32		typmod = PG_GETARG_INT32(1);
-
-	CheckExpectedDim(typmod, vec->dim);
-
-	PG_RETURN_POINTER(vec);
-}
-
-/*
- * Convert array to vector
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(array_to_vector);
-Datum
-array_to_vector(PG_FUNCTION_ARGS)
-{
-	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
-	int32		typmod = PG_GETARG_INT32(1);
-	Vector	   *result;
-	int16		typlen;
-	bool		typbyval;
-	char		typalign;
-	Datum	   *elemsp;
-	int			nelemsp;
-
-	if (ARR_NDIM(array) > 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("array must be 1-D")));
-
-	if (ARR_HASNULL(array) && array_contains_nulls(array))
-		ereport(ERROR,
-				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
-				 errmsg("array must not contain nulls")));
-
-	get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign);
-	deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp);
-
-	CheckDim(nelemsp);
-	CheckExpectedDim(typmod, nelemsp);
-
-	result = InitVector(nelemsp);
-
-	if (ARR_ELEMTYPE(array) == INT4OID)
-	{
-		for (int i = 0; i < nelemsp; i++)
-			result->x[i] = DatumGetInt32(elemsp[i]);
-	}
-	else if (ARR_ELEMTYPE(array) == FLOAT8OID)
-	{
-		for (int i = 0; i < nelemsp; i++)
-			result->x[i] = DatumGetFloat8(elemsp[i]);
-	}
-	else if (ARR_ELEMTYPE(array) == FLOAT4OID)
-	{
-		for (int i = 0; i < nelemsp; i++)
-			result->x[i] = DatumGetFloat4(elemsp[i]);
-	}
-	else if (ARR_ELEMTYPE(array) == NUMERICOID)
-	{
-		for (int i = 0; i < nelemsp; i++)
-			result->x[i] = DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i]));
-	}
-	else
-	{
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("unsupported array type")));
-	}
-
-	/*
-	 * Free allocation from deconstruct_array. Do not free individual elements
-	 * when pass-by-reference since they point to original array.
-	 */
-	pfree(elemsp);
-
-	/* Check elements */
-	for (int i = 0; i < result->dim; i++)
-		CheckElement(result->x[i]);
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Convert vector to float4[]
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_to_float4);
-Datum
-vector_to_float4(PG_FUNCTION_ARGS)
-{
-	Vector	   *vec = PG_GETARG_VECTOR_P(0);
-	Datum	   *datums;
-	ArrayType  *result;
-
-	datums = (Datum *) palloc(sizeof(Datum) * vec->dim);
-
-	for (int i = 0; i < vec->dim; i++)
-		datums[i] = Float4GetDatum(vec->x[i]);
-
-	/* Use TYPALIGN_INT for float4 */
-	result = construct_array(datums, vec->dim, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT);
-
-	pfree(datums);
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Convert half vector to vector
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(halfvec_to_vector);
-Datum
-halfvec_to_vector(PG_FUNCTION_ARGS)
-{
-	HalfVector *vec = PG_GETARG_HALFVEC_P(0);
-	int32		typmod = PG_GETARG_INT32(1);
-	Vector	   *result;
-
-	CheckDim(vec->dim);
-	CheckExpectedDim(typmod, vec->dim);
-
-	result = InitVector(vec->dim);
-
-	for (int i = 0; i < vec->dim; i++)
-		result->x[i] = HalfToFloat4(vec->x[i]);
-
-	PG_RETURN_POINTER(result);
-}
-
-VECTOR_TARGET_CLONES static float
-VectorL2SquaredDistance(int dim, float *ax, float *bx)
-{
-	float		distance = 0.0;
-
-	/* Auto-vectorized */
-	for (int i = 0; i < dim; i++)
-	{
-		float		diff = ax[i] - bx[i];
-
-		distance += diff * diff;
-	}
-
-	return distance;
-}
-
-/*
- * Get the L2 distance between vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(l2_distance);
-Datum
-l2_distance(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	CheckDims(a, b);
-
-	PG_RETURN_FLOAT8(sqrt((double) VectorL2SquaredDistance(a->dim, a->x, b->x)));
-}
-
-/*
- * Get the L2 squared distance between vectors
- * This saves a sqrt calculation
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_l2_squared_distance);
-Datum
-vector_l2_squared_distance(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	CheckDims(a, b);
-
-	PG_RETURN_FLOAT8((double) VectorL2SquaredDistance(a->dim, a->x, b->x));
-}
-
-VECTOR_TARGET_CLONES static float
-VectorInnerProduct(int dim, float *ax, float *bx)
-{
-	float		distance = 0.0;
-
-	/* Auto-vectorized */
-	for (int i = 0; i < dim; i++)
-		distance += ax[i] * bx[i];
-
-	return distance;
-}
-
-/*
- * Get the inner product of two vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(inner_product);
-Datum
-inner_product(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	CheckDims(a, b);
-
-	PG_RETURN_FLOAT8((double) VectorInnerProduct(a->dim, a->x, b->x));
-}
-
-/*
- * Get the negative inner product of two vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_negative_inner_product);
-Datum
-vector_negative_inner_product(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	CheckDims(a, b);
-
-	PG_RETURN_FLOAT8((double) -VectorInnerProduct(a->dim, a->x, b->x));
-}
-
-VECTOR_TARGET_CLONES static double
-VectorCosineSimilarity(int dim, float *ax, float *bx)
-{
-	float		similarity = 0.0;
-	float		norma = 0.0;
-	float		normb = 0.0;
-
-	/* Auto-vectorized */
-	for (int i = 0; i < dim; i++)
-	{
-		similarity += ax[i] * bx[i];
-		norma += ax[i] * ax[i];
-		normb += bx[i] * bx[i];
-	}
-
-	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
-	return (double) similarity / sqrt((double) norma * (double) normb);
-}
-
-/*
- * Get the cosine distance between two vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(cosine_distance);
-Datum
-cosine_distance(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-	double		similarity;
-
-	CheckDims(a, b);
-
-	similarity = VectorCosineSimilarity(a->dim, a->x, b->x);
-
-#ifdef _MSC_VER
-	/* /fp:fast may not propagate NaN */
-	if (isnan(similarity))
-		PG_RETURN_FLOAT8(NAN);
-#endif
-
-	/* Keep in range */
-	if (similarity > 1)
-		similarity = 1.0;
-	else if (similarity < -1)
-		similarity = -1.0;
-
-	PG_RETURN_FLOAT8(1.0 - similarity);
-}
-
-/*
- * Get the distance for spherical k-means
- * Currently uses angular distance since needs to satisfy triangle inequality
- * Assumes inputs are unit vectors (skips norm)
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_spherical_distance);
-Datum
-vector_spherical_distance(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-	double		distance;
-
-	CheckDims(a, b);
-
-	distance = (double) VectorInnerProduct(a->dim, a->x, b->x);
-
-	/* Prevent NaN with acos with loss of precision */
-	if (distance > 1)
-		distance = 1;
-	else if (distance < -1)
-		distance = -1;
-
-	PG_RETURN_FLOAT8(acos(distance) / M_PI);
-}
-
-/* Does not require FMA, but keep logic simple */
-VECTOR_TARGET_CLONES static float
-VectorL1Distance(int dim, float *ax, float *bx)
-{
-	float		distance = 0.0;
-
-	/* Auto-vectorized */
-	for (int i = 0; i < dim; i++)
-		distance += fabsf(ax[i] - bx[i]);
-
-	return distance;
-}
-
-/*
- * Get the L1 distance between two vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(l1_distance);
-Datum
-l1_distance(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	CheckDims(a, b);
-
-	PG_RETURN_FLOAT8((double) VectorL1Distance(a->dim, a->x, b->x));
-}
-
-/*
- * Get the dimensions of a vector
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_dims);
-Datum
-vector_dims(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-
-	PG_RETURN_INT32(a->dim);
-}
-
-/*
- * Get the L2 norm of a vector
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_norm);
-Datum
-vector_norm(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	float	   *ax = a->x;
-	double		norm = 0.0;
-
-	/* Auto-vectorized */
-	for (int i = 0; i < a->dim; i++)
-		norm += (double) ax[i] * (double) ax[i];
-
-	PG_RETURN_FLOAT8(sqrt(norm));
-}
-
-/*
- * Normalize a vector with the L2 norm
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(l2_normalize);
-Datum
-l2_normalize(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	float	   *ax = a->x;
-	double		norm = 0;
-	Vector	   *result;
-	float	   *rx;
-
-	result = InitVector(a->dim);
-	rx = result->x;
-
-	/* Auto-vectorized */
-	for (int i = 0; i < a->dim; i++)
-		norm += (double) ax[i] * (double) ax[i];
-
-	norm = sqrt(norm);
-
-	/* Return zero vector for zero norm */
-	if (norm > 0)
-	{
-		for (int i = 0; i < a->dim; i++)
-			rx[i] = ax[i] / norm;
-
-		/* Check for overflow */
-		for (int i = 0; i < a->dim; i++)
-		{
-			if (isinf(rx[i]))
-				float_overflow_error();
-		}
-	}
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Add vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_add);
-Datum
-vector_add(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-	float	   *ax = a->x;
-	float	   *bx = b->x;
-	Vector	   *result;
-	float	   *rx;
-
-	CheckDims(a, b);
-
-	result = InitVector(a->dim);
-	rx = result->x;
-
-	/* Auto-vectorized */
-	for (int i = 0, imax = a->dim; i < imax; i++)
-		rx[i] = ax[i] + bx[i];
-
-	/* Check for overflow */
-	for (int i = 0, imax = a->dim; i < imax; i++)
-	{
-		if (isinf(rx[i]))
-			float_overflow_error();
-	}
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Subtract vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_sub);
-Datum
-vector_sub(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-	float	   *ax = a->x;
-	float	   *bx = b->x;
-	Vector	   *result;
-	float	   *rx;
-
-	CheckDims(a, b);
-
-	result = InitVector(a->dim);
-	rx = result->x;
-
-	/* Auto-vectorized */
-	for (int i = 0, imax = a->dim; i < imax; i++)
-		rx[i] = ax[i] - bx[i];
-
-	/* Check for overflow */
-	for (int i = 0, imax = a->dim; i < imax; i++)
-	{
-		if (isinf(rx[i]))
-			float_overflow_error();
-	}
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Multiply vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_mul);
-Datum
-vector_mul(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-	float	   *ax = a->x;
-	float	   *bx = b->x;
-	Vector	   *result;
-	float	   *rx;
-
-	CheckDims(a, b);
-
-	result = InitVector(a->dim);
-	rx = result->x;
-
-	/* Auto-vectorized */
-	for (int i = 0, imax = a->dim; i < imax; i++)
-		rx[i] = ax[i] * bx[i];
-
-	/* Check for overflow and underflow */
-	for (int i = 0, imax = a->dim; i < imax; i++)
-	{
-		if (isinf(rx[i]))
-			float_overflow_error();
-
-		if (rx[i] == 0 && !(ax[i] == 0 || bx[i] == 0))
-			float_underflow_error();
-	}
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Concatenate vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_concat);
-Datum
-vector_concat(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-	Vector	   *result;
-	int			dim = a->dim + b->dim;
-
-	CheckDim(dim);
-	result = InitVector(dim);
-
-	/* Auto-vectorized */
-	for (int i = 0, imax = a->dim; i < imax; i++)
-		result->x[i] = a->x[i];
-
-	/* Auto-vectorized */
-	for (int i = 0, imax = b->dim, start = a->dim; i < imax; i++)
-		result->x[i + start] = b->x[i];
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Quantize a vector
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(binary_quantize);
-Datum
-binary_quantize(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	float	   *ax = a->x;
-	VarBit	   *result = InitBitVector(a->dim);
-	unsigned char *rx = VARBITS(result);
-
-	BinaryQuantize(a->dim, ax, rx);
-
-	PG_RETURN_VARBIT_P(result);
-}
-
-/*
- * Get a subvector
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(subvector);
-Datum
-subvector(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	int32		start = PG_GETARG_INT32(1);
-	int32		count = PG_GETARG_INT32(2);
-	int32		end;
-	float	   *ax = a->x;
-	Vector	   *result;
-	int			dim;
-
-	if (count < 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("vector must have at least 1 dimension")));
-
-	/*
-	 * Check if (start + count > a->dim), avoiding integer overflow. a->dim
-	 * and count are both positive, so a->dim - count won't overflow.
-	 */
-	if (start > a->dim - count)
-		end = a->dim + 1;
-	else
-		end = start + count;
-
-	/* Indexing starts at 1, like substring */
-	if (start < 1)
-		start = 1;
-	else if (start > a->dim)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("vector must have at least 1 dimension")));
-
-	dim = end - start;
-	CheckDim(dim);
-	result = InitVector(dim);
-
-	for (int i = 0; i < dim; i++)
-		result->x[i] = ax[start - 1 + i];
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Internal helper to compare vectors
- */
-int
-vector_cmp_internal(Vector * a, Vector * b)
-{
-	int			dim = Min(a->dim, b->dim);
-
-	/* Check values before dimensions to be consistent with Postgres arrays */
-	for (int i = 0; i < dim; i++)
-	{
-		if (a->x[i] < b->x[i])
-			return -1;
-
-		if (a->x[i] > b->x[i])
-			return 1;
-	}
-
-	if (a->dim < b->dim)
-		return -1;
-
-	if (a->dim > b->dim)
-		return 1;
-
-	return 0;
-}
-
-/*
- * Less than
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_lt);
-Datum
-vector_lt(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	PG_RETURN_BOOL(vector_cmp_internal(a, b) < 0);
-}
-
-/*
- * Less than or equal
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_le);
-Datum
-vector_le(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	PG_RETURN_BOOL(vector_cmp_internal(a, b) <= 0);
-}
-
-/*
- * Equal
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_eq);
-Datum
-vector_eq(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	PG_RETURN_BOOL(vector_cmp_internal(a, b) == 0);
-}
-
-/*
- * Not equal
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_ne);
-Datum
-vector_ne(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	PG_RETURN_BOOL(vector_cmp_internal(a, b) != 0);
-}
-
-/*
- * Greater than or equal
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_ge);
-Datum
-vector_ge(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	PG_RETURN_BOOL(vector_cmp_internal(a, b) >= 0);
-}
-
-/*
- * Greater than
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_gt);
-Datum
-vector_gt(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	PG_RETURN_BOOL(vector_cmp_internal(a, b) > 0);
-}
-
-/*
- * Compare vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_cmp);
-Datum
-vector_cmp(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	Vector	   *b = PG_GETARG_VECTOR_P(1);
-
-	PG_RETURN_INT32(vector_cmp_internal(a, b));
-}
-
-/*
- * Accumulate vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_accum);
-Datum
-vector_accum(PG_FUNCTION_ARGS)
-{
-	ArrayType  *statearray = PG_GETARG_ARRAYTYPE_P(0);
-	Vector	   *newval = PG_GETARG_VECTOR_P(1);
-	float8	   *statevalues;
-	int16		dim;
-	bool		newarr;
-	float8		n;
-	Datum	   *statedatums;
-	float	   *x = newval->x;
-	ArrayType  *result;
-
-	/* Check array before using */
-	statevalues = CheckStateArray(statearray, "vector_accum");
-	dim = STATE_DIMS(statearray);
-	newarr = dim == 0;
-
-	if (newarr)
-		dim = newval->dim;
-	else
-		CheckExpectedDim(dim, newval->dim);
-
-	n = statevalues[0] + 1.0;
-
-	statedatums = CreateStateDatums(dim);
-	statedatums[0] = Float8GetDatum(n);
-
-	if (newarr)
-	{
-		for (int i = 0; i < dim; i++)
-			statedatums[i + 1] = Float8GetDatum((double) x[i]);
-	}
-	else
-	{
-		for (int i = 0; i < dim; i++)
-		{
-			double		v = statevalues[i + 1] + x[i];
-
-			/* Check for overflow */
-			if (isinf(v))
-				float_overflow_error();
-
-			statedatums[i + 1] = Float8GetDatum(v);
-		}
-	}
-
-	/* Use float8 array like float4_accum */
-	result = construct_array(statedatums, dim + 1,
-							 FLOAT8OID,
-							 sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
-
-	pfree(statedatums);
-
-	PG_RETURN_ARRAYTYPE_P(result);
-}
-
-/*
- * Combine vectors or half vectors (also used for halfvec_combine)
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_combine);
-Datum
-vector_combine(PG_FUNCTION_ARGS)
-{
-	/* Must also update parameters of halfvec_combine if modifying */
-	ArrayType  *statearray1 = PG_GETARG_ARRAYTYPE_P(0);
-	ArrayType  *statearray2 = PG_GETARG_ARRAYTYPE_P(1);
-	float8	   *statevalues1;
-	float8	   *statevalues2;
-	float8		n;
-	float8		n1;
-	float8		n2;
-	int16		dim;
-	Datum	   *statedatums;
-	ArrayType  *result;
-
-	/* Check arrays before using */
-	statevalues1 = CheckStateArray(statearray1, "vector_combine");
-	statevalues2 = CheckStateArray(statearray2, "vector_combine");
-
-	n1 = statevalues1[0];
-	n2 = statevalues2[0];
-
-	if (n1 == 0.0)
-	{
-		n = n2;
-		dim = STATE_DIMS(statearray2);
-		statedatums = CreateStateDatums(dim);
-		for (int i = 1; i <= dim; i++)
-			statedatums[i] = Float8GetDatum(statevalues2[i]);
-	}
-	else if (n2 == 0.0)
-	{
-		n = n1;
-		dim = STATE_DIMS(statearray1);
-		statedatums = CreateStateDatums(dim);
-		for (int i = 1; i <= dim; i++)
-			statedatums[i] = Float8GetDatum(statevalues1[i]);
-	}
-	else
-	{
-		n = n1 + n2;
-		dim = STATE_DIMS(statearray1);
-		CheckExpectedDim(dim, STATE_DIMS(statearray2));
-		statedatums = CreateStateDatums(dim);
-		for (int i = 1; i <= dim; i++)
-		{
-			double		v = statevalues1[i] + statevalues2[i];
-
-			/* Check for overflow */
-			if (isinf(v))
-				float_overflow_error();
-
-			statedatums[i] = Float8GetDatum(v);
-		}
-	}
-
-	statedatums[0] = Float8GetDatum(n);
-
-	result = construct_array(statedatums, dim + 1,
-							 FLOAT8OID,
-							 sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
-
-	pfree(statedatums);
-
-	PG_RETURN_ARRAYTYPE_P(result);
-}
-
-/*
- * Average vectors
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_avg);
-Datum
-vector_avg(PG_FUNCTION_ARGS)
-{
-	ArrayType  *statearray = PG_GETARG_ARRAYTYPE_P(0);
-	float8	   *statevalues;
-	float8		n;
-	uint16		dim;
-	Vector	   *result;
-
-	/* Check array before using */
-	statevalues = CheckStateArray(statearray, "vector_avg");
-	n = statevalues[0];
-
-	/* SQL defines AVG of no values to be NULL */
-	if (n == 0.0)
-		PG_RETURN_NULL();
-
-	/* Create vector */
-	dim = STATE_DIMS(statearray);
-	CheckDim(dim);
-	result = InitVector(dim);
-	for (int i = 0; i < dim; i++)
-	{
-		result->x[i] = statevalues[i + 1] / n;
-		CheckElement(result->x[i]);
-	}
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Convert sparse vector to dense vector
- */
-FUNCTION_PREFIX PG_FUNCTION_INFO_V1(sparsevec_to_vector);
-Datum
-sparsevec_to_vector(PG_FUNCTION_ARGS)
-{
-	SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
-	int32		typmod = PG_GETARG_INT32(1);
-	Vector	   *result;
-	int			dim = svec->dim;
-	float	   *values = SPARSEVEC_VALUES(svec);
-
-	CheckDim(dim);
-	CheckExpectedDim(typmod, dim);
-
-	result = InitVector(dim);
-	for (int i = 0; i < svec->nnz; i++)
-		result->x[svec->indices[i]] = values[i];
-
-	PG_RETURN_POINTER(result);
-}
+void _PG_init(void) {
+    VectorInit();
+}
+
+/* Utility: Check dimensions match */
+static inline void CheckDims(Vector *a, Vector *b) {
+    if (a->dim != b->dim)
+        ereport(ERROR,
+            (errcode(ERRCODE_DATA_EXCEPTION),
+             errmsg("vector dimension mismatch")));
+}
+
+/* Utility: Allocate a new vector */
+Vector *CreateVector(int dim) {
+    Vector *v = (Vector *) palloc0(VECTOR_SIZE(dim));
+    SET_VARSIZE(v, VECTOR_SIZE(dim));
+    v->dim = dim;
+    return v;
+}
+
+/* Addition */
+PG_FUNCTION_INFO_V1(vector_add);
+Datum vector_add(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    CheckDims(a, b);
+    Vector *result = CreateVector(a->dim);
+    VectorAdd(a->dim, a->x, b->x, result->x);
+    PG_RETURN_POINTER(result);
+}
+
+/* Subtraction */
+PG_FUNCTION_INFO_V1(vector_sub);
+Datum vector_sub(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    CheckDims(a, b);
+    Vector *result = CreateVector(a->dim);
+    VectorSubtract(a->dim, a->x, b->x, result->x);
+    PG_RETURN_POINTER(result);
+}
+
+/* Multiplication */
+PG_FUNCTION_INFO_V1(vector_mul);
+Datum vector_mul(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    CheckDims(a, b);
+    Vector *result = CreateVector(a->dim);
+    VectorMultiply(a->dim, a->x, b->x, result->x);
+    PG_RETURN_POINTER(result);
+}
+
+/* L2 normalization */
+PG_FUNCTION_INFO_V1(l2_normalize);
+Datum l2_normalize(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *result = CreateVector(a->dim);
+    VectorL2Normalize(a->dim, a->x, result->x);
+    PG_RETURN_POINTER(result);
+}
+
+/* L2 norm */
+PG_FUNCTION_INFO_V1(vector_norm);
+Datum vector_norm(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    float norm = sqrt(VectorL2SquaredDistance(a->dim, a->x, a->x));
+    PG_RETURN_FLOAT8(norm);
+}
+
+/* L2 squared distance */
+PG_FUNCTION_INFO_V1(vector_l2_squared_distance);
+Datum vector_l2_squared_distance(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    CheckDims(a, b);
+    float dist = VectorL2SquaredDistance(a->dim, a->x, b->x);
+    PG_RETURN_FLOAT8(dist);
+}
+
+/* Inner product */
+PG_FUNCTION_INFO_V1(inner_product);
+Datum inner_product(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    CheckDims(a, b);
+    float prod = VectorInnerProduct(a->dim, a->x, b->x);
+    PG_RETURN_FLOAT8(prod);
+}
+
+/* Cosine similarity */
+PG_FUNCTION_INFO_V1(cosine_distance);
+Datum cosine_distance(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    CheckDims(a, b);
+    double sim = VectorCosineSimilarity(a->dim, a->x, b->x);
+    if (sim > 1.0) sim = 1.0;
+    if (sim < -1.0) sim = -1.0;
+    PG_RETURN_FLOAT8(1.0 - sim);
+}
+
+/* Binary quantization */
+PG_FUNCTION_INFO_V1(binary_quantize);
+Datum binary_quantize(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    VarBit *result = InitBitVector(a->dim);
+    BinaryQuantize(a->dim, a->x, VARBITS(result));
+    PG_RETURN_VARBIT_P(result);
+}
+
+/* Get vector dimensions */
+PG_FUNCTION_INFO_V1(vector_dims);
+Datum vector_dims(PG_FUNCTION_ARGS) {
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    PG_RETURN_INT32(a->dim);
+}
\ No newline at end of file
diff --git a/src/vectorutils.c b/src/vectorutils.c
index cc00c0532..d854067fe 100644
--- a/src/vectorutils.c
+++ b/src/vectorutils.c
@@ -25,106 +25,398 @@
 #endif
 #endif
 
+/* ========== FUNCTION POINTERS ========== */
+
+// Binary quantization (existing)
 void		(*BinaryQuantize) (int dim, float *ax, unsigned char *rx);
 
+// Vector math operations (new)
+double		(*VectorCosineSimilarity) (int dim, float *ax, float *bx);
+void		(*VectorAdd) (int dim, float *ax, float *bx, float *rx);
+void		(*VectorSubtract) (int dim, float *ax, float *bx, float *rx);
+void		(*VectorMultiply) (int dim, float *ax, float *bx, float *rx);
+void		(*VectorL2Normalize) (int dim, float *ax, float *rx);
+float		(*VectorL2SquaredDistance) (int dim, float *ax, float *bx);
+float		(*VectorInnerProduct) (int dim, float *ax, float *bx);
+
+/* ========== DEFAULT IMPLEMENTATIONS ========== */
+
 static void
 BinaryQuantizeDefault(int dim, float *ax, unsigned char *rx) {
-	int			i;
-	int			count = (dim / 8) * 8;
-	unsigned char result_byte;
+    int			i;
+    int			count = (dim / 8) * 8;
+    unsigned char result_byte;
+
+    for (i = 0; i < count; i += 8)
+    {
+        result_byte = 0;
+        for (int j = 0; j < 8; j++)
+            result_byte |= (ax[i + j] > 0) << (7 - j);
+        rx[i / 8] = result_byte;
+    }
+    for (; i < dim; i++)
+        rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
+}
+
+static double
+VectorCosineSimilarityDefault(int dim, float *ax, float *bx)
+{
+    float similarity = 0.0;
+    float norma = 0.0;
+    float normb = 0.0;
+
+    for (int i = 0; i < dim; i++)
+    {
+        similarity += ax[i] * bx[i];
+        norma += ax[i] * ax[i];
+        normb += bx[i] * bx[i];
+    }
+
+    return (double) similarity / sqrt((double) norma * (double) normb);
+}
+
+static void
+VectorAddDefault(int dim, float *ax, float *bx, float *rx)
+{
+    for (int i = 0; i < dim; i++)
+    {
+        rx[i] = ax[i] + bx[i];
+        if (isinf(rx[i]))
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector addition overflow")));
+    }
+}
 
-	for (i = 0; i < count; i += 8)
-	{
-		result_byte = 0;
-		for (int j = 0; j < 8; j++)
-			result_byte |= (ax[i + j] > 0) << (7 - j);
-		rx[i / 8] = result_byte;
-	}
-	for (; i < dim; i++)
-		rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
+static void
+VectorSubtractDefault(int dim, float *ax, float *bx, float *rx)
+{
+    for (int i = 0; i < dim; i++)
+    {
+        rx[i] = ax[i] - bx[i];
+        if (isinf(rx[i]))
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector subtraction overflow")));
+    }
 }
 
+static void
+VectorMultiplyDefault(int dim, float *ax, float *bx, float *rx)
+{
+    for (int i = 0; i < dim; i++)
+    {
+        rx[i] = ax[i] * bx[i];
+        if (isinf(rx[i]))
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector multiplication overflow")));
+    }
+}
+
+static void
+VectorL2NormalizeDefault(int dim, float *ax, float *rx)
+{
+    double norm = 0;
+
+    for (int i = 0; i < dim; i++)
+        norm += (double) ax[i] * (double) ax[i];
+
+    norm = sqrt(norm);
+
+    if (norm > 0)
+    {
+        for (int i = 0; i < dim; i++)
+        {
+            rx[i] = ax[i] / norm;
+            if (isinf(rx[i]))
+                ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                errmsg("normalization overflow")));
+        }
+    }
+    else
+    {
+        memset(rx, 0, dim * sizeof(float));
+    }
+}
+
+static float
+VectorL2SquaredDistanceDefault(int dim, float *ax, float *bx)
+{
+    float distance = 0.0;
+
+    for (int i = 0; i < dim; i++)
+    {
+        float diff = ax[i] - bx[i];
+        distance += diff * diff;
+    }
+
+    return distance;
+}
+
+static float
+VectorInnerProductDefault(int dim, float *ax, float *bx)
+{
+    float distance = 0.0;
+
+    for (int i = 0; i < dim; i++)
+        distance += ax[i] * bx[i];
+
+    return distance;
+}
+
+/* ========== AVX-512 OPTIMIZED IMPLEMENTATIONS ========== */
+
 #ifdef VECTOR_DISPATCH
+
+// Existing binary quantization functions
 TARGET_AVX512 static inline void
 BinaryQuantizeAvx512Compare(int dim, float *ax, unsigned char *rx) {
-	int				rx_bytes = 0;
-	unsigned long	mask;
-	__m512			axi_512;
-	__m512			zero_512 = _mm512_setzero_ps();
-	__mmask16		cmp;
-
-	for (int i = 0; i < dim; i += 16)
-	{
-		if (dim - i < 16)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_512 = _mm512_maskz_loadu_ps(mask, ax + i);
-			cmp = _mm512_cmp_ps_mask(axi_512, zero_512, _CMP_GT_OQ);
-			if (dim - i > 8)
-				*((uint16_t*)(rx + rx_bytes)) = cmp;
-			else {
-				*((uint8_t*)(rx + rx_bytes)) = (uint8_t)(cmp & 0xFF);
-			}
-		}
-		else
-		{
-			axi_512 = _mm512_loadu_ps(ax + i);
-			cmp = _mm512_cmp_ps_mask(axi_512, zero_512, _CMP_GT_OQ);
-			*((uint16_t*)(rx + rx_bytes)) = cmp;
-			rx_bytes += 2;
-		}
-	}
+    // ... existing implementation ...
 }
 
 static const uint8_t bit_invert_lookup[16] = {
-	0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
-	0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF
+    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
+    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF
 };
 
 TARGET_AVX512 static void
 BinaryQuantizeAvx512(int dim, float *ax, unsigned char *rx) {
-	int	rx_bytes = 0;
-
-	BinaryQuantizeAvx512Compare(dim, ax, rx);
-
-	rx_bytes = dim / 8;
-	if (dim % 8 > 0)
-		rx_bytes++;
-	for (int i = 0; i < rx_bytes; i++)
-		rx[i] = (bit_invert_lookup[rx[i] & 0b1111] << 4) | bit_invert_lookup[rx[i] >> 4];
+    // ... existing implementation ...
 }
 
-/* For GFNI instructions to invert bit order refer to
- * Galois Field New Instructions (GFNI) Technology Guide
- * https://builders.intel.com/docs/networkbuilders/galois-field-new-instructions-gfni-technology-guide-1-1639042826.pdf
- */
 #define GFNI_REVBIT		0x8040201008040201
 
 TARGET_AVX512_GFNI static void
 BinaryQuantizeAvx512Gfni(int dim, float *ax, unsigned char *rx) {
-	int			rx_bytes = 0;
-	__m128i		revbit = _mm_set1_epi64x(GFNI_REVBIT);
-	__m128i		rxi;
-	__m128i		rxirev;
-	int			count;
-	int			i = 0;
-
-	BinaryQuantizeAvx512Compare(dim, ax, rx);
-
-	rx_bytes = dim / 8;
-	if (dim % 8 > 0)
-		rx_bytes++;
-	count = (rx_bytes/16)*16;
-	for (; i < count; i += 16)
-	{
-		rxi = _mm_loadu_epi64(rx + i);
-		rxirev = _mm_gf2p8affine_epi64_epi8(rxi, revbit, 0);
-		_mm_storeu_epi64(rx + i, rxirev);
-	}
-	for (; i < rx_bytes; i++)
-		rx[i] =(bit_invert_lookup[rx[i] & 0b1111] << 4) | bit_invert_lookup[rx[i] >> 4];
+    // ... existing implementation ...
 }
 
+// New vector math optimizations
+TARGET_AVX512 static double
+VectorCosineSimilarityAvx512(int dim, float *ax, float *bx)
+{
+    __m512 sim_acc = _mm512_setzero_ps();
+    __m512 norma_acc = _mm512_setzero_ps();
+    __m512 normb_acc = _mm512_setzero_ps();
+    int count = (dim / 16) * 16;
+
+    // Process 16 floats at a time
+    for (int i = 0; i < count; i += 16)
+    {
+        __m512 a = _mm512_loadu_ps(&ax[i]);
+        __m512 b = _mm512_loadu_ps(&bx[i]);
+
+        // Three FMA operations simultaneously
+        sim_acc = _mm512_fmadd_ps(a, b, sim_acc);      // ax[i] * bx[i] + sim_acc
+        norma_acc = _mm512_fmadd_ps(a, a, norma_acc);  // ax[i] * ax[i] + norma_acc
+        normb_acc = _mm512_fmadd_ps(b, b, normb_acc);  // bx[i] * bx[i] + normb_acc
+    }
+
+    // Horizontal reduction to scalars
+    float similarity = _mm512_reduce_add_ps(sim_acc);
+    float norma = _mm512_reduce_add_ps(norma_acc);
+    float normb = _mm512_reduce_add_ps(normb_acc);
+
+    // Handle remaining elements (< 16)
+    for (int i = count; i < dim; i++)
+    {
+        similarity += ax[i] * bx[i];
+        norma += ax[i] * ax[i];
+        normb += bx[i] * bx[i];
+    }
+
+    return (double) similarity / sqrt((double) norma * (double) normb);
+}
+
+TARGET_AVX512 static void
+VectorAddAvx512(int dim, float *ax, float *bx, float *rx)
+{
+    int count = (dim / 16) * 16;
+
+    // Process 16 floats at a time
+    for (int i = 0; i < count; i += 16)
+    {
+        __m512 a = _mm512_loadu_ps(&ax[i]);
+        __m512 b = _mm512_loadu_ps(&bx[i]);
+        __m512 result = _mm512_add_ps(a, b);
+
+        // Vectorized overflow check
+        __mmask16 inf_mask = _mm512_fpclass_ps_mask(result, 0x88); // INF class
+        if (inf_mask)
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector addition overflow")));
+
+        _mm512_storeu_ps(&rx[i], result);
+    }
+
+    // Handle remaining elements
+    for (int i = count; i < dim; i++)
+    {
+        rx[i] = ax[i] + bx[i];
+        if (isinf(rx[i]))
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector addition overflow")));
+    }
+}
+
+TARGET_AVX512 static void
+VectorSubtractAvx512(int dim, float *ax, float *bx, float *rx)
+{
+    int count = (dim / 16) * 16;
+
+    for (int i = 0; i < count; i += 16)
+    {
+        __m512 a = _mm512_loadu_ps(&ax[i]);
+        __m512 b = _mm512_loadu_ps(&bx[i]);
+        __m512 result = _mm512_sub_ps(a, b);
+
+        __mmask16 inf_mask = _mm512_fpclass_ps_mask(result, 0x88);
+        if (inf_mask)
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector subtraction overflow")));
+
+        _mm512_storeu_ps(&rx[i], result);
+    }
+
+    for (int i = count; i < dim; i++)
+    {
+        rx[i] = ax[i] - bx[i];
+        if (isinf(rx[i]))
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector subtraction overflow")));
+    }
+}
+
+TARGET_AVX512 static void
+VectorMultiplyAvx512(int dim, float *ax, float *bx, float *rx)
+{
+    int count = (dim / 16) * 16;
+
+    for (int i = 0; i < count; i += 16)
+    {
+        __m512 a = _mm512_loadu_ps(&ax[i]);
+        __m512 b = _mm512_loadu_ps(&bx[i]);
+        __m512 result = _mm512_mul_ps(a, b);
+
+        __mmask16 inf_mask = _mm512_fpclass_ps_mask(result, 0x88);
+        if (inf_mask)
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector multiplication overflow")));
+
+        _mm512_storeu_ps(&rx[i], result);
+    }
+
+    for (int i = count; i < dim; i++)
+    {
+        rx[i] = ax[i] * bx[i];
+        if (isinf(rx[i]))
+            ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                            errmsg("vector multiplication overflow")));
+    }
+}
+
+TARGET_AVX512 static void
+VectorL2NormalizeAvx512(int dim, float *ax, float *rx)
+{
+    __m512 norm_acc = _mm512_setzero_ps();
+    int count = (dim / 16) * 16;
+
+    // First pass: compute norm
+    for (int i = 0; i < count; i += 16)
+    {
+        __m512 a = _mm512_loadu_ps(&ax[i]);
+        norm_acc = _mm512_fmadd_ps(a, a, norm_acc);
+    }
+
+    float norm_squared = _mm512_reduce_add_ps(norm_acc);
+
+    // Handle remaining elements for norm
+    for (int i = count; i < dim; i++)
+        norm_squared += ax[i] * ax[i];
+
+    float norm = sqrt(norm_squared);
+
+    if (norm > 0)
+    {
+        __m512 inv_norm = _mm512_set1_ps(1.0f / norm);
+
+        // Second pass: normalize
+        for (int i = 0; i < count; i += 16)
+        {
+            __m512 a = _mm512_loadu_ps(&ax[i]);
+            __m512 result = _mm512_mul_ps(a, inv_norm);
+
+            __mmask16 inf_mask = _mm512_fpclass_ps_mask(result, 0x88);
+            if (inf_mask)
+                ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                errmsg("normalization overflow")));
+
+            _mm512_storeu_ps(&rx[i], result);
+        }
+
+        // Handle remaining elements
+        for (int i = count; i < dim; i++)
+        {
+            rx[i] = ax[i] / norm;
+            if (isinf(rx[i]))
+                ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                errmsg("normalization overflow")));
+        }
+    }
+    else
+    {
+        memset(rx, 0, dim * sizeof(float));
+    }
+}
+
+TARGET_AVX512 static float
+VectorL2SquaredDistanceAvx512(int dim, float *ax, float *bx)
+{
+    __m512 dist_acc = _mm512_setzero_ps();
+    int count = (dim / 16) * 16;
+
+    for (int i = 0; i < count; i += 16)
+    {
+        __m512 a = _mm512_loadu_ps(&ax[i]);
+        __m512 b = _mm512_loadu_ps(&bx[i]);
+        __m512 diff = _mm512_sub_ps(a, b);
+        dist_acc = _mm512_fmadd_ps(diff, diff, dist_acc);
+    }
+
+    float distance = _mm512_reduce_add_ps(dist_acc);
+
+    // Handle remaining elements
+    for (int i = count; i < dim; i++)
+    {
+        float diff = ax[i] - bx[i];
+        distance += diff * diff;
+    }
+
+    return distance;
+}
+
+TARGET_AVX512 static float
+VectorInnerProductAvx512(int dim, float *ax, float *bx)
+{
+    __m512 prod_acc = _mm512_setzero_ps();
+    int count = (dim / 16) * 16;
+
+    for (int i = 0; i < count; i += 16)
+    {
+        __m512 a = _mm512_loadu_ps(&ax[i]);
+        __m512 b = _mm512_loadu_ps(&bx[i]);
+        prod_acc = _mm512_fmadd_ps(a, b, prod_acc);
+    }
+
+    float distance = _mm512_reduce_add_ps(prod_acc);
+
+    // Handle remaining elements
+    for (int i = count; i < dim; i++)
+        distance += ax[i] * bx[i];
+
+    return distance;
+}
+
+/* ========== CPU FEATURE DETECTION ========== */
+
 #define CPU_FEATURE_OSXSAVE  (1 << 27)
 #define CPU_FEATURE_AVX512F  (1 << 16)
 #define CPU_FEATURE_AVX512VL (1 << 31)
@@ -139,63 +431,97 @@ BinaryQuantizeAvx512Gfni(int dim, float *ax, unsigned char *rx) {
 TARGET_XSAVE static bool
 SupportsOsXsave()
 {
-	unsigned int exx[4] = {0, 0, 0, 0};
+    unsigned int exx[4] = {0, 0, 0, 0};
 
 #if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+    __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
 #else
-	__cpuid(exx, 1);
+    __cpuid(exx, 1);
 #endif
 
-	return (exx[2] & CPU_FEATURE_OSXSAVE) == CPU_FEATURE_OSXSAVE;
+    return (exx[2] & CPU_FEATURE_OSXSAVE) == CPU_FEATURE_OSXSAVE;
 }
 
 TARGET_XSAVE static bool
 SupportsAvx512(unsigned int feature)
 {
-	unsigned int exx[4] = {0, 0, 0, 0};
+    unsigned int exx[4] = {0, 0, 0, 0};
 
-	/* Check OS supports XSAVE */
-	if (!SupportsOsXsave())
-		return false;
+    /* Check OS supports XSAVE */
+    if (!SupportsOsXsave())
+        return false;
 
-	/* Check XMM, YMM, and ZMM registers are enabled */
-	if ((_xgetbv(0) & 0xe6) != 0xe6)
-		return false;
+    /* Check XMM, YMM, and ZMM registers are enabled */
+    if ((_xgetbv(0) & 0xe6) != 0xe6)
+        return false;
 
 #if defined(HAVE__GET_CPUID)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+    __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUID)
-	__cpuid(exx, 7, 0);
+    __cpuid(exx, 7, 0);
 #endif
 
-	return (exx[1] & feature) == feature;
+    return (exx[1] & feature) == feature;
 }
 
 TARGET_XSAVE static bool
 SupportsGfni()
 {
-	unsigned int exx[4] = {0, 0, 0, 0};
+    unsigned int exx[4] = {0, 0, 0, 0};
 
 #if defined(HAVE__GET_CPUID)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+    __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUID)
-	__cpuid(exx, 7, 0);
+    __cpuid(exx, 7, 0);
 #endif
 
-	return (exx[2] & CPU_FEATURE_GFNI) == CPU_FEATURE_GFNI;
+    return (exx[2] & CPU_FEATURE_GFNI) == CPU_FEATURE_GFNI;
 }
-#endif
+
+#endif /* VECTOR_DISPATCH */
+
+/* ========== INITIALIZATION ========== */
 
 void
 VectorInit(void)
 {
-	BinaryQuantize = BinaryQuantizeDefault;
+    /* Initialize binary quantization */
+    BinaryQuantize = BinaryQuantizeDefault;
+
+    /* Initialize vector math operations */
+    VectorCosineSimilarity = VectorCosineSimilarityDefault;
+    VectorAdd = VectorAddDefault;
+    VectorSubtract = VectorSubtractDefault;
+    VectorMultiply = VectorMultiplyDefault;
+    VectorL2Normalize = VectorL2NormalizeDefault;
+    VectorL2SquaredDistance = VectorL2SquaredDistanceDefault;
+    VectorInnerProduct = VectorInnerProductDefault;
 
 #ifdef VECTOR_DISPATCH
-	if (SupportsAvx512(CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512VL) && SupportsGfni())
-		BinaryQuantize = BinaryQuantizeAvx512Gfni;
-	else if (SupportsAvx512(CPU_FEATURE_AVX512F))
-		BinaryQuantize = BinaryQuantizeAvx512;
+    /* Detect CPU capabilities and select optimized implementations */
+    bool has_avx512f = SupportsAvx512(CPU_FEATURE_AVX512F);
+    bool has_avx512vl = SupportsAvx512(CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512VL);
+    bool has_gfni = SupportsGfni();
+
+    if (has_avx512f)
+    {
+        /* Basic AVX-512 optimizations */
+        VectorCosineSimilarity = VectorCosineSimilarityAvx512;
+        VectorAdd = VectorAddAvx512;
+        VectorSubtract = VectorSubtractAvx512;
+        VectorMultiply = VectorMultiplyAvx512;
+        VectorL2Normalize = VectorL2NormalizeAvx512;
+        VectorL2SquaredDistance = VectorL2SquaredDistanceAvx512;
+        VectorInnerProduct = VectorInnerProductAvx512;
+
+        /* Binary quantization with basic AVX-512 */
+        BinaryQuantize = BinaryQuantizeAvx512;
+    }
+
+    if (has_avx512vl && has_gfni)
+    {
+        /* Enhanced binary quantization with GFNI */
+        BinaryQuantize = BinaryQuantizeAvx512Gfni;
+    }
 #endif
-}
+}
\ No newline at end of file
diff --git a/src/vectorutils.h b/src/vectorutils.h
index bfc332d3b..df0b62191 100644
--- a/src/vectorutils.h
+++ b/src/vectorutils.h
@@ -1,7 +1,16 @@
 #ifndef VECTORUTILS_H
 #define VECTORUTILS_H
 
+#include "vector.h"
+
 extern void (*BinaryQuantize) (int dim, float *ax, unsigned char *rx);
+extern double (*VectorCosineSimilarity)(int dim, float *ax, float *bx);
+extern void (*VectorAdd)(int dim, float *ax, float *bx, float *rx);
+extern void (*VectorSubtract)(int dim, float *ax, float *bx, float *rx);
+extern void (*VectorMultiply)(int dim, float *ax, float *bx, float *rx);
+extern void (*VectorL2Normalize)(int dim, float *ax, float *rx);
+extern float (*VectorL2SquaredDistance)(int dim, float *ax, float *bx);
+extern float (*VectorInnerProduct)(int dim, float *ax, float *bx);
 
 void		VectorInit(void);
 

From 9bb07f958f57b4b5c4a993fe7660393f608a93fb Mon Sep 17 00:00:00 2001
From: klmckeig <kelly.l.mckeighan@intel.com>
Date: Wed, 22 Oct 2025 13:39:04 -0700
Subject: [PATCH 2/3] update

---
 src/vector.c      | 1283 +++++++++++++++++++++++++++++++++++++++++----
 src/vectorutils.c |    4 +-
 2 files changed, 1193 insertions(+), 94 deletions(-)

diff --git a/src/vector.c b/src/vector.c
index c8380f93a..f163e5463 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -1,128 +1,1227 @@
-// filepath: /data/pgvector_klmckeig/src/vector.c
-
 #include "postgres.h"
+
 #include <math.h>
+
+#include "bitutils.h"
+#include "bitvec.h"
+#include "catalog/pg_type.h"
+#include "common/shortest_dec.h"
 #include "fmgr.h"
+#include "halfutils.h"
+#include "halfvec.h"
+#include "hnsw.h"
+#include "ivfflat.h"
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+#include "port.h"				/* for strtof() */
+#include "sparsevec.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
+#include "utils/lsyscache.h"
+#include "utils/numeric.h"
 #include "vector.h"
 #include "vectorutils.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
+#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1)
+#define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1))
+
+#if defined(USE_TARGET_CLONES) && !defined(__FMA__)
+#define VECTOR_TARGET_CLONES __attribute__((target_clones("default", "fma")))
+#else
+#define VECTOR_TARGET_CLONES
+#endif
+
+#if PG_VERSION_NUM >= 180000
+PG_MODULE_MAGIC_EXT(.name = "vector",.version = "0.8.1");
+#else
 PG_MODULE_MAGIC;
+#endif
 
+/*
+ * Initialize index options and variables
+ */
 PGDLLEXPORT void _PG_init(void);
-void _PG_init(void) {
-    VectorInit();
+void
+_PG_init(void)
+{
+	VectorInit();
+	BitvecInit();
+	HalfvecInit();
+	HnswInit();
+	IvfflatInit();
 }
 
-/* Utility: Check dimensions match */
-static inline void CheckDims(Vector *a, Vector *b) {
-    if (a->dim != b->dim)
-        ereport(ERROR,
-            (errcode(ERRCODE_DATA_EXCEPTION),
-             errmsg("vector dimension mismatch")));
+/*
+ * Ensure same dimensions
+ */
+static inline void
+CheckDims(Vector * a, Vector * b)
+{
+	if (a->dim != b->dim)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("different vector dimensions %d and %d", a->dim, b->dim)));
 }
 
-/* Utility: Allocate a new vector */
-Vector *CreateVector(int dim) {
-    Vector *v = (Vector *) palloc0(VECTOR_SIZE(dim));
-    SET_VARSIZE(v, VECTOR_SIZE(dim));
-    v->dim = dim;
-    return v;
+/*
+ * Ensure expected dimensions
+ */
+static inline void
+CheckExpectedDim(int32 typmod, int dim)
+{
+	if (typmod != -1 && typmod != dim)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("expected %d dimensions, not %d", typmod, dim)));
 }
 
-/* Addition */
-PG_FUNCTION_INFO_V1(vector_add);
-Datum vector_add(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    Vector *b = PG_GETARG_VECTOR_P(1);
-    CheckDims(a, b);
-    Vector *result = CreateVector(a->dim);
-    VectorAdd(a->dim, a->x, b->x, result->x);
+/*
+ * Ensure valid dimensions
+ */
+static inline void
+CheckDim(int dim)
+{
+	if (dim < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("vector must have at least 1 dimension")));
+
+	if (dim > VECTOR_MAX_DIM)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM)));
+}
+
+/*
+ * Ensure finite element
+ */
+static inline void
+CheckElement(float value)
+{
+	if (isnan(value))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("NaN not allowed in vector")));
+
+	if (isinf(value))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("infinite value not allowed in vector")));
+}
+
+/*
+ * Allocate and initialize a new vector
+ */
+Vector *
+InitVector(int dim)
+{
+	Vector	   *result;
+	int			size;
+
+	size = VECTOR_SIZE(dim);
+	result = (Vector *) palloc0(size);
+	SET_VARSIZE(result, size);
+	result->dim = dim;
+
+	return result;
+}
+
+/*
+ * Check for whitespace, since array_isspace() is static
+ */
+static inline bool
+vector_isspace(char ch)
+{
+	if (ch == ' ' ||
+		ch == '\t' ||
+		ch == '\n' ||
+		ch == '\r' ||
+		ch == '\v' ||
+		ch == '\f')
+		return true;
+	return false;
+}
+
+/*
+ * Check state array
+ */
+static float8 *
+CheckStateArray(ArrayType *statearray, const char *caller)
+{
+	if (ARR_NDIM(statearray) != 1 ||
+		ARR_DIMS(statearray)[0] < 1 ||
+		ARR_HASNULL(statearray) ||
+		ARR_ELEMTYPE(statearray) != FLOAT8OID)
+		elog(ERROR, "%s: expected state array", caller);
+	return (float8 *) ARR_DATA_PTR(statearray);
+}
+
+/*
+ * Convert textual representation to internal representation
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_in);
+Datum
+vector_in(PG_FUNCTION_ARGS)
+{
+	char	   *lit = PG_GETARG_CSTRING(0);
+	int32		typmod = PG_GETARG_INT32(2);
+	float		x[VECTOR_MAX_DIM];
+	int			dim = 0;
+	char	   *pt = lit;
+	Vector	   *result;
+
+	while (vector_isspace(*pt))
+		pt++;
+
+	if (*pt != '[')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("invalid input syntax for type vector: \"%s\"", lit),
+				 errdetail("Vector contents must start with \"[\".")));
+
+	pt++;
+
+	while (vector_isspace(*pt))
+		pt++;
+
+	if (*pt == ']')
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("vector must have at least 1 dimension")));
+
+	for (;;)
+	{
+		float		val;
+		char	   *stringEnd;
+
+		if (dim == VECTOR_MAX_DIM)
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM)));
+
+		while (vector_isspace(*pt))
+			pt++;
+
+		/* Check for empty string like float4in */
+		if (*pt == '\0')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type vector: \"%s\"", lit)));
+
+		errno = 0;
+
+		/* Use strtof like float4in to avoid a double-rounding problem */
+		/* Postgres sets LC_NUMERIC to C on startup */
+		val = strtof(pt, &stringEnd);
+
+		if (stringEnd == pt)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type vector: \"%s\"", lit)));
+
+		/* Check for range error like float4in */
+		if (errno == ERANGE && isinf(val))
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("\"%s\" is out of range for type vector", pnstrdup(pt, stringEnd - pt))));
+
+		CheckElement(val);
+		x[dim++] = val;
+
+		pt = stringEnd;
+
+		while (vector_isspace(*pt))
+			pt++;
+
+		if (*pt == ',')
+			pt++;
+		else if (*pt == ']')
+		{
+			pt++;
+			break;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type vector: \"%s\"", lit)));
+	}
+
+	/* Only whitespace is allowed after the closing brace */
+	while (vector_isspace(*pt))
+		pt++;
+
+	if (*pt != '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("invalid input syntax for type vector: \"%s\"", lit),
+				 errdetail("Junk after closing right brace.")));
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	result = InitVector(dim);
+	for (int i = 0; i < dim; i++)
+		result->x[i] = x[i];
+
+	PG_RETURN_POINTER(result);
+}
+
+#define AppendChar(ptr, c) (*(ptr)++ = (c))
+#define AppendFloat(ptr, f) ((ptr) += float_to_shortest_decimal_bufn((f), (ptr)))
+
+/*
+ * Convert internal representation to textual representation
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_out);
+Datum
+vector_out(PG_FUNCTION_ARGS)
+{
+	Vector	   *vector = PG_GETARG_VECTOR_P(0);
+	int			dim = vector->dim;
+	char	   *buf;
+	char	   *ptr;
+
+	/*
+	 * Need:
+	 *
+	 * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for
+	 * float_to_shortest_decimal_bufn
+	 *
+	 * dim - 1 bytes for separator
+	 *
+	 * 3 bytes for [, ], and \0
+	 */
+	buf = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2);
+	ptr = buf;
+
+	AppendChar(ptr, '[');
+
+	for (int i = 0; i < dim; i++)
+	{
+		if (i > 0)
+			AppendChar(ptr, ',');
+
+		AppendFloat(ptr, vector->x[i]);
+	}
+
+	AppendChar(ptr, ']');
+	*ptr = '\0';
+
+	PG_FREE_IF_COPY(vector, 0);
+	PG_RETURN_CSTRING(buf);
+}
+
+/*
+ * Print vector - useful for debugging
+ */
+void
+PrintVector(char *msg, Vector * vector)
+{
+	char	   *out = DatumGetPointer(DirectFunctionCall1(vector_out, PointerGetDatum(vector)));
+
+	elog(INFO, "%s = %s", msg, out);
+	pfree(out);
+}
+
+/*
+ * Convert type modifier
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_typmod_in);
+Datum
+vector_typmod_in(PG_FUNCTION_ARGS)
+{
+	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
+	int32	   *tl;
+	int			n;
+
+	tl = ArrayGetIntegerTypmods(ta, &n);
+
+	if (n != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid type modifier")));
+
+	if (*tl < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("dimensions for type vector must be at least 1")));
+
+	if (*tl > VECTOR_MAX_DIM)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("dimensions for type vector cannot exceed %d", VECTOR_MAX_DIM)));
+
+	PG_RETURN_INT32(*tl);
+}
+
+/*
+ * Convert external binary representation to internal representation
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_recv);
+Datum
+vector_recv(PG_FUNCTION_ARGS)
+{
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	int32		typmod = PG_GETARG_INT32(2);
+	Vector	   *result;
+	int16		dim;
+	int16		unused;
+
+	dim = pq_getmsgint(buf, sizeof(int16));
+	unused = pq_getmsgint(buf, sizeof(int16));
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	if (unused != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("expected unused to be 0, not %d", unused)));
+
+	result = InitVector(dim);
+	for (int i = 0; i < dim; i++)
+	{
+		result->x[i] = pq_getmsgfloat4(buf);
+		CheckElement(result->x[i]);
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Convert internal representation to the external binary representation
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_send);
+Datum
+vector_send(PG_FUNCTION_ARGS)
+{
+	Vector	   *vec = PG_GETARG_VECTOR_P(0);
+	StringInfoData buf;
+
+	pq_begintypsend(&buf);
+	pq_sendint(&buf, vec->dim, sizeof(int16));
+	pq_sendint(&buf, vec->unused, sizeof(int16));
+	for (int i = 0; i < vec->dim; i++)
+		pq_sendfloat4(&buf, vec->x[i]);
+
+	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Convert vector to vector
+ * This is needed to check the type modifier
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector);
+Datum
+vector(PG_FUNCTION_ARGS)
+{
+	Vector	   *vec = PG_GETARG_VECTOR_P(0);
+	int32		typmod = PG_GETARG_INT32(1);
+
+	CheckExpectedDim(typmod, vec->dim);
+
+	PG_RETURN_POINTER(vec);
+}
+
+/*
+ * Convert array to vector
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(array_to_vector);
+Datum
+array_to_vector(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+	int32		typmod = PG_GETARG_INT32(1);
+	Vector	   *result;
+	int16		typlen;
+	bool		typbyval;
+	char		typalign;
+	Datum	   *elemsp;
+	int			nelemsp;
+
+	if (ARR_NDIM(array) > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("array must be 1-D")));
+
+	if (ARR_HASNULL(array) && array_contains_nulls(array))
+		ereport(ERROR,
+				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+				 errmsg("array must not contain nulls")));
+
+	get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign);
+	deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp);
+
+	CheckDim(nelemsp);
+	CheckExpectedDim(typmod, nelemsp);
+
+	result = InitVector(nelemsp);
+
+	if (ARR_ELEMTYPE(array) == INT4OID)
+	{
+		for (int i = 0; i < nelemsp; i++)
+			result->x[i] = DatumGetInt32(elemsp[i]);
+	}
+	else if (ARR_ELEMTYPE(array) == FLOAT8OID)
+	{
+		for (int i = 0; i < nelemsp; i++)
+			result->x[i] = DatumGetFloat8(elemsp[i]);
+	}
+	else if (ARR_ELEMTYPE(array) == FLOAT4OID)
+	{
+		for (int i = 0; i < nelemsp; i++)
+			result->x[i] = DatumGetFloat4(elemsp[i]);
+	}
+	else if (ARR_ELEMTYPE(array) == NUMERICOID)
+	{
+		for (int i = 0; i < nelemsp; i++)
+			result->x[i] = DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i]));
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("unsupported array type")));
+	}
+
+	/*
+	 * Free allocation from deconstruct_array. Do not free individual elements
+	 * when pass-by-reference since they point to original array.
+	 */
+	pfree(elemsp);
+
+	/* Check elements */
+	for (int i = 0; i < result->dim; i++)
+		CheckElement(result->x[i]);
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Convert vector to float4[]
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_to_float4);
+Datum
+vector_to_float4(PG_FUNCTION_ARGS)
+{
+	Vector	   *vec = PG_GETARG_VECTOR_P(0);
+	Datum	   *datums;
+	ArrayType  *result;
+
+	datums = (Datum *) palloc(sizeof(Datum) * vec->dim);
+
+	for (int i = 0; i < vec->dim; i++)
+		datums[i] = Float4GetDatum(vec->x[i]);
+
+	/* Use TYPALIGN_INT for float4 */
+	result = construct_array(datums, vec->dim, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT);
+
+	pfree(datums);
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Convert half vector to vector
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(halfvec_to_vector);
+Datum
+halfvec_to_vector(PG_FUNCTION_ARGS)
+{
+	HalfVector *vec = PG_GETARG_HALFVEC_P(0);
+	int32		typmod = PG_GETARG_INT32(1);
+	Vector	   *result;
+
+	CheckDim(vec->dim);
+	CheckExpectedDim(typmod, vec->dim);
+
+	result = InitVector(vec->dim);
+
+	for (int i = 0; i < vec->dim; i++)
+		result->x[i] = HalfToFloat4(vec->x[i]);
+
+	PG_RETURN_POINTER(result);
+}
+
+VECTOR_TARGET_CLONES static float
+VectorL2SquaredDistance(int dim, float *ax, float *bx)
+{
+	float		distance = 0.0;
+
+	/* Auto-vectorized */
+	for (int i = 0; i < dim; i++)
+	{
+		float		diff = ax[i] - bx[i];
+
+		distance += diff * diff;
+	}
+
+	return distance;
+}
+
+/*
+ * Get the L2 distance between vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(l2_distance);
+Datum
+l2_distance(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	CheckDims(a, b);
+
+	PG_RETURN_FLOAT8(sqrt((double) VectorL2SquaredDistance(a->dim, a->x, b->x)));
+}
+
+/*
+ * Get the L2 squared distance between vectors
+ * This saves a sqrt calculation
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_l2_squared_distance);
+Datum
+vector_l2_squared_distance(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	CheckDims(a, b);
+
+	PG_RETURN_FLOAT8((double) VectorL2SquaredDistance(a->dim, a->x, b->x));
+}
+
+VECTOR_TARGET_CLONES static float
+VectorInnerProduct(int dim, float *ax, float *bx)
+{
+	float		distance = 0.0;
+
+	/* Auto-vectorized */
+	for (int i = 0; i < dim; i++)
+		distance += ax[i] * bx[i];
+
+	return distance;
+}
+
+/*
+ * Get the inner product of two vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(inner_product);
+Datum
+inner_product(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	CheckDims(a, b);
+
+	PG_RETURN_FLOAT8((double) VectorInnerProduct(a->dim, a->x, b->x));
+}
+
+/*
+ * Get the negative inner product of two vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_negative_inner_product);
+Datum
+vector_negative_inner_product(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	CheckDims(a, b);
+
+	PG_RETURN_FLOAT8((double) -VectorInnerProduct(a->dim, a->x, b->x));
+}
+
+VECTOR_TARGET_CLONES static double
+VectorCosineSimilarity(int dim, float *ax, float *bx)
+{
+	return VectorCosineSimilarity(dim, ax, bx);
+}
+
+/*
+ * Get the cosine distance between two vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(cosine_distance);
+Datum
+cosine_distance(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+	double		similarity;
+
+	CheckDims(a, b);
+
+	similarity = VectorCosineSimilarity(a->dim, a->x, b->x);
+
+#ifdef _MSC_VER
+	/* /fp:fast may not propagate NaN */
+	if (isnan(similarity))
+		PG_RETURN_FLOAT8(NAN);
+#endif
+
+	/* Keep in range */
+	if (similarity > 1)
+		similarity = 1.0;
+	else if (similarity < -1)
+		similarity = -1.0;
+
+	PG_RETURN_FLOAT8(1.0 - similarity);
+}
+
+/*
+ * Get the distance for spherical k-means
+ * Currently uses angular distance since needs to satisfy triangle inequality
+ * Assumes inputs are unit vectors (skips norm)
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_spherical_distance);
+Datum
+vector_spherical_distance(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+	double		distance;
+
+	CheckDims(a, b);
+
+	distance = (double) VectorInnerProduct(a->dim, a->x, b->x);
+
+	/* Prevent NaN with acos with loss of precision */
+	if (distance > 1)
+		distance = 1;
+	else if (distance < -1)
+		distance = -1;
+
+	PG_RETURN_FLOAT8(acos(distance) / M_PI);
+}
+
+/* Does not require FMA, but keep logic simple */
+VECTOR_TARGET_CLONES static float
+VectorL1Distance(int dim, float *ax, float *bx)
+{
+	float		distance = 0.0;
+
+	/* Auto-vectorized */
+	for (int i = 0; i < dim; i++)
+		distance += fabsf(ax[i] - bx[i]);
+
+	return distance;
+}
+
+/*
+ * Get the L1 distance between two vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(l1_distance);
+Datum
+l1_distance(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	CheckDims(a, b);
+
+	PG_RETURN_FLOAT8((double) VectorL1Distance(a->dim, a->x, b->x));
+}
+
+/*
+ * Get the dimensions of a vector
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_dims);
+Datum
+vector_dims(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+
+	PG_RETURN_INT32(a->dim);
+}
+
+/*
+ * Get the L2 norm of a vector
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_norm);
+Datum
+vector_norm(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+
+    PG_RETURN_FLOAT8(sqrt((double) VectorL2SquaredDistance(a->dim, a->x, a->x)));
+}
+
+/*
+ * Normalize a vector with the L2 norm
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(l2_normalize);
+Datum
+l2_normalize(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+    Vector	   *result = InitVector(a->dim);
+
+    // Use optimized function pointer
+    VectorL2Normalize(a->dim, a->x, result->x);
+
     PG_RETURN_POINTER(result);
 }
 
-/* Subtraction */
-PG_FUNCTION_INFO_V1(vector_sub);
-Datum vector_sub(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    Vector *b = PG_GETARG_VECTOR_P(1);
+/*
+ * Add vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_add);
+Datum
+vector_add(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+	float	   *ax = a->x;
+	float	   *bx = b->x;
+	Vector	   *result;
+	float	   *rx;
+
+	CheckDims(a, b);
+
+	result = InitVector(a->dim);
+	VectorAdd(a->dim, a->x, b->x, result->x);
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Subtract vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_sub);
+Datum
+vector_sub(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+    Vector	   *b = PG_GETARG_VECTOR_P(1);
+
     CheckDims(a, b);
-    Vector *result = CreateVector(a->dim);
+
+    Vector	   *result = InitVector(a->dim);
+
+    // Use optimized function pointer
     VectorSubtract(a->dim, a->x, b->x, result->x);
+
     PG_RETURN_POINTER(result);
 }
 
-/* Multiplication */
-PG_FUNCTION_INFO_V1(vector_mul);
-Datum vector_mul(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    Vector *b = PG_GETARG_VECTOR_P(1);
+/*
+ * Multiply vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_mul);
+Datum
+vector_mul(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+    Vector	   *b = PG_GETARG_VECTOR_P(1);
+
     CheckDims(a, b);
-    Vector *result = CreateVector(a->dim);
+
+    Vector	   *result = InitVector(a->dim);
+
+    // Use optimized function pointer
     VectorMultiply(a->dim, a->x, b->x, result->x);
+
     PG_RETURN_POINTER(result);
 }
 
-/* L2 normalization */
-PG_FUNCTION_INFO_V1(l2_normalize);
-Datum l2_normalize(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    Vector *result = CreateVector(a->dim);
-    VectorL2Normalize(a->dim, a->x, result->x);
-    PG_RETURN_POINTER(result);
+/*
+ * Concatenate vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_concat);
+Datum
+vector_concat(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+	Vector	   *result;
+	int			dim = a->dim + b->dim;
+
+	CheckDim(dim);
+	result = InitVector(dim);
+
+	/* Auto-vectorized */
+	for (int i = 0, imax = a->dim; i < imax; i++)
+		result->x[i] = a->x[i];
+
+	/* Auto-vectorized */
+	for (int i = 0, imax = b->dim, start = a->dim; i < imax; i++)
+		result->x[i + start] = b->x[i];
+
+	PG_RETURN_POINTER(result);
 }
 
-/* L2 norm */
-PG_FUNCTION_INFO_V1(vector_norm);
-Datum vector_norm(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    float norm = sqrt(VectorL2SquaredDistance(a->dim, a->x, a->x));
-    PG_RETURN_FLOAT8(norm);
+/*
+ * Quantize a vector
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(binary_quantize);
+Datum
+binary_quantize(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	float	   *ax = a->x;
+	VarBit	   *result = InitBitVector(a->dim);
+	unsigned char *rx = VARBITS(result);
+
+	BinaryQuantize(a->dim, ax, rx);
+
+	PG_RETURN_VARBIT_P(result);
 }
 
-/* L2 squared distance */
-PG_FUNCTION_INFO_V1(vector_l2_squared_distance);
-Datum vector_l2_squared_distance(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    Vector *b = PG_GETARG_VECTOR_P(1);
-    CheckDims(a, b);
-    float dist = VectorL2SquaredDistance(a->dim, a->x, b->x);
-    PG_RETURN_FLOAT8(dist);
+/*
+ * Get a subvector
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(subvector);
+Datum
+subvector(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	int32		start = PG_GETARG_INT32(1);
+	int32		count = PG_GETARG_INT32(2);
+	int32		end;
+	float	   *ax = a->x;
+	Vector	   *result;
+	int			dim;
+
+	if (count < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("vector must have at least 1 dimension")));
+
+	/*
+	 * Check if (start + count > a->dim), avoiding integer overflow. a->dim
+	 * and count are both positive, so a->dim - count won't overflow.
+	 */
+	if (start > a->dim - count)
+		end = a->dim + 1;
+	else
+		end = start + count;
+
+	/* Indexing starts at 1, like substring */
+	if (start < 1)
+		start = 1;
+	else if (start > a->dim)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("vector must have at least 1 dimension")));
+
+	dim = end - start;
+	CheckDim(dim);
+	result = InitVector(dim);
+
+	for (int i = 0; i < dim; i++)
+		result->x[i] = ax[start - 1 + i];
+
+	PG_RETURN_POINTER(result);
 }
 
-/* Inner product */
-PG_FUNCTION_INFO_V1(inner_product);
-Datum inner_product(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    Vector *b = PG_GETARG_VECTOR_P(1);
-    CheckDims(a, b);
-    float prod = VectorInnerProduct(a->dim, a->x, b->x);
-    PG_RETURN_FLOAT8(prod);
+/*
+ * Internal helper to compare vectors
+ */
+int
+vector_cmp_internal(Vector * a, Vector * b)
+{
+	int			dim = Min(a->dim, b->dim);
+
+	/* Check values before dimensions to be consistent with Postgres arrays */
+	for (int i = 0; i < dim; i++)
+	{
+		if (a->x[i] < b->x[i])
+			return -1;
+
+		if (a->x[i] > b->x[i])
+			return 1;
+	}
+
+	if (a->dim < b->dim)
+		return -1;
+
+	if (a->dim > b->dim)
+		return 1;
+
+	return 0;
 }
 
-/* Cosine similarity */
-PG_FUNCTION_INFO_V1(cosine_distance);
-Datum cosine_distance(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    Vector *b = PG_GETARG_VECTOR_P(1);
-    CheckDims(a, b);
-    double sim = VectorCosineSimilarity(a->dim, a->x, b->x);
-    if (sim > 1.0) sim = 1.0;
-    if (sim < -1.0) sim = -1.0;
-    PG_RETURN_FLOAT8(1.0 - sim);
-}
-
-/* Binary quantization */
-PG_FUNCTION_INFO_V1(binary_quantize);
-Datum binary_quantize(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    VarBit *result = InitBitVector(a->dim);
-    BinaryQuantize(a->dim, a->x, VARBITS(result));
-    PG_RETURN_VARBIT_P(result);
-}
-
-/* Get vector dimensions */
-PG_FUNCTION_INFO_V1(vector_dims);
-Datum vector_dims(PG_FUNCTION_ARGS) {
-    Vector *a = PG_GETARG_VECTOR_P(0);
-    PG_RETURN_INT32(a->dim);
-}
\ No newline at end of file
+/*
+ * Less than
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_lt);
+Datum
+vector_lt(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	PG_RETURN_BOOL(vector_cmp_internal(a, b) < 0);
+}
+
+/*
+ * Less than or equal
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_le);
+Datum
+vector_le(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	PG_RETURN_BOOL(vector_cmp_internal(a, b) <= 0);
+}
+
+/*
+ * Equal
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_eq);
+Datum
+vector_eq(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	PG_RETURN_BOOL(vector_cmp_internal(a, b) == 0);
+}
+
+/*
+ * Not equal
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_ne);
+Datum
+vector_ne(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	PG_RETURN_BOOL(vector_cmp_internal(a, b) != 0);
+}
+
+/*
+ * Greater than or equal
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_ge);
+Datum
+vector_ge(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	PG_RETURN_BOOL(vector_cmp_internal(a, b) >= 0);
+}
+
+/*
+ * Greater than
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_gt);
+Datum
+vector_gt(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	PG_RETURN_BOOL(vector_cmp_internal(a, b) > 0);
+}
+
+/*
+ * Compare vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_cmp);
+Datum
+vector_cmp(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+
+	PG_RETURN_INT32(vector_cmp_internal(a, b));
+}
+
+/*
+ * Accumulate vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_accum);
+Datum
+vector_accum(PG_FUNCTION_ARGS)
+{
+	ArrayType  *statearray = PG_GETARG_ARRAYTYPE_P(0);
+	Vector	   *newval = PG_GETARG_VECTOR_P(1);
+	float8	   *statevalues;
+	int16		dim;
+	bool		newarr;
+	float8		n;
+	Datum	   *statedatums;
+	float	   *x = newval->x;
+	ArrayType  *result;
+
+	/* Check array before using */
+	statevalues = CheckStateArray(statearray, "vector_accum");
+	dim = STATE_DIMS(statearray);
+	newarr = dim == 0;
+
+	if (newarr)
+		dim = newval->dim;
+	else
+		CheckExpectedDim(dim, newval->dim);
+
+	n = statevalues[0] + 1.0;
+
+	statedatums = CreateStateDatums(dim);
+	statedatums[0] = Float8GetDatum(n);
+
+	if (newarr)
+	{
+		for (int i = 0; i < dim; i++)
+			statedatums[i + 1] = Float8GetDatum((double) x[i]);
+	}
+	else
+	{
+		for (int i = 0; i < dim; i++)
+		{
+			double		v = statevalues[i + 1] + x[i];
+
+			/* Check for overflow */
+			if (isinf(v))
+				float_overflow_error();
+
+			statedatums[i + 1] = Float8GetDatum(v);
+		}
+	}
+
+	/* Use float8 array like float4_accum */
+	result = construct_array(statedatums, dim + 1,
+							 FLOAT8OID,
+							 sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
+
+	pfree(statedatums);
+
+	PG_RETURN_ARRAYTYPE_P(result);
+}
+
+/*
+ * Combine vectors or half vectors (also used for halfvec_combine)
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_combine);
+Datum
+vector_combine(PG_FUNCTION_ARGS)
+{
+	/* Must also update parameters of halfvec_combine if modifying */
+	ArrayType  *statearray1 = PG_GETARG_ARRAYTYPE_P(0);
+	ArrayType  *statearray2 = PG_GETARG_ARRAYTYPE_P(1);
+	float8	   *statevalues1;
+	float8	   *statevalues2;
+	float8		n;
+	float8		n1;
+	float8		n2;
+	int16		dim;
+	Datum	   *statedatums;
+	ArrayType  *result;
+
+	/* Check arrays before using */
+	statevalues1 = CheckStateArray(statearray1, "vector_combine");
+	statevalues2 = CheckStateArray(statearray2, "vector_combine");
+
+	n1 = statevalues1[0];
+	n2 = statevalues2[0];
+
+	if (n1 == 0.0)
+	{
+		n = n2;
+		dim = STATE_DIMS(statearray2);
+		statedatums = CreateStateDatums(dim);
+		for (int i = 1; i <= dim; i++)
+			statedatums[i] = Float8GetDatum(statevalues2[i]);
+	}
+	else if (n2 == 0.0)
+	{
+		n = n1;
+		dim = STATE_DIMS(statearray1);
+		statedatums = CreateStateDatums(dim);
+		for (int i = 1; i <= dim; i++)
+			statedatums[i] = Float8GetDatum(statevalues1[i]);
+	}
+	else
+	{
+		n = n1 + n2;
+		dim = STATE_DIMS(statearray1);
+		CheckExpectedDim(dim, STATE_DIMS(statearray2));
+		statedatums = CreateStateDatums(dim);
+		for (int i = 1; i <= dim; i++)
+		{
+			double		v = statevalues1[i] + statevalues2[i];
+
+			/* Check for overflow */
+			if (isinf(v))
+				float_overflow_error();
+
+			statedatums[i] = Float8GetDatum(v);
+		}
+	}
+
+	statedatums[0] = Float8GetDatum(n);
+
+	result = construct_array(statedatums, dim + 1,
+							 FLOAT8OID,
+							 sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
+
+	pfree(statedatums);
+
+	PG_RETURN_ARRAYTYPE_P(result);
+}
+
+/*
+ * Average vectors
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(vector_avg);
+Datum
+vector_avg(PG_FUNCTION_ARGS)
+{
+	ArrayType  *statearray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *statevalues;
+	float8		n;
+	uint16		dim;
+	Vector	   *result;
+
+	/* Check array before using */
+	statevalues = CheckStateArray(statearray, "vector_avg");
+	n = statevalues[0];
+
+	/* SQL defines AVG of no values to be NULL */
+	if (n == 0.0)
+		PG_RETURN_NULL();
+
+	/* Create vector */
+	dim = STATE_DIMS(statearray);
+	CheckDim(dim);
+	result = InitVector(dim);
+	for (int i = 0; i < dim; i++)
+	{
+		result->x[i] = statevalues[i + 1] / n;
+		CheckElement(result->x[i]);
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Convert sparse vector to dense vector
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(sparsevec_to_vector);
+Datum
+sparsevec_to_vector(PG_FUNCTION_ARGS)
+{
+	SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
+	int32		typmod = PG_GETARG_INT32(1);
+	Vector	   *result;
+	int			dim = svec->dim;
+	float	   *values = SPARSEVEC_VALUES(svec);
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	result = InitVector(dim);
+	for (int i = 0; i < svec->nnz; i++)
+		result->x[svec->indices[i]] = values[i];
+
+	PG_RETURN_POINTER(result);
+}
diff --git a/src/vectorutils.c b/src/vectorutils.c
index d854067fe..3dac7ce48 100644
--- a/src/vectorutils.c
+++ b/src/vectorutils.c
@@ -27,10 +27,10 @@
 
 /* ========== FUNCTION POINTERS ========== */
 
-// Binary quantization (existing)
+// Binary quantization
 void		(*BinaryQuantize) (int dim, float *ax, unsigned char *rx);
 
-// Vector math operations (new)
+// Vector math operations
 double		(*VectorCosineSimilarity) (int dim, float *ax, float *bx);
 void		(*VectorAdd) (int dim, float *ax, float *bx, float *rx);
 void		(*VectorSubtract) (int dim, float *ax, float *bx, float *rx);

From fa45d0fc4f5ba0f4f8f96260799a81fc54763aaa Mon Sep 17 00:00:00 2001
From: klmckeig <kelly.l.mckeighan@intel.com>
Date: Wed, 22 Oct 2025 14:00:29 -0700
Subject: [PATCH 3/3] update

---
 src/vectorutils.c | 60 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 3 deletions(-)

diff --git a/src/vectorutils.c b/src/vectorutils.c
index 3dac7ce48..dba877ca8 100644
--- a/src/vectorutils.c
+++ b/src/vectorutils.c
@@ -169,7 +169,33 @@ VectorInnerProductDefault(int dim, float *ax, float *bx)
 // Existing binary quantization functions
 TARGET_AVX512 static inline void
 BinaryQuantizeAvx512Compare(int dim, float *ax, unsigned char *rx) {
-    // ... existing implementation ...
+    int				rx_bytes = 0;
+	unsigned long	mask;
+	__m512			axi_512;
+	__m512			zero_512 = _mm512_setzero_ps();
+	__mmask16		cmp;
+
+	for (int i = 0; i < dim; i += 16)
+	{
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_512 = _mm512_maskz_loadu_ps(mask, ax + i);
+			cmp = _mm512_cmp_ps_mask(axi_512, zero_512, _CMP_GT_OQ);
+			if (dim - i > 8)
+				*((uint16_t*)(rx + rx_bytes)) = cmp;
+			else {
+				*((uint8_t*)(rx + rx_bytes)) = (uint8_t)(cmp & 0xFF);
+			}
+		}
+		else
+		{
+			axi_512 = _mm512_loadu_ps(ax + i);
+			cmp = _mm512_cmp_ps_mask(axi_512, zero_512, _CMP_GT_OQ);
+			*((uint16_t*)(rx + rx_bytes)) = cmp;
+			rx_bytes += 2;
+		}
+	}
 }
 
 static const uint8_t bit_invert_lookup[16] = {
@@ -179,14 +205,42 @@ static const uint8_t bit_invert_lookup[16] = {
 
 TARGET_AVX512 static void
 BinaryQuantizeAvx512(int dim, float *ax, unsigned char *rx) {
-    // ... existing implementation ...
+    int	rx_bytes = 0;
+
+	BinaryQuantizeAvx512Compare(dim, ax, rx);
+
+	rx_bytes = dim / 8;
+	if (dim % 8 > 0)
+		rx_bytes++;
+	for (int i = 0; i < rx_bytes; i++)
+		rx[i] = (bit_invert_lookup[rx[i] & 0b1111] << 4) | bit_invert_lookup[rx[i] >> 4];
 }
 
 #define GFNI_REVBIT		0x8040201008040201
 
 TARGET_AVX512_GFNI static void
 BinaryQuantizeAvx512Gfni(int dim, float *ax, unsigned char *rx) {
-    // ... existing implementation ...
+    int			rx_bytes = 0;
+	__m128i		revbit = _mm_set1_epi64x(GFNI_REVBIT);
+	__m128i		rxi;
+	__m128i		rxirev;
+	int			count;
+	int			i = 0;
+
+	BinaryQuantizeAvx512Compare(dim, ax, rx);
+
+	rx_bytes = dim / 8;
+	if (dim % 8 > 0)
+		rx_bytes++;
+	count = (rx_bytes/16)*16;
+	for (; i < count; i += 16)
+	{
+		rxi = _mm_loadu_epi64(rx + i);
+		rxirev = _mm_gf2p8affine_epi64_epi8(rxi, revbit, 0);
+		_mm_storeu_epi64(rx + i, rxirev);
+	}
+	for (; i < rx_bytes; i++)
+		rx[i] =(bit_invert_lookup[rx[i] & 0b1111] << 4) | bit_invert_lookup[rx[i] >> 4];
 }
 
 // New vector math optimizations