From 63792c8f6dfce215742ed433b701850bb98b3f9f Mon Sep 17 00:00:00 2001
From: Luca Giacchino <luca.giacchino@intel.com>
Date: Tue, 9 Apr 2024 09:08:26 -0700
Subject: [PATCH 01/64] Implement AVX512 distance calculations for halfvec

---
 src/halfutils.c                          | 400 +++++++++++++++++++++++
 src/halfutils.h                          |   3 +
 test/expected/halfvec_functions_fp16.out | 141 ++++++++
 test/sql/halfvec_functions_fp16.sql      |  31 ++
 4 files changed, 575 insertions(+)
 create mode 100644 test/expected/halfvec_functions_fp16.out
 create mode 100644 test/sql/halfvec_functions_fp16.sql

diff --git a/src/halfutils.c b/src/halfutils.c
index d16909409..9ef916803 100644
--- a/src/halfutils.c
+++ b/src/halfutils.c
@@ -2,6 +2,7 @@
 
 #include "halfutils.h"
 #include "halfvec.h"
+#include "utils/guc.h"
 
 #ifdef HALFVEC_DISPATCH
 #include <immintrin.h>
@@ -12,13 +13,23 @@
 #include <intrin.h>
 #endif
 
+#if (defined(__GNUC__) && (__GNUC__ >= 12)) || \
+	(defined(__clang__) && (__clang_major__ >= 16)) || \
+	(defined __AVX512FP16__)
+#define HAVE_AVX512FP16
+#endif
+
 #ifdef _MSC_VER
 #define TARGET_F16C
+#define TARGET_AVX512FP16
 #else
 #define TARGET_F16C __attribute__((target("avx,f16c,fma")))
+#define TARGET_AVX512FP16 __attribute__((target("avx512fp16,avx512f,avx512vl,avx512bw")))
 #endif
 #endif
 
+bool halfvec_use_fp16_compute;
+
 float		(*HalfvecL2SquaredDistance) (int dim, half * ax, half * bx);
 float		(*HalfvecInnerProduct) (int dim, half * ax, half * bx);
 double		(*HalfvecCosineSimilarity) (int dim, half * ax, half * bx);
@@ -74,6 +85,85 @@ HalfvecL2SquaredDistanceF16c(int dim, half * ax, half * bx)
 
 	return distance;
 }
+
+#ifdef HAVE_AVX512FP16
+TARGET_AVX512FP16 static float
+HalfvecL2SquaredDistanceAvx512Fp16(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	int			count = (dim / 32) * 32;
+	unsigned long mask;
+	__m512h		axi;
+	__m512h		bxi;
+	__m512h		diff;
+	__m512h		dist = _mm512_setzero_ph();
+
+	for (i = 0; i < count; i += 32)
+	{
+		axi = _mm512_loadu_ph(ax+i);
+		bxi = _mm512_loadu_ph(bx+i);
+		diff = _mm512_sub_ph(axi, bxi);
+		dist = _mm512_fmadd_ph(diff, diff, dist);
+	}
+
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+	diff = _mm512_sub_ph(axi, bxi);
+	dist = _mm512_fmadd_ph(diff, diff, dist);
+
+	distance = (float)_mm512_reduce_add_ph(dist);
+
+	return distance;
+}
+
+TARGET_AVX512FP16 static float
+HalfvecL2SquaredDistanceAvx512Fp32(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	int			count = (dim / 16) * 16;
+	unsigned long mask;
+	__m256h		axi;
+	__m256h		bxi;
+	__m512		axs;
+	__m512		bxs;
+	__m512		diff;
+	__m512		dist = _mm512_setzero_ps();
+
+	for (i = 0; i < count; i += 16)
+	{
+		axi = _mm256_loadu_ph(ax+i);
+		bxi = _mm256_loadu_ph(bx+i);
+		axs = _mm512_cvtxph_ps(axi);
+		bxs = _mm512_cvtxph_ps(bxi);
+		diff = _mm512_sub_ps(axs, bxs);
+		dist = _mm512_fmadd_ps(diff, diff, dist);
+	}
+	
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+	axs = _mm512_cvtxph_ps(axi);
+	bxs = _mm512_cvtxph_ps(bxi);
+	diff = _mm512_sub_ps(axs, bxs);
+	dist = _mm512_fmadd_ps(diff, diff, dist);
+
+	distance = (float)_mm512_reduce_add_ps(dist);
+
+	return distance;
+}
+
+static float
+HalfvecL2SquaredDistanceAvx512(int dim, half * ax, half * bx)
+{
+	if (halfvec_use_fp16_compute)
+		return HalfvecL2SquaredDistanceAvx512Fp16(dim, ax, bx);
+	else
+		return HalfvecL2SquaredDistanceAvx512Fp32(dim, ax, bx);
+}
+#endif
 #endif
 
 static float
@@ -117,6 +207,79 @@ HalfvecInnerProductF16c(int dim, half * ax, half * bx)
 
 	return distance;
 }
+
+#ifdef HAVE_AVX512FP16
+TARGET_AVX512FP16 static float
+HalfvecInnerProductAvx512Fp16(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	int			count = (dim / 32) * 32;
+	unsigned long mask;
+	__m512h		axi;
+	__m512h		bxi;
+	__m512h		dist = _mm512_setzero_ph();
+
+	for (i = 0; i < count; i += 32)
+	{
+		axi = _mm512_loadu_ph(ax+i);
+		bxi = _mm512_loadu_ph(bx+i);
+		dist = _mm512_fmadd_ph(axi, bxi, dist);
+	}
+
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+	dist = _mm512_fmadd_ph(axi, bxi, dist);
+
+	distance = (float)_mm512_reduce_add_ph(dist);
+
+	return distance;
+}
+
+TARGET_AVX512FP16 static float
+HalfvecInnerProductAvx512Fp32(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	int			count = (dim / 16) * 16;
+	unsigned long mask;
+	__m256h		axi;
+	__m256h		bxi;
+	__m512		axs;
+	__m512		bxs;
+	__m512		dist = _mm512_setzero_ps();
+
+	for (i = 0; i < count; i += 16)
+	{
+		axi = _mm256_loadu_ph(ax+i);
+		bxi = _mm256_loadu_ph(bx+i);
+		axs = _mm512_cvtxph_ps(axi);
+		bxs = _mm512_cvtxph_ps(bxi);
+		dist = _mm512_fmadd_ps(axs, bxs, dist);
+	}
+
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+	axs = _mm512_cvtxph_ps(axi);
+	bxs = _mm512_cvtxph_ps(bxi);
+	dist = _mm512_fmadd_ps(axs, bxs, dist);
+
+	distance = (float)_mm512_reduce_add_ps(dist);
+
+	return distance;
+}
+
+static float
+HalfvecInnerProductAvx512(int dim, half * ax, half * bx)
+{
+	if (halfvec_use_fp16_compute)
+		return HalfvecInnerProductAvx512Fp16(dim, ax, bx);
+	else
+		return HalfvecInnerProductAvx512Fp32(dim, ax, bx);
+}
+#endif
 #endif
 
 static double
@@ -190,6 +353,101 @@ HalfvecCosineSimilarityF16c(int dim, half * ax, half * bx)
 	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
 	return (double) similarity / sqrt((double) norma * (double) normb);
 }
+
+#ifdef HAVE_AVX512FP16
+TARGET_AVX512FP16 static double
+HalfvecCosineSimilarityAvx512Fp16(int dim, half * ax, half * bx)
+{
+	float		similarity;
+	float		norma;
+	float		normb;
+	int			i;
+	int			count = (dim / 32) * 32;
+	unsigned long mask;
+	__m512h		axi;
+	__m512h		bxi;
+	__m512h		sim = _mm512_setzero_ph();
+	__m512h		na = _mm512_setzero_ph();
+	__m512h		nb = _mm512_setzero_ph();
+
+	for (i = 0; i < count; i += 32)
+	{
+		axi = _mm512_loadu_ph(ax+i);
+		bxi = _mm512_loadu_ph(bx+i);
+		sim = _mm512_fmadd_ph(axi, bxi, sim);
+		na = _mm512_fmadd_ph(axi, axi, na);
+		nb = _mm512_fmadd_ph(bxi, bxi, nb);
+	}
+
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+	sim = _mm512_fmadd_ph(axi, bxi, sim);
+	na = _mm512_fmadd_ph(axi, axi, na);
+	nb = _mm512_fmadd_ph(bxi, bxi, nb);
+
+	similarity = (float)_mm512_reduce_add_ph(sim);
+	norma = (float)_mm512_reduce_add_ph(na);
+	normb = (float)_mm512_reduce_add_ph(nb);
+
+	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
+	return (double) similarity / sqrt((double) norma * (double) normb);
+}
+
+TARGET_AVX512FP16 static double
+HalfvecCosineSimilarityAvx512Fp32(int dim, half * ax, half * bx)
+{
+	float		similarity;
+	float		norma;
+	float		normb;
+	int			i;
+	int			count = (dim / 16) * 16;
+	unsigned long mask;
+	__m256h		axi;
+	__m256h 	bxi;
+	__m512		axs;
+	__m512		bxs;
+	__m512		sim = _mm512_setzero_ps();
+	__m512		na = _mm512_setzero_ps();
+	__m512		nb = _mm512_setzero_ps();
+
+	for (i = 0; i < count; i += 16)
+	{
+		axi = _mm256_loadu_ph(ax+i);
+		bxi = _mm256_loadu_ph(bx+i);
+		axs = _mm512_cvtxph_ps(axi);
+		bxs = _mm512_cvtxph_ps(bxi);
+		sim = _mm512_fmadd_ps(axs, bxs, sim);
+		na = _mm512_fmadd_ps(axs, axs, na);
+		nb = _mm512_fmadd_ps(bxs, bxs, nb);
+	}
+
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+	axs = _mm512_cvtxph_ps(axi);
+	bxs = _mm512_cvtxph_ps(bxi);
+	sim = _mm512_fmadd_ps(axs, bxs, sim);
+	na = _mm512_fmadd_ps(axs, axs, na);
+	nb = _mm512_fmadd_ps(bxs, bxs, nb);
+
+	similarity = (float)_mm512_reduce_add_ps(sim);
+	norma = (float)_mm512_reduce_add_ps(na);
+	normb = (float)_mm512_reduce_add_ps(nb);
+
+	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
+	return (double) similarity / sqrt((double) norma * (double) normb);
+}
+
+static double
+HalfvecCosineSimilarityAvx512(int dim, half * ax, half * bx)
+{
+	if (halfvec_use_fp16_compute)
+		return HalfvecCosineSimilarityAvx512Fp16(dim, ax, bx);
+	else
+		return HalfvecCosineSimilarityAvx512Fp32(dim, ax, bx);
+}
+#endif
 #endif
 
 static float
@@ -235,6 +493,79 @@ HalfvecL1DistanceF16c(int dim, half * ax, half * bx)
 
 	return distance;
 }
+
+#ifdef HAVE_AVX512FP16
+TARGET_AVX512FP16 static float
+HalfvecL1DistanceAvx512Fp16(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	int			count = (dim / 32) * 32;
+	unsigned long mask;
+	__m512h		axi;
+	__m512h		bxi;
+	__m512h		dist = _mm512_setzero_ph();
+
+	for (i = 0; i < count; i += 32)
+	{
+		axi = _mm512_loadu_ph(ax+i);
+		bxi = _mm512_loadu_ph(bx+i);
+		dist = _mm512_add_ph(dist, _mm512_abs_ph(_mm512_sub_ph(axi, bxi)));
+	}
+
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+	dist = _mm512_add_ph(dist, _mm512_abs_ph(_mm512_sub_ph(axi, bxi)));
+
+	distance = (float)_mm512_reduce_add_ph(dist);
+
+	return distance;
+}
+
+TARGET_AVX512FP16 static float
+HalfvecL1DistanceAvx512Fp32(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	int			count = (dim / 16) * 16;
+	unsigned long mask;
+	__m256h		axi;
+	__m256h		bxi;
+	__m512		axs;
+	__m512		bxs;
+	__m512		dist = _mm512_setzero_ps();
+
+	for (i = 0; i < count; i += 16)
+	{
+		axi = _mm256_loadu_ph(ax+i);
+		bxi = _mm256_loadu_ph(bx+i);
+		axs = _mm512_cvtxph_ps(axi);
+		bxs = _mm512_cvtxph_ps(bxi);
+		dist = _mm512_add_ps(dist, _mm512_abs_ps(_mm512_sub_ps(axs, bxs)));
+	}
+
+	mask = (1 << (dim - i)) - 1;
+	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+	axs = _mm512_cvtxph_ps(axi);
+	bxs = _mm512_cvtxph_ps(bxi);
+	dist = _mm512_add_ps(dist, _mm512_abs_ps(_mm512_sub_ps(axs, bxs)));
+
+	distance = (float)_mm512_reduce_add_ps(dist);
+
+	return distance;
+}
+
+static float
+HalfvecL1DistanceAvx512(int dim, half * ax, half * bx)
+{
+	if (halfvec_use_fp16_compute)
+		return HalfvecL1DistanceAvx512Fp16(dim, ax, bx);
+	else
+		return HalfvecL1DistanceAvx512Fp32(dim, ax, bx);
+}
+#endif
 #endif
 
 #ifdef HALFVEC_DISPATCH
@@ -271,6 +602,61 @@ SupportsCpuFeature(unsigned int feature)
 	/* Now check features */
 	return (exx[2] & feature) == feature;
 }
+
+#ifdef HAVE_AVX512FP16
+TARGET_XSAVE static bool
+SupportsOsXsave()
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#else
+	__cpuid(exx, 1);
+#endif
+
+	return (exx[2] & CPU_FEATURE_OSXSAVE) == CPU_FEATURE_OSXSAVE;
+}
+
+#define CPU_FEATURE_AVX512F         (1 << 16)
+#define CPU_FEATURE_AVX512_FP16     (1 << 23)
+#define CPU_FEATURE_AVX512_BW       (1 << 30)
+#define CPU_FEATURE_AVX512VL     	(1 << 31)
+
+TARGET_XSAVE static bool
+SupportsAvx512Fp16()
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+	/* Check OS supports XSAVE */
+	if (!SupportsOsXsave())
+		return false;
+
+	/* Check XMM, YMM, and ZMM registers are enabled */
+	if ((_xgetbv(0) & 0xe6) != 0xe6)
+		return false;
+		
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 7, 0);
+#endif
+
+	/* Required by AVX512 sub/fma/add instructions */
+	if ((exx[1] & CPU_FEATURE_AVX512F) != CPU_FEATURE_AVX512F)
+		return false;
+
+	/* Required by _mm256_loadu_ph */
+    if ((exx[1] & CPU_FEATURE_AVX512VL) != CPU_FEATURE_AVX512VL)
+	    return false;
+
+	/* Required by masked loads in remainder loops */
+	if ((exx[1] & CPU_FEATURE_AVX512_BW) != CPU_FEATURE_AVX512_BW)
+		return false;
+
+	return (exx[3] & CPU_FEATURE_AVX512_FP16) == CPU_FEATURE_AVX512_FP16;
+}
+#endif
 #endif
 
 void
@@ -294,5 +680,19 @@ HalfvecInit(void)
 		/* Does not require FMA, but keep logic simple */
 		HalfvecL1Distance = HalfvecL1DistanceF16c;
 	}
+
+#ifdef HAVE_AVX512FP16
+	if (SupportsAvx512Fp16())
+	{
+		HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceAvx512;
+		HalfvecInnerProduct = HalfvecInnerProductAvx512;
+		HalfvecCosineSimilarity = HalfvecCosineSimilarityAvx512;
+		HalfvecL1Distance = HalfvecL1DistanceAvx512;
+	}
 #endif
+#endif
+
+	DefineCustomBoolVariable("halfvec.use_fp16_compute", "Use FP16 computation for distance calculations",
+							"If true, distance calculations are executed in FP16. If false, distance calculations are executed in FP32",
+							&halfvec_use_fp16_compute, false, PGC_USERSET, 0, NULL, NULL, NULL);
 }
diff --git a/src/halfutils.h b/src/halfutils.h
index c684f72d7..99131d583 100644
--- a/src/halfutils.h
+++ b/src/halfutils.h
@@ -17,6 +17,9 @@ extern float (*HalfvecL1Distance) (int dim, half * ax, half * bx);
 
 void		HalfvecInit(void);
 
+/* Variables */
+extern bool	halfvec_use_fp16_compute;
+
 /*
  * Check if half is NaN
  */
diff --git a/test/expected/halfvec_functions_fp16.out b/test/expected/halfvec_functions_fp16.out
new file mode 100644
index 000000000..a82a604b0
--- /dev/null
+++ b/test/expected/halfvec_functions_fp16.out
@@ -0,0 +1,141 @@
+SET halfvec.use_fp16_compute = true;
+SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
+ l2_distance 
+-------------
+           5
+(1 row)
+
+SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
+ l2_distance 
+-------------
+           1
+(1 row)
+
+SELECT l2_distance('[1,2]'::halfvec, '[3]');
+ERROR:  different halfvec dimensions 2 and 1
+SELECT l2_distance('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,4,5]');
+ l2_distance 
+-------------
+           5
+(1 row)
+
+SELECT '[0,0]'::halfvec <-> '[3,4]';
+ ?column? 
+----------
+        5
+(1 row)
+
+SELECT inner_product('[1,2]'::halfvec, '[3,4]');
+ inner_product 
+---------------
+            11
+(1 row)
+
+SELECT inner_product('[1,2]'::halfvec, '[3]');
+ERROR:  different halfvec dimensions 2 and 1
+SELECT inner_product('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
+ inner_product 
+---------------
+            45
+(1 row)
+
+SELECT '[1,2]'::halfvec <#> '[3,4]';
+ ?column? 
+----------
+      -11
+(1 row)
+
+SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
+ cosine_distance 
+-----------------
+               0
+(1 row)
+
+SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
+ cosine_distance 
+-----------------
+             NaN
+(1 row)
+
+SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
+ cosine_distance 
+-----------------
+               0
+(1 row)
+
+SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
+ cosine_distance 
+-----------------
+               1
+(1 row)
+
+SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
+ cosine_distance 
+-----------------
+               2
+(1 row)
+
+SELECT cosine_distance('[1,2]'::halfvec, '[3]');
+ERROR:  different halfvec dimensions 2 and 1
+SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
+ cosine_distance 
+-----------------
+               0
+(1 row)
+
+SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
+ cosine_distance 
+-----------------
+               2
+(1 row)
+
+SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
+ cosine_distance 
+-----------------
+               0
+(1 row)
+
+SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[-1,-2,-3,-4,-5,-6,-7,-8,-9]');
+ cosine_distance 
+-----------------
+               2
+(1 row)
+
+SELECT '[1,2]'::halfvec <=> '[2,4]';
+ ?column? 
+----------
+        0
+(1 row)
+
+SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
+ l1_distance 
+-------------
+           7
+(1 row)
+
+SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
+ l1_distance 
+-------------
+           1
+(1 row)
+
+SELECT l1_distance('[1,2]'::halfvec, '[3]');
+ERROR:  different halfvec dimensions 2 and 1
+SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
+ l1_distance 
+-------------
+           0
+(1 row)
+
+SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[0,3,2,5,4,7,6,9,8]');
+ l1_distance 
+-------------
+           9
+(1 row)
+
+SELECT '[0,0]'::halfvec <+> '[3,4]';
+ ?column? 
+----------
+        7
+(1 row)
+
diff --git a/test/sql/halfvec_functions_fp16.sql b/test/sql/halfvec_functions_fp16.sql
new file mode 100644
index 000000000..e930a840e
--- /dev/null
+++ b/test/sql/halfvec_functions_fp16.sql
@@ -0,0 +1,31 @@
+SET halfvec.use_fp16_compute = true;
+SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
+SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
+SELECT l2_distance('[1,2]'::halfvec, '[3]');
+SELECT l2_distance('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,4,5]');
+SELECT '[0,0]'::halfvec <-> '[3,4]';
+
+SELECT inner_product('[1,2]'::halfvec, '[3,4]');
+SELECT inner_product('[1,2]'::halfvec, '[3]');
+SELECT inner_product('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
+SELECT '[1,2]'::halfvec <#> '[3,4]';
+
+SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
+SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
+SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
+SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
+SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
+SELECT cosine_distance('[1,2]'::halfvec, '[3]');
+SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
+SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
+SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
+SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[-1,-2,-3,-4,-5,-6,-7,-8,-9]');
+SELECT '[1,2]'::halfvec <=> '[2,4]';
+
+SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
+SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
+SELECT l1_distance('[1,2]'::halfvec, '[3]');
+SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
+SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[0,3,2,5,4,7,6,9,8]');
+SELECT '[0,0]'::halfvec <+> '[3,4]';
+

From 59699511e61dab9677b5e4fd9bb5d7a22c84b6c5 Mon Sep 17 00:00:00 2001
From: Luca Giacchino <luca.giacchino@intel.com>
Date: Wed, 25 Sep 2024 14:33:58 -0700
Subject: [PATCH 02/64] Automate fp16/fp32 halfvec distance computation

---
 src/halfutils.c                          | 539 +++++++++++------------
 src/halfutils.h                          |   3 -
 test/expected/halfvec.out                |  66 +++
 test/expected/halfvec_functions_fp16.out | 141 ------
 test/sql/halfvec.sql                     |  11 +
 test/sql/halfvec_functions_fp16.sql      |  31 --
 6 files changed, 343 insertions(+), 448 deletions(-)
 delete mode 100644 test/expected/halfvec_functions_fp16.out
 delete mode 100644 test/sql/halfvec_functions_fp16.sql

diff --git a/src/halfutils.c b/src/halfutils.c
index 9ef916803..4b4cf9d4b 100644
--- a/src/halfutils.c
+++ b/src/halfutils.c
@@ -2,7 +2,6 @@
 
 #include "halfutils.h"
 #include "halfvec.h"
-#include "utils/guc.h"
 
 #ifdef HALFVEC_DISPATCH
 #include <immintrin.h>
@@ -24,12 +23,10 @@
 #define TARGET_AVX512FP16
 #else
 #define TARGET_F16C __attribute__((target("avx,f16c,fma")))
-#define TARGET_AVX512FP16 __attribute__((target("avx512fp16,avx512f,avx512vl,avx512bw")))
+#define TARGET_AVX512FP16 __attribute__((target("avx512fp16,avx512f,avx512dq,avx512vl,avx512bw")))
 #endif
 #endif
 
-bool halfvec_use_fp16_compute;
-
 float		(*HalfvecL2SquaredDistance) (int dim, half * ax, half * bx);
 float		(*HalfvecInnerProduct) (int dim, half * ax, half * bx);
 double		(*HalfvecCosineSimilarity) (int dim, half * ax, half * bx);
@@ -87,82 +84,90 @@ HalfvecL2SquaredDistanceF16c(int dim, half * ax, half * bx)
 }
 
 #ifdef HAVE_AVX512FP16
-TARGET_AVX512FP16 static float
-HalfvecL2SquaredDistanceAvx512Fp16(int dim, half * ax, half * bx)
-{
-	float		distance;
-	int			i;
-	int			count = (dim / 32) * 32;
-	unsigned long mask;
-	__m512h		axi;
-	__m512h		bxi;
-	__m512h		diff;
-	__m512h		dist = _mm512_setzero_ph();
-
-	for (i = 0; i < count; i += 32)
-	{
-		axi = _mm512_loadu_ph(ax+i);
-		bxi = _mm512_loadu_ph(bx+i);
-		diff = _mm512_sub_ph(axi, bxi);
-		dist = _mm512_fmadd_ph(diff, diff, dist);
-	}
-
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-	diff = _mm512_sub_ph(axi, bxi);
-	dist = _mm512_fmadd_ph(diff, diff, dist);
-
-	distance = (float)_mm512_reduce_add_ph(dist);
+TARGET_AVX512FP16 static inline bool
+HasInfinity(__m512h val) {
+	/* Test for positive and negative infinity */
+	__mmask32 mask = _mm512_fpclass_ph_mask(val, 0x08 + 0x10);
+	return mask != 0;
+}
 
-	return distance;
+TARGET_AVX512FP16 static inline __m512
+ConvertToFp32Sum(__m512h val) {
+	__m256h val_lower = _mm256_castsi256_ph(_mm512_extracti32x8_epi32(_mm512_castph_si512(val), 0));
+	__m256h val_upper = _mm256_castsi256_ph(_mm512_extracti32x8_epi32(_mm512_castph_si512(val), 1));
+	return _mm512_add_ps(_mm512_cvtxph_ps(val_lower), _mm512_cvtxph_ps(val_upper));
 }
 
 TARGET_AVX512FP16 static float
-HalfvecL2SquaredDistanceAvx512Fp32(int dim, half * ax, half * bx)
+HalfvecL2SquaredDistanceAvx512(int dim, half * ax, half * bx)
 {
 	float		distance;
 	int			i;
-	int			count = (dim / 16) * 16;
 	unsigned long mask;
-	__m256h		axi;
-	__m256h		bxi;
-	__m512		axs;
-	__m512		bxs;
-	__m512		diff;
-	__m512		dist = _mm512_setzero_ps();
-
-	for (i = 0; i < count; i += 16)
+
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		diff_512h;
+	__m512h		dist_512h = _mm512_setzero_ph();
+	__m512h		dist_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h		bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		diff_512;
+	__m512		dist_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
 	{
-		axi = _mm256_loadu_ph(ax+i);
-		bxi = _mm256_loadu_ph(bx+i);
-		axs = _mm512_cvtxph_ps(axi);
-		bxs = _mm512_cvtxph_ps(bxi);
-		diff = _mm512_sub_ps(axs, bxs);
-		dist = _mm512_fmadd_ps(diff, diff, dist);
+		if (dim - i < 32)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		diff_512h = _mm512_sub_ph(axi_512h, bxi_512h);
+		dist_512h_temp = _mm512_fmadd_ph(diff_512h, diff_512h, dist_512h);
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(dist_512h_temp))
+			break;
+		else
+			dist_512h = dist_512h_temp;
 	}
-	
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-	axs = _mm512_cvtxph_ps(axi);
-	bxs = _mm512_cvtxph_ps(bxi);
-	diff = _mm512_sub_ps(axs, bxs);
-	dist = _mm512_fmadd_ps(diff, diff, dist);
+	dist_512 = ConvertToFp32Sum(dist_512h);
 
-	distance = (float)_mm512_reduce_add_ps(dist);
+	/* FP32 computation */
+	for (; i < dim; i += 16)
+	{
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		diff_512 = _mm512_sub_ps(axi_512, bxi_512);
+		dist_512 = _mm512_fmadd_ps(diff_512, diff_512, dist_512);
+	}
 
+	distance = _mm512_reduce_add_ps(dist_512);
 	return distance;
 }
-
-static float
-HalfvecL2SquaredDistanceAvx512(int dim, half * ax, half * bx)
-{
-	if (halfvec_use_fp16_compute)
-		return HalfvecL2SquaredDistanceAvx512Fp16(dim, ax, bx);
-	else
-		return HalfvecL2SquaredDistanceAvx512Fp32(dim, ax, bx);
-}
 #endif
 #endif
 
@@ -210,75 +215,71 @@ HalfvecInnerProductF16c(int dim, half * ax, half * bx)
 
 #ifdef HAVE_AVX512FP16
 TARGET_AVX512FP16 static float
-HalfvecInnerProductAvx512Fp16(int dim, half * ax, half * bx)
+HalfvecInnerProductAvx512(int dim, half * ax, half * bx)
 {
 	float		distance;
 	int			i;
-	int			count = (dim / 32) * 32;
-	unsigned long mask;
-	__m512h		axi;
-	__m512h		bxi;
-	__m512h		dist = _mm512_setzero_ph();
-
-	for (i = 0; i < count; i += 32)
+	unsigned int mask;
+
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		dist_512h = _mm512_setzero_ph();
+	__m512h		dist_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h		bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		dist_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
 	{
-		axi = _mm512_loadu_ph(ax+i);
-		bxi = _mm512_loadu_ph(bx+i);
-		dist = _mm512_fmadd_ph(axi, bxi, dist);
+		if (dim - i < 32)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		dist_512h_temp = _mm512_fmadd_ph(axi_512h, bxi_512h, dist_512h);
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(dist_512h_temp))
+			break;
+		else
+			dist_512h = dist_512h_temp;
 	}
+	dist_512 = ConvertToFp32Sum(dist_512h);
 
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-	dist = _mm512_fmadd_ph(axi, bxi, dist);
-
-	distance = (float)_mm512_reduce_add_ph(dist);
-
-	return distance;
-}
-
-TARGET_AVX512FP16 static float
-HalfvecInnerProductAvx512Fp32(int dim, half * ax, half * bx)
-{
-	float		distance;
-	int			i;
-	int			count = (dim / 16) * 16;
-	unsigned long mask;
-	__m256h		axi;
-	__m256h		bxi;
-	__m512		axs;
-	__m512		bxs;
-	__m512		dist = _mm512_setzero_ps();
-
-	for (i = 0; i < count; i += 16)
+	/* FP32 computation */
+	for (; i < dim; i += 16)
 	{
-		axi = _mm256_loadu_ph(ax+i);
-		bxi = _mm256_loadu_ph(bx+i);
-		axs = _mm512_cvtxph_ps(axi);
-		bxs = _mm512_cvtxph_ps(bxi);
-		dist = _mm512_fmadd_ps(axs, bxs, dist);
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		dist_512 = _mm512_fmadd_ps(axi_512, bxi_512, dist_512);
 	}
 
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-	axs = _mm512_cvtxph_ps(axi);
-	bxs = _mm512_cvtxph_ps(bxi);
-	dist = _mm512_fmadd_ps(axs, bxs, dist);
-
-	distance = (float)_mm512_reduce_add_ps(dist);
-
+	distance = _mm512_reduce_add_ps(dist_512);
 	return distance;
 }
-
-static float
-HalfvecInnerProductAvx512(int dim, half * ax, half * bx)
-{
-	if (halfvec_use_fp16_compute)
-		return HalfvecInnerProductAvx512Fp16(dim, ax, bx);
-	else
-		return HalfvecInnerProductAvx512Fp32(dim, ax, bx);
-}
 #endif
 #endif
 
@@ -356,97 +357,93 @@ HalfvecCosineSimilarityF16c(int dim, half * ax, half * bx)
 
 #ifdef HAVE_AVX512FP16
 TARGET_AVX512FP16 static double
-HalfvecCosineSimilarityAvx512Fp16(int dim, half * ax, half * bx)
+HalfvecCosineSimilarityAvx512(int dim, half * ax, half * bx)
 {
 	float		similarity;
 	float		norma;
 	float		normb;
 	int			i;
-	int			count = (dim / 32) * 32;
-	unsigned long mask;
-	__m512h		axi;
-	__m512h		bxi;
-	__m512h		sim = _mm512_setzero_ph();
-	__m512h		na = _mm512_setzero_ph();
-	__m512h		nb = _mm512_setzero_ph();
-
-	for (i = 0; i < count; i += 32)
+	unsigned int mask;
+
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		sim_512h = _mm512_setzero_ph();
+	__m512h		na_512h = _mm512_setzero_ph();
+	__m512h		nb_512h = _mm512_setzero_ph();
+	__m512h		sim_512h_temp;
+	__m512h		na_512h_temp;
+	__m512h		nb_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h 	bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		sim_512;
+	__m512		na_512;
+	__m512		nb_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
 	{
-		axi = _mm512_loadu_ph(ax+i);
-		bxi = _mm512_loadu_ph(bx+i);
-		sim = _mm512_fmadd_ph(axi, bxi, sim);
-		na = _mm512_fmadd_ph(axi, axi, na);
-		nb = _mm512_fmadd_ph(bxi, bxi, nb);
+		if (dim - i < 32) {
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else {
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		sim_512h_temp = _mm512_fmadd_ph(axi_512h, bxi_512h, sim_512h);
+		na_512h_temp = _mm512_fmadd_ph(axi_512h, axi_512h, na_512h);
+		nb_512h_temp = _mm512_fmadd_ph(bxi_512h, bxi_512h, nb_512h);
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(sim_512h_temp) ||
+			HasInfinity(na_512h_temp) ||
+			HasInfinity(nb_512h_temp))
+			break;
+		else
+		{
+			sim_512h = sim_512h_temp;
+			na_512h = na_512h_temp;
+			nb_512h = nb_512h_temp;
+		}
 	}
+	sim_512 = ConvertToFp32Sum(sim_512h);
+	na_512 = ConvertToFp32Sum(na_512h);
+	nb_512 = ConvertToFp32Sum(nb_512h);
 
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-	sim = _mm512_fmadd_ph(axi, bxi, sim);
-	na = _mm512_fmadd_ph(axi, axi, na);
-	nb = _mm512_fmadd_ph(bxi, bxi, nb);
-
-	similarity = (float)_mm512_reduce_add_ph(sim);
-	norma = (float)_mm512_reduce_add_ph(na);
-	normb = (float)_mm512_reduce_add_ph(nb);
-
-	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
-	return (double) similarity / sqrt((double) norma * (double) normb);
-}
-
-TARGET_AVX512FP16 static double
-HalfvecCosineSimilarityAvx512Fp32(int dim, half * ax, half * bx)
-{
-	float		similarity;
-	float		norma;
-	float		normb;
-	int			i;
-	int			count = (dim / 16) * 16;
-	unsigned long mask;
-	__m256h		axi;
-	__m256h 	bxi;
-	__m512		axs;
-	__m512		bxs;
-	__m512		sim = _mm512_setzero_ps();
-	__m512		na = _mm512_setzero_ps();
-	__m512		nb = _mm512_setzero_ps();
-
-	for (i = 0; i < count; i += 16)
+	/* FP32 computation */
+	for (; i < dim; i += 16)
 	{
-		axi = _mm256_loadu_ph(ax+i);
-		bxi = _mm256_loadu_ph(bx+i);
-		axs = _mm512_cvtxph_ps(axi);
-		bxs = _mm512_cvtxph_ps(bxi);
-		sim = _mm512_fmadd_ps(axs, bxs, sim);
-		na = _mm512_fmadd_ps(axs, axs, na);
-		nb = _mm512_fmadd_ps(bxs, bxs, nb);
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		sim_512 = _mm512_fmadd_ps(axi_512, bxi_512, sim_512);
+		na_512 = _mm512_fmadd_ps(axi_512, axi_512, na_512);
+		nb_512 = _mm512_fmadd_ps(bxi_512, bxi_512, nb_512);
 	}
 
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-	axs = _mm512_cvtxph_ps(axi);
-	bxs = _mm512_cvtxph_ps(bxi);
-	sim = _mm512_fmadd_ps(axs, bxs, sim);
-	na = _mm512_fmadd_ps(axs, axs, na);
-	nb = _mm512_fmadd_ps(bxs, bxs, nb);
-
-	similarity = (float)_mm512_reduce_add_ps(sim);
-	norma = (float)_mm512_reduce_add_ps(na);
-	normb = (float)_mm512_reduce_add_ps(nb);
+	similarity = _mm512_reduce_add_ps(sim_512);
+	norma = _mm512_reduce_add_ps(na_512);
+	normb = _mm512_reduce_add_ps(nb_512);
 
 	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
 	return (double) similarity / sqrt((double) norma * (double) normb);
 }
-
-static double
-HalfvecCosineSimilarityAvx512(int dim, half * ax, half * bx)
-{
-	if (halfvec_use_fp16_compute)
-		return HalfvecCosineSimilarityAvx512Fp16(dim, ax, bx);
-	else
-		return HalfvecCosineSimilarityAvx512Fp32(dim, ax, bx);
-}
 #endif
 #endif
 
@@ -496,75 +493,72 @@ HalfvecL1DistanceF16c(int dim, half * ax, half * bx)
 
 #ifdef HAVE_AVX512FP16
 TARGET_AVX512FP16 static float
-HalfvecL1DistanceAvx512Fp16(int dim, half * ax, half * bx)
+HalfvecL1DistanceAvx512(int dim, half * ax, half * bx)
 {
 	float		distance;
 	int			i;
-	int			count = (dim / 32) * 32;
 	unsigned long mask;
-	__m512h		axi;
-	__m512h		bxi;
-	__m512h		dist = _mm512_setzero_ph();
 
-	for (i = 0; i < count; i += 32)
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		dist_512h = _mm512_setzero_ph();
+	__m512h		dist_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h		bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		dist_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
 	{
-		axi = _mm512_loadu_ph(ax+i);
-		bxi = _mm512_loadu_ph(bx+i);
-		dist = _mm512_add_ph(dist, _mm512_abs_ph(_mm512_sub_ph(axi, bxi)));
+		if (dim - i < 32)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		dist_512h_temp = _mm512_add_ph(dist_512h, _mm512_abs_ph(_mm512_sub_ph(axi_512h, bxi_512h)));
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(dist_512h_temp))
+			break;
+		else
+			dist_512h = dist_512h_temp;
 	}
+	dist_512 = ConvertToFp32Sum(dist_512h);
 
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-	dist = _mm512_add_ph(dist, _mm512_abs_ph(_mm512_sub_ph(axi, bxi)));
-
-	distance = (float)_mm512_reduce_add_ph(dist);
-
-	return distance;
-}
-
-TARGET_AVX512FP16 static float
-HalfvecL1DistanceAvx512Fp32(int dim, half * ax, half * bx)
-{
-	float		distance;
-	int			i;
-	int			count = (dim / 16) * 16;
-	unsigned long mask;
-	__m256h		axi;
-	__m256h		bxi;
-	__m512		axs;
-	__m512		bxs;
-	__m512		dist = _mm512_setzero_ps();
-
-	for (i = 0; i < count; i += 16)
+	/* FP32 computation */
+	for (; i < dim; i += 16)
 	{
-		axi = _mm256_loadu_ph(ax+i);
-		bxi = _mm256_loadu_ph(bx+i);
-		axs = _mm512_cvtxph_ps(axi);
-		bxs = _mm512_cvtxph_ps(bxi);
-		dist = _mm512_add_ps(dist, _mm512_abs_ps(_mm512_sub_ps(axs, bxs)));
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		dist_512 = _mm512_add_ps(dist_512, _mm512_abs_ps(_mm512_sub_ps(axi_512, bxi_512)));
 	}
 
-	mask = (1 << (dim - i)) - 1;
-	axi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-	bxi = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-	axs = _mm512_cvtxph_ps(axi);
-	bxs = _mm512_cvtxph_ps(bxi);
-	dist = _mm512_add_ps(dist, _mm512_abs_ps(_mm512_sub_ps(axs, bxs)));
-
-	distance = (float)_mm512_reduce_add_ps(dist);
+	distance = _mm512_reduce_add_ps(dist_512);
 
 	return distance;
 }
-
-static float
-HalfvecL1DistanceAvx512(int dim, half * ax, half * bx)
-{
-	if (halfvec_use_fp16_compute)
-		return HalfvecL1DistanceAvx512Fp16(dim, ax, bx);
-	else
-		return HalfvecL1DistanceAvx512Fp32(dim, ax, bx);
-}
 #endif
 #endif
 
@@ -618,16 +612,28 @@ SupportsOsXsave()
 	return (exx[2] & CPU_FEATURE_OSXSAVE) == CPU_FEATURE_OSXSAVE;
 }
 
-#define CPU_FEATURE_AVX512F         (1 << 16)
-#define CPU_FEATURE_AVX512_FP16     (1 << 23)
-#define CPU_FEATURE_AVX512_BW       (1 << 30)
-#define CPU_FEATURE_AVX512VL     	(1 << 31)
+#define CPU_FEATURE_AVX512F     (1 << 16)
+#define CPU_FEATURE_AVX512DQ    (1 << 17)
+#define CPU_FEATURE_AVX512_FP16 (1 << 23)
+#define CPU_FEATURE_AVX512BW    (1 << 30)
+#define CPU_FEATURE_AVX512VL    (1 << 31)
 
 TARGET_XSAVE static bool
 SupportsAvx512Fp16()
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
 
+	/* AVX512 features required:
+	 *  AVX512F : sub/fma/add instructions
+	 *  AVX512DQ: _mm512_extracti32x8_epi32
+	 *  AVX512VL: _mm256_loadu_ph
+	 *  AVX512BW: masked loads
+	 */
+	unsigned int features = CPU_FEATURE_AVX512F |
+	                        CPU_FEATURE_AVX512DQ |
+	                        CPU_FEATURE_AVX512VL |
+	                        CPU_FEATURE_AVX512BW;
+
 	/* Check OS supports XSAVE */
 	if (!SupportsOsXsave())
 		return false;
@@ -642,16 +648,7 @@ SupportsAvx512Fp16()
 	__cpuid(exx, 7, 0);
 #endif
 
-	/* Required by AVX512 sub/fma/add instructions */
-	if ((exx[1] & CPU_FEATURE_AVX512F) != CPU_FEATURE_AVX512F)
-		return false;
-
-	/* Required by _mm256_loadu_ph */
-    if ((exx[1] & CPU_FEATURE_AVX512VL) != CPU_FEATURE_AVX512VL)
-	    return false;
-
-	/* Required by masked loads in remainder loops */
-	if ((exx[1] & CPU_FEATURE_AVX512_BW) != CPU_FEATURE_AVX512_BW)
+	if ((exx[1] & features) != features)
 		return false;
 
 	return (exx[3] & CPU_FEATURE_AVX512_FP16) == CPU_FEATURE_AVX512_FP16;
@@ -691,8 +688,4 @@ HalfvecInit(void)
 	}
 #endif
 #endif
-
-	DefineCustomBoolVariable("halfvec.use_fp16_compute", "Use FP16 computation for distance calculations",
-							"If true, distance calculations are executed in FP16. If false, distance calculations are executed in FP32",
-							&halfvec_use_fp16_compute, false, PGC_USERSET, 0, NULL, NULL, NULL);
 }
diff --git a/src/halfutils.h b/src/halfutils.h
index 99131d583..c684f72d7 100644
--- a/src/halfutils.h
+++ b/src/halfutils.h
@@ -17,9 +17,6 @@ extern float (*HalfvecL1Distance) (int dim, half * ax, half * bx);
 
 void		HalfvecInit(void);
 
-/* Variables */
-extern bool	halfvec_use_fp16_compute;
-
 /*
  * Check if half is NaN
  */
diff --git a/test/expected/halfvec.out b/test/expected/halfvec.out
index a3ce8931f..867dd176b 100644
--- a/test/expected/halfvec.out
+++ b/test/expected/halfvec.out
@@ -378,6 +378,24 @@ SELECT '[0,0]'::halfvec <-> '[3,4]';
         5
 (1 row)
 
+SELECT l2_distance('[501]'::halfvec, '[1]');
+ l2_distance 
+-------------
+         500
+(1 row)
+
+SELECT l2_distance('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ l2_distance 
+-------------
+           0
+(1 row)
+
+SELECT l2_distance('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ l2_distance 
+-------------
+           6
+(1 row)
+
 SELECT inner_product('[1,2]'::halfvec, '[3,4]');
  inner_product 
 ---------------
@@ -404,6 +422,24 @@ SELECT '[1,2]'::halfvec <#> '[3,4]';
       -11
 (1 row)
 
+SELECT inner_product('[50000,50000]'::halfvec, '[1,1]');
+ inner_product 
+---------------
+         99968
+(1 row)
+
+SELECT inner_product('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ inner_product 
+---------------
+            36
+(1 row)
+
+SELECT inner_product('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ inner_product 
+---------------
+            72
+(1 row)
+
 SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
  cosine_distance 
 -----------------
@@ -466,6 +502,24 @@ SELECT '[1,2]'::halfvec <=> '[2,4]';
         0
 (1 row)
 
+SELECT cosine_distance('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ cosine_distance 
+-----------------
+               0
+(1 row)
+
+SELECT cosine_distance('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ cosine_distance 
+-----------------
+               0
+(1 row)
+
+SELECT cosine_distance('[1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0]'::halfvec, '[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]');
+ cosine_distance 
+-----------------
+               1
+(1 row)
+
 SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
  l1_distance 
 -------------
@@ -498,6 +552,18 @@ SELECT '[0,0]'::halfvec <+> '[3,4]';
         7
 (1 row)
 
+SELECT l1_distance('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ l1_distance 
+-------------
+           0
+(1 row)
+
+SELECT l1_distance('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+ l1_distance 
+-------------
+          36
+(1 row)
+
 SELECT l2_normalize('[3,4]'::halfvec);
       l2_normalize      
 ------------------------
diff --git a/test/expected/halfvec_functions_fp16.out b/test/expected/halfvec_functions_fp16.out
deleted file mode 100644
index a82a604b0..000000000
--- a/test/expected/halfvec_functions_fp16.out
+++ /dev/null
@@ -1,141 +0,0 @@
-SET halfvec.use_fp16_compute = true;
-SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
- l2_distance 
--------------
-           5
-(1 row)
-
-SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
- l2_distance 
--------------
-           1
-(1 row)
-
-SELECT l2_distance('[1,2]'::halfvec, '[3]');
-ERROR:  different halfvec dimensions 2 and 1
-SELECT l2_distance('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,4,5]');
- l2_distance 
--------------
-           5
-(1 row)
-
-SELECT '[0,0]'::halfvec <-> '[3,4]';
- ?column? 
-----------
-        5
-(1 row)
-
-SELECT inner_product('[1,2]'::halfvec, '[3,4]');
- inner_product 
----------------
-            11
-(1 row)
-
-SELECT inner_product('[1,2]'::halfvec, '[3]');
-ERROR:  different halfvec dimensions 2 and 1
-SELECT inner_product('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
- inner_product 
----------------
-            45
-(1 row)
-
-SELECT '[1,2]'::halfvec <#> '[3,4]';
- ?column? 
-----------
-      -11
-(1 row)
-
-SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
- cosine_distance 
------------------
-               0
-(1 row)
-
-SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
- cosine_distance 
------------------
-             NaN
-(1 row)
-
-SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
- cosine_distance 
------------------
-               0
-(1 row)
-
-SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
- cosine_distance 
------------------
-               1
-(1 row)
-
-SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
- cosine_distance 
------------------
-               2
-(1 row)
-
-SELECT cosine_distance('[1,2]'::halfvec, '[3]');
-ERROR:  different halfvec dimensions 2 and 1
-SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
- cosine_distance 
------------------
-               0
-(1 row)
-
-SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
- cosine_distance 
------------------
-               2
-(1 row)
-
-SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
- cosine_distance 
------------------
-               0
-(1 row)
-
-SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[-1,-2,-3,-4,-5,-6,-7,-8,-9]');
- cosine_distance 
------------------
-               2
-(1 row)
-
-SELECT '[1,2]'::halfvec <=> '[2,4]';
- ?column? 
-----------
-        0
-(1 row)
-
-SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
- l1_distance 
--------------
-           7
-(1 row)
-
-SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
- l1_distance 
--------------
-           1
-(1 row)
-
-SELECT l1_distance('[1,2]'::halfvec, '[3]');
-ERROR:  different halfvec dimensions 2 and 1
-SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
- l1_distance 
--------------
-           0
-(1 row)
-
-SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[0,3,2,5,4,7,6,9,8]');
- l1_distance 
--------------
-           9
-(1 row)
-
-SELECT '[0,0]'::halfvec <+> '[3,4]';
- ?column? 
-----------
-        7
-(1 row)
-
diff --git a/test/sql/halfvec.sql b/test/sql/halfvec.sql
index 1a3fd1b82..d94518348 100644
--- a/test/sql/halfvec.sql
+++ b/test/sql/halfvec.sql
@@ -87,12 +87,18 @@ SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
 SELECT l2_distance('[1,2]'::halfvec, '[3]');
 SELECT l2_distance('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,4,5]');
 SELECT '[0,0]'::halfvec <-> '[3,4]';
+SELECT l2_distance('[501]'::halfvec, '[1]');
+SELECT l2_distance('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+SELECT l2_distance('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
 
 SELECT inner_product('[1,2]'::halfvec, '[3,4]');
 SELECT inner_product('[1,2]'::halfvec, '[3]');
 SELECT inner_product('[65504]'::halfvec, '[65504]');
 SELECT inner_product('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
 SELECT '[1,2]'::halfvec <#> '[3,4]';
+SELECT inner_product('[50000,50000]'::halfvec, '[1,1]');
+SELECT inner_product('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+SELECT inner_product('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
 
 SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
 SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
@@ -105,6 +111,9 @@ SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
 SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
 SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[-1,-2,-3,-4,-5,-6,-7,-8,-9]');
 SELECT '[1,2]'::halfvec <=> '[2,4]';
+SELECT cosine_distance('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+SELECT cosine_distance('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+SELECT cosine_distance('[1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0]'::halfvec, '[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]');
 
 SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
 SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
@@ -112,6 +121,8 @@ SELECT l1_distance('[1,2]'::halfvec, '[3]');
 SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
 SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[0,3,2,5,4,7,6,9,8]');
 SELECT '[0,0]'::halfvec <+> '[3,4]';
+SELECT l1_distance('[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
+SELECT l1_distance('[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]'::halfvec, '[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]');
 
 SELECT l2_normalize('[3,4]'::halfvec);
 SELECT l2_normalize('[3,0]'::halfvec);
diff --git a/test/sql/halfvec_functions_fp16.sql b/test/sql/halfvec_functions_fp16.sql
deleted file mode 100644
index e930a840e..000000000
--- a/test/sql/halfvec_functions_fp16.sql
+++ /dev/null
@@ -1,31 +0,0 @@
-SET halfvec.use_fp16_compute = true;
-SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
-SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
-SELECT l2_distance('[1,2]'::halfvec, '[3]');
-SELECT l2_distance('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,1,1,1,1,1,1,4,5]');
-SELECT '[0,0]'::halfvec <-> '[3,4]';
-
-SELECT inner_product('[1,2]'::halfvec, '[3,4]');
-SELECT inner_product('[1,2]'::halfvec, '[3]');
-SELECT inner_product('[1,1,1,1,1,1,1,1,1]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
-SELECT '[1,2]'::halfvec <#> '[3,4]';
-
-SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
-SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
-SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
-SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
-SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
-SELECT cosine_distance('[1,2]'::halfvec, '[3]');
-SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
-SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
-SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
-SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[-1,-2,-3,-4,-5,-6,-7,-8,-9]');
-SELECT '[1,2]'::halfvec <=> '[2,4]';
-
-SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
-SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
-SELECT l1_distance('[1,2]'::halfvec, '[3]');
-SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[1,2,3,4,5,6,7,8,9]');
-SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::halfvec, '[0,3,2,5,4,7,6,9,8]');
-SELECT '[0,0]'::halfvec <+> '[3,4]';
-

From 49fa02bd908cd1a26958cd0b053cc5cf6f1dc4f3 Mon Sep 17 00:00:00 2001
From: Luca Giacchino <luca.giacchino@intel.com>
Date: Mon, 28 Oct 2024 13:50:32 -0700
Subject: [PATCH 03/64] Implement AVX512 vector_to_halfvec conversion

---
 src/halfutils.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/halfutils.h |  3 +++
 src/halfvec.c   |  3 +--
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/src/halfutils.c b/src/halfutils.c
index 4b4cf9d4b..e6048bab1 100644
--- a/src/halfutils.c
+++ b/src/halfutils.c
@@ -32,6 +32,8 @@ float		(*HalfvecInnerProduct) (int dim, half * ax, half * bx);
 double		(*HalfvecCosineSimilarity) (int dim, half * ax, half * bx);
 float		(*HalfvecL1Distance) (int dim, half * ax, half * bx);
 
+void 		(*Float4ToHalfVector) (Vector * vec, HalfVector * result);
+
 static float
 HalfvecL2SquaredDistanceDefault(int dim, half * ax, half * bx)
 {
@@ -562,6 +564,67 @@ HalfvecL1DistanceAvx512(int dim, half * ax, half * bx)
 #endif
 #endif
 
+static void
+Float4ToHalfVectorDefault(Vector * vec, HalfVector * result) {
+	for (int i = 0; i < vec->dim; i++)
+		result->x[i] = Float4ToHalf(vec->x[i]);
+}
+
+#ifdef HAVE_AVX512FP16
+TARGET_AVX512FP16 static void
+Float4ToHalfVectorAvx512(Vector * vec, HalfVector * result) {
+	unsigned long mask;
+	__m512		vec_512;
+	__m256h		vec_256h;
+	__mmask16 	vec_512_inf;
+	__mmask16 	vec_256h_inf;
+
+	for (int i = 0; i < vec->dim; i += 16)
+	{
+		if (vec->dim - i < 16)
+		{
+			mask = (1 << (vec->dim - i)) - 1;
+			vec_512 = _mm512_maskz_loadu_ps(mask, vec->x + i);
+			vec_256h = _mm512_cvtxps_ph(vec_512);
+			_mm256_mask_storeu_epi16(result->x + i, mask, _mm256_castph_si256(vec_256h));
+		}
+		else
+		{
+			vec_512 = _mm512_loadu_ps(vec->x + i);
+			vec_256h = _mm512_cvtxps_ph(vec_512);
+			_mm256_storeu_ph(result->x + i, vec_256h);
+		}
+
+		/* Test for positive and negative infinity */
+		vec_512_inf = _mm512_fpclass_ps_mask(vec_512, 0x08 + 0x10);
+		vec_256h_inf = _mm256_fpclass_ph_mask(vec_256h, 0x08 + 0x10);
+		if (unlikely(vec_512_inf != vec_256h_inf))
+		{
+			float num;
+			char* buf;
+
+			__mmask16 diff = _kxor_mask16(vec_512_inf, vec_256h_inf);
+			/* Find first element in vector to overflow after conversion (first bit set) */
+			int count = 0;
+			while (diff % 2 == 0) {
+				diff >>= 1;
+				count++;
+			}
+			num = vec->x[i + count];
+
+			/* TODO Avoid duplicate code in Float4ToHalf */
+			buf = palloc(FLOAT_SHORTEST_DECIMAL_LEN);
+
+			float_to_shortest_decimal_buf(num, buf);
+
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("\"%s\" is out of range for type halfvec", buf)));
+		}
+	}
+}
+#endif
+
 #ifdef HALFVEC_DISPATCH
 #define CPU_FEATURE_FMA     (1 << 12)
 #define CPU_FEATURE_OSXSAVE (1 << 27)
@@ -667,6 +730,7 @@ HalfvecInit(void)
 	HalfvecInnerProduct = HalfvecInnerProductDefault;
 	HalfvecCosineSimilarity = HalfvecCosineSimilarityDefault;
 	HalfvecL1Distance = HalfvecL1DistanceDefault;
+	Float4ToHalfVector = Float4ToHalfVectorDefault;
 
 #ifdef HALFVEC_DISPATCH
 	if (SupportsCpuFeature(CPU_FEATURE_AVX | CPU_FEATURE_F16C | CPU_FEATURE_FMA))
@@ -685,6 +749,7 @@ HalfvecInit(void)
 		HalfvecInnerProduct = HalfvecInnerProductAvx512;
 		HalfvecCosineSimilarity = HalfvecCosineSimilarityAvx512;
 		HalfvecL1Distance = HalfvecL1DistanceAvx512;
+		Float4ToHalfVector = Float4ToHalfVectorAvx512;
 	}
 #endif
 #endif
diff --git a/src/halfutils.h b/src/halfutils.h
index c684f72d7..1a33d1748 100644
--- a/src/halfutils.h
+++ b/src/halfutils.h
@@ -5,6 +5,7 @@
 
 #include "common/shortest_dec.h"
 #include "halfvec.h"
+#include "vector.h"
 
 #ifdef F16C_SUPPORT
 #include <immintrin.h>
@@ -15,6 +16,8 @@ extern float (*HalfvecInnerProduct) (int dim, half * ax, half * bx);
 extern double (*HalfvecCosineSimilarity) (int dim, half * ax, half * bx);
 extern float (*HalfvecL1Distance) (int dim, half * ax, half * bx);
 
+extern void (*Float4ToHalfVector) (Vector * vec, HalfVector * result);
+
 void		HalfvecInit(void);
 
 /*
diff --git a/src/halfvec.c b/src/halfvec.c
index aad320b1c..9b85ba741 100644
--- a/src/halfvec.c
+++ b/src/halfvec.c
@@ -533,8 +533,7 @@ vector_to_halfvec(PG_FUNCTION_ARGS)
 
 	result = InitHalfVector(vec->dim);
 
-	for (int i = 0; i < vec->dim; i++)
-		result->x[i] = Float4ToHalf(vec->x[i]);
+	Float4ToHalfVector(vec, result);
 
 	PG_RETURN_POINTER(result);
 }

From 80049e47eef08f9a82dec57456825b08080e6631 Mon Sep 17 00:00:00 2001
From: Luca Giacchino <luca.giacchino@intel.com>
Date: Tue, 26 Nov 2024 19:08:16 -0800
Subject: [PATCH 04/64] Add CI job for AVX512 FP16 optimized path

---
 .github/workflows/build.yml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e58c3b56d..93cb37134 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -39,6 +39,38 @@ jobs:
           sudo apt-get update
           sudo apt-get install libipc-run-perl
       - run: make prove_installcheck
+  ubuntu_spr:
+    runs-on: ubuntu-24.04
+    if: ${{ !startsWith(github.ref_name, 'mac') && !startsWith(github.ref_name, 'windows') }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Intel SDE
+        run: |
+          curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz
+          mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
+          sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde
+      - name: Install Postgres
+        run: |
+          sudo apt-get install postgresql-16
+          sudo apt-get install postgresql-server-dev-16
+          sudo systemctl stop postgresql
+          pgdir=$(pg_config --bindir)
+          pgdata=/tmp/postgres_data
+          mkdir $pgdata
+          $pgdir/initdb -D $pgdata --username=$USER
+          sudo chmod 777 /var/run/postgresql/
+          sde -spr -mix -- $pgdir/pg_ctl -D $pgdata start
+      - run: make
+        env:
+          PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
+      - run: |
+          export PG_CONFIG=`which pg_config`
+          sudo --preserve-env=PG_CONFIG make install
+      - run: sde -spr -mix -- make installcheck
+      - if: ${{ failure() }}
+        run: cat regression.diffs
+      - name: Report AVX512 FP16 FMA instruction count
+        run: cat sde*.txt | grep -E "FMA.*PH"
   mac:
     runs-on: ${{ matrix.os }}
     if: ${{ !startsWith(github.ref_name, 'windows') }}

From bfaa33a2f865c593d9fe075faa7ea1d8cce4dcb0 Mon Sep 17 00:00:00 2001
From: Luca Giacchino <luca.giacchino@intel.com>
Date: Tue, 25 Mar 2025 13:41:38 -0700
Subject: [PATCH 05/64] Move AVX-512 functions to separate files

---
 Makefile               |   6 +
 src/halfutils.c        | 440 +---------------------------------------
 src/halfutils_avx512.c | 449 +++++++++++++++++++++++++++++++++++++++++
 src/halfutils_avx512.h |  25 +++
 4 files changed, 482 insertions(+), 438 deletions(-)
 create mode 100644 src/halfutils_avx512.c
 create mode 100644 src/halfutils_avx512.h

diff --git a/Makefile b/Makefile
index 7a4b88caf..75d1330cc 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,9 @@ MODULE_big = vector
 DATA = $(wildcard sql/*--*--*.sql)
 DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql
 OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
+ifneq ($(USE_AVX512), 0)
+	OBJS += src/halfutils_avx512.o
+endif
 HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
 
 TESTS = $(wildcard test/sql/*.sql)
@@ -31,6 +34,9 @@ endif
 # - GCC (needs -ftree-vectorize OR -O3) - https://gcc.gnu.org/projects/tree-ssa/vectorization.html
 # - Clang (could use pragma instead) - https://llvm.org/docs/Vectorizers.html
 PG_CFLAGS += $(OPTFLAGS) -ftree-vectorize -fassociative-math -fno-signed-zeros -fno-trapping-math
+ifneq ($(USE_AVX512), 0)
+	PG_CFLAGS += -DUSE_AVX512
+endif
 
 # Debug GCC auto-vectorization
 # PG_CFLAGS += -fopt-info-vec
diff --git a/src/halfutils.c b/src/halfutils.c
index e6048bab1..bdc370a19 100644
--- a/src/halfutils.c
+++ b/src/halfutils.c
@@ -5,6 +5,7 @@
 
 #ifdef HALFVEC_DISPATCH
 #include <immintrin.h>
+#include "halfutils_avx512.h"
 
 #if defined(USE__GET_CPUID)
 #include <cpuid.h>
@@ -12,18 +13,10 @@
 #include <intrin.h>
 #endif
 
-#if (defined(__GNUC__) && (__GNUC__ >= 12)) || \
-	(defined(__clang__) && (__clang_major__ >= 16)) || \
-	(defined __AVX512FP16__)
-#define HAVE_AVX512FP16
-#endif
-
 #ifdef _MSC_VER
 #define TARGET_F16C
-#define TARGET_AVX512FP16
 #else
 #define TARGET_F16C __attribute__((target("avx,f16c,fma")))
-#define TARGET_AVX512FP16 __attribute__((target("avx512fp16,avx512f,avx512dq,avx512vl,avx512bw")))
 #endif
 #endif
 
@@ -84,93 +77,6 @@ HalfvecL2SquaredDistanceF16c(int dim, half * ax, half * bx)
 
 	return distance;
 }
-
-#ifdef HAVE_AVX512FP16
-TARGET_AVX512FP16 static inline bool
-HasInfinity(__m512h val) {
-	/* Test for positive and negative infinity */
-	__mmask32 mask = _mm512_fpclass_ph_mask(val, 0x08 + 0x10);
-	return mask != 0;
-}
-
-TARGET_AVX512FP16 static inline __m512
-ConvertToFp32Sum(__m512h val) {
-	__m256h val_lower = _mm256_castsi256_ph(_mm512_extracti32x8_epi32(_mm512_castph_si512(val), 0));
-	__m256h val_upper = _mm256_castsi256_ph(_mm512_extracti32x8_epi32(_mm512_castph_si512(val), 1));
-	return _mm512_add_ps(_mm512_cvtxph_ps(val_lower), _mm512_cvtxph_ps(val_upper));
-}
-
-TARGET_AVX512FP16 static float
-HalfvecL2SquaredDistanceAvx512(int dim, half * ax, half * bx)
-{
-	float		distance;
-	int			i;
-	unsigned long mask;
-
-	/* For FP16 computation */
-	__m512h		axi_512h;
-	__m512h		bxi_512h;
-	__m512h		diff_512h;
-	__m512h		dist_512h = _mm512_setzero_ph();
-	__m512h		dist_512h_temp;
-
-	/* For FP32 computation */
-	__m256h		axi_256h;
-	__m256h		bxi_256h;
-	__m512		axi_512;
-	__m512		bxi_512;
-	__m512		diff_512;
-	__m512		dist_512;
-
-	/* FP16 computation */
-	for (i = 0; i < dim; i += 32)
-	{
-		if (dim - i < 32)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-		}
-		else
-		{
-			axi_512h = _mm512_loadu_ph(ax + i);
-			bxi_512h = _mm512_loadu_ph(bx + i);
-		}
-		diff_512h = _mm512_sub_ph(axi_512h, bxi_512h);
-		dist_512h_temp = _mm512_fmadd_ph(diff_512h, diff_512h, dist_512h);
-
-		/* if overflow, continue with FP32 */
-		if (HasInfinity(dist_512h_temp))
-			break;
-		else
-			dist_512h = dist_512h_temp;
-	}
-	dist_512 = ConvertToFp32Sum(dist_512h);
-
-	/* FP32 computation */
-	for (; i < dim; i += 16)
-	{
-		if (dim - i < 16)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-		}
-		else
-		{
-			axi_256h = _mm256_loadu_ph(ax + i);
-			bxi_256h = _mm256_loadu_ph(bx + i);
-		}
-		axi_512 = _mm512_cvtxph_ps(axi_256h);
-		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
-		diff_512 = _mm512_sub_ps(axi_512, bxi_512);
-		dist_512 = _mm512_fmadd_ps(diff_512, diff_512, dist_512);
-	}
-
-	distance = _mm512_reduce_add_ps(dist_512);
-	return distance;
-}
-#endif
 #endif
 
 static float
@@ -214,75 +120,6 @@ HalfvecInnerProductF16c(int dim, half * ax, half * bx)
 
 	return distance;
 }
-
-#ifdef HAVE_AVX512FP16
-TARGET_AVX512FP16 static float
-HalfvecInnerProductAvx512(int dim, half * ax, half * bx)
-{
-	float		distance;
-	int			i;
-	unsigned int mask;
-
-	/* For FP16 computation */
-	__m512h		axi_512h;
-	__m512h		bxi_512h;
-	__m512h		dist_512h = _mm512_setzero_ph();
-	__m512h		dist_512h_temp;
-
-	/* For FP32 computation */
-	__m256h		axi_256h;
-	__m256h		bxi_256h;
-	__m512		axi_512;
-	__m512		bxi_512;
-	__m512		dist_512;
-
-	/* FP16 computation */
-	for (i = 0; i < dim; i += 32)
-	{
-		if (dim - i < 32)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-		}
-		else
-		{
-			axi_512h = _mm512_loadu_ph(ax + i);
-			bxi_512h = _mm512_loadu_ph(bx + i);
-		}
-		dist_512h_temp = _mm512_fmadd_ph(axi_512h, bxi_512h, dist_512h);
-
-		/* if overflow, continue with FP32 */
-		if (HasInfinity(dist_512h_temp))
-			break;
-		else
-			dist_512h = dist_512h_temp;
-	}
-	dist_512 = ConvertToFp32Sum(dist_512h);
-
-	/* FP32 computation */
-	for (; i < dim; i += 16)
-	{
-		if (dim - i < 16)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-		}
-		else
-		{
-			axi_256h = _mm256_loadu_ph(ax + i);
-			bxi_256h = _mm256_loadu_ph(bx + i);
-		}
-		axi_512 = _mm512_cvtxph_ps(axi_256h);
-		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
-		dist_512 = _mm512_fmadd_ps(axi_512, bxi_512, dist_512);
-	}
-
-	distance = _mm512_reduce_add_ps(dist_512);
-	return distance;
-}
-#endif
 #endif
 
 static double
@@ -356,97 +193,6 @@ HalfvecCosineSimilarityF16c(int dim, half * ax, half * bx)
 	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
 	return (double) similarity / sqrt((double) norma * (double) normb);
 }
-
-#ifdef HAVE_AVX512FP16
-TARGET_AVX512FP16 static double
-HalfvecCosineSimilarityAvx512(int dim, half * ax, half * bx)
-{
-	float		similarity;
-	float		norma;
-	float		normb;
-	int			i;
-	unsigned int mask;
-
-	/* For FP16 computation */
-	__m512h		axi_512h;
-	__m512h		bxi_512h;
-	__m512h		sim_512h = _mm512_setzero_ph();
-	__m512h		na_512h = _mm512_setzero_ph();
-	__m512h		nb_512h = _mm512_setzero_ph();
-	__m512h		sim_512h_temp;
-	__m512h		na_512h_temp;
-	__m512h		nb_512h_temp;
-
-	/* For FP32 computation */
-	__m256h		axi_256h;
-	__m256h 	bxi_256h;
-	__m512		axi_512;
-	__m512		bxi_512;
-	__m512		sim_512;
-	__m512		na_512;
-	__m512		nb_512;
-
-	/* FP16 computation */
-	for (i = 0; i < dim; i += 32)
-	{
-		if (dim - i < 32) {
-			mask = (1 << (dim - i)) - 1;
-			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-		}
-		else {
-			axi_512h = _mm512_loadu_ph(ax + i);
-			bxi_512h = _mm512_loadu_ph(bx + i);
-		}
-		sim_512h_temp = _mm512_fmadd_ph(axi_512h, bxi_512h, sim_512h);
-		na_512h_temp = _mm512_fmadd_ph(axi_512h, axi_512h, na_512h);
-		nb_512h_temp = _mm512_fmadd_ph(bxi_512h, bxi_512h, nb_512h);
-
-		/* if overflow, continue with FP32 */
-		if (HasInfinity(sim_512h_temp) ||
-			HasInfinity(na_512h_temp) ||
-			HasInfinity(nb_512h_temp))
-			break;
-		else
-		{
-			sim_512h = sim_512h_temp;
-			na_512h = na_512h_temp;
-			nb_512h = nb_512h_temp;
-		}
-	}
-	sim_512 = ConvertToFp32Sum(sim_512h);
-	na_512 = ConvertToFp32Sum(na_512h);
-	nb_512 = ConvertToFp32Sum(nb_512h);
-
-	/* FP32 computation */
-	for (; i < dim; i += 16)
-	{
-		if (dim - i < 16)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-		}
-		else
-		{
-			axi_256h = _mm256_loadu_ph(ax + i);
-			bxi_256h = _mm256_loadu_ph(bx + i);
-		}
-		axi_512 = _mm512_cvtxph_ps(axi_256h);
-		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
-		sim_512 = _mm512_fmadd_ps(axi_512, bxi_512, sim_512);
-		na_512 = _mm512_fmadd_ps(axi_512, axi_512, na_512);
-		nb_512 = _mm512_fmadd_ps(bxi_512, bxi_512, nb_512);
-	}
-
-	similarity = _mm512_reduce_add_ps(sim_512);
-	norma = _mm512_reduce_add_ps(na_512);
-	normb = _mm512_reduce_add_ps(nb_512);
-
-	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
-	return (double) similarity / sqrt((double) norma * (double) normb);
-}
-#endif
 #endif
 
 static float
@@ -492,76 +238,6 @@ HalfvecL1DistanceF16c(int dim, half * ax, half * bx)
 
 	return distance;
 }
-
-#ifdef HAVE_AVX512FP16
-TARGET_AVX512FP16 static float
-HalfvecL1DistanceAvx512(int dim, half * ax, half * bx)
-{
-	float		distance;
-	int			i;
-	unsigned long mask;
-
-	/* For FP16 computation */
-	__m512h		axi_512h;
-	__m512h		bxi_512h;
-	__m512h		dist_512h = _mm512_setzero_ph();
-	__m512h		dist_512h_temp;
-
-	/* For FP32 computation */
-	__m256h		axi_256h;
-	__m256h		bxi_256h;
-	__m512		axi_512;
-	__m512		bxi_512;
-	__m512		dist_512;
-
-	/* FP16 computation */
-	for (i = 0; i < dim; i += 32)
-	{
-		if (dim - i < 32)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
-			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
-		}
-		else
-		{
-			axi_512h = _mm512_loadu_ph(ax + i);
-			bxi_512h = _mm512_loadu_ph(bx + i);
-		}
-		dist_512h_temp = _mm512_add_ph(dist_512h, _mm512_abs_ph(_mm512_sub_ph(axi_512h, bxi_512h)));
-
-		/* if overflow, continue with FP32 */
-		if (HasInfinity(dist_512h_temp))
-			break;
-		else
-			dist_512h = dist_512h_temp;
-	}
-	dist_512 = ConvertToFp32Sum(dist_512h);
-
-	/* FP32 computation */
-	for (; i < dim; i += 16)
-	{
-		if (dim - i < 16)
-		{
-			mask = (1 << (dim - i)) - 1;
-			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
-			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
-		}
-		else
-		{
-			axi_256h = _mm256_loadu_ph(ax + i);
-			bxi_256h = _mm256_loadu_ph(bx + i);
-		}
-		axi_512 = _mm512_cvtxph_ps(axi_256h);
-		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
-		dist_512 = _mm512_add_ps(dist_512, _mm512_abs_ps(_mm512_sub_ps(axi_512, bxi_512)));
-	}
-
-	distance = _mm512_reduce_add_ps(dist_512);
-
-	return distance;
-}
-#endif
 #endif
 
 static void
@@ -570,60 +246,6 @@ Float4ToHalfVectorDefault(Vector * vec, HalfVector * result) {
 		result->x[i] = Float4ToHalf(vec->x[i]);
 }
 
-#ifdef HAVE_AVX512FP16
-TARGET_AVX512FP16 static void
-Float4ToHalfVectorAvx512(Vector * vec, HalfVector * result) {
-	unsigned long mask;
-	__m512		vec_512;
-	__m256h		vec_256h;
-	__mmask16 	vec_512_inf;
-	__mmask16 	vec_256h_inf;
-
-	for (int i = 0; i < vec->dim; i += 16)
-	{
-		if (vec->dim - i < 16)
-		{
-			mask = (1 << (vec->dim - i)) - 1;
-			vec_512 = _mm512_maskz_loadu_ps(mask, vec->x + i);
-			vec_256h = _mm512_cvtxps_ph(vec_512);
-			_mm256_mask_storeu_epi16(result->x + i, mask, _mm256_castph_si256(vec_256h));
-		}
-		else
-		{
-			vec_512 = _mm512_loadu_ps(vec->x + i);
-			vec_256h = _mm512_cvtxps_ph(vec_512);
-			_mm256_storeu_ph(result->x + i, vec_256h);
-		}
-
-		/* Test for positive and negative infinity */
-		vec_512_inf = _mm512_fpclass_ps_mask(vec_512, 0x08 + 0x10);
-		vec_256h_inf = _mm256_fpclass_ph_mask(vec_256h, 0x08 + 0x10);
-		if (unlikely(vec_512_inf != vec_256h_inf))
-		{
-			float num;
-			char* buf;
-
-			__mmask16 diff = _kxor_mask16(vec_512_inf, vec_256h_inf);
-			/* Find first element in vector to overflow after conversion (first bit set) */
-			int count = 0;
-			while (diff % 2 == 0) {
-				diff >>= 1;
-				count++;
-			}
-			num = vec->x[i + count];
-
-			/* TODO Avoid duplicate code in Float4ToHalf */
-			buf = palloc(FLOAT_SHORTEST_DECIMAL_LEN);
-
-			float_to_shortest_decimal_buf(num, buf);
-
-			ereport(ERROR,
-					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-					 errmsg("\"%s\" is out of range for type halfvec", buf)));
-		}
-	}
-}
-#endif
 
 #ifdef HALFVEC_DISPATCH
 #define CPU_FEATURE_FMA     (1 << 12)
@@ -659,64 +281,6 @@ SupportsCpuFeature(unsigned int feature)
 	/* Now check features */
 	return (exx[2] & feature) == feature;
 }
-
-#ifdef HAVE_AVX512FP16
-TARGET_XSAVE static bool
-SupportsOsXsave()
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#else
-	__cpuid(exx, 1);
-#endif
-
-	return (exx[2] & CPU_FEATURE_OSXSAVE) == CPU_FEATURE_OSXSAVE;
-}
-
-#define CPU_FEATURE_AVX512F     (1 << 16)
-#define CPU_FEATURE_AVX512DQ    (1 << 17)
-#define CPU_FEATURE_AVX512_FP16 (1 << 23)
-#define CPU_FEATURE_AVX512BW    (1 << 30)
-#define CPU_FEATURE_AVX512VL    (1 << 31)
-
-TARGET_XSAVE static bool
-SupportsAvx512Fp16()
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-	/* AVX512 features required:
-	 *  AVX512F : sub/fma/add instructions
-	 *  AVX512DQ: _mm512_extracti32x8_epi32
-	 *  AVX512VL: _mm256_loadu_ph
-	 *  AVX512BW: masked loads
-	 */
-	unsigned int features = CPU_FEATURE_AVX512F |
-	                        CPU_FEATURE_AVX512DQ |
-	                        CPU_FEATURE_AVX512VL |
-	                        CPU_FEATURE_AVX512BW;
-
-	/* Check OS supports XSAVE */
-	if (!SupportsOsXsave())
-		return false;
-
-	/* Check XMM, YMM, and ZMM registers are enabled */
-	if ((_xgetbv(0) & 0xe6) != 0xe6)
-		return false;
-		
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 7, 0);
-#endif
-
-	if ((exx[1] & features) != features)
-		return false;
-
-	return (exx[3] & CPU_FEATURE_AVX512_FP16) == CPU_FEATURE_AVX512_FP16;
-}
-#endif
 #endif
 
 void
@@ -742,7 +306,7 @@ HalfvecInit(void)
 		HalfvecL1Distance = HalfvecL1DistanceF16c;
 	}
 
-#ifdef HAVE_AVX512FP16
+#if defined(USE_AVX512) && defined(HAVE_AVX512FP16)
 	if (SupportsAvx512Fp16())
 	{
 		HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceAvx512;
diff --git a/src/halfutils_avx512.c b/src/halfutils_avx512.c
new file mode 100644
index 000000000..27d907781
--- /dev/null
+++ b/src/halfutils_avx512.c
@@ -0,0 +1,449 @@
+#ifdef USE_AVX512
+#include "halfutils_avx512.h"
+
+#ifdef HAVE_AVX512FP16
+#include "common/shortest_dec.h"
+
+#include <immintrin.h>
+#include <math.h>
+
+#if defined(USE__GET_CPUID)
+#include <cpuid.h>
+#else
+#include <intrin.h>
+#endif
+
+#ifdef _MSC_VER
+#define TARGET_AVX512FP16
+#else
+#define TARGET_AVX512FP16 __attribute__((target("avx512fp16,avx512f,avx512dq,avx512vl,avx512bw")))
+#endif
+
+TARGET_AVX512FP16 static inline bool
+HasInfinity(__m512h val) {
+	/* Test for positive and negative infinity */
+	__mmask32 mask = _mm512_fpclass_ph_mask(val, 0x08 + 0x10);
+	return mask != 0;
+}
+
+TARGET_AVX512FP16 static inline __m512
+ConvertToFp32Sum(__m512h val) {
+	__m256h val_lower = _mm256_castsi256_ph(_mm512_extracti32x8_epi32(_mm512_castph_si512(val), 0));
+	__m256h val_upper = _mm256_castsi256_ph(_mm512_extracti32x8_epi32(_mm512_castph_si512(val), 1));
+	return _mm512_add_ps(_mm512_cvtxph_ps(val_lower), _mm512_cvtxph_ps(val_upper));
+}
+
+TARGET_AVX512FP16 float
+HalfvecL2SquaredDistanceAvx512(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	unsigned long mask;
+
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		diff_512h;
+	__m512h		dist_512h = _mm512_setzero_ph();
+	__m512h		dist_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h		bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		diff_512;
+	__m512		dist_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
+	{
+		if (dim - i < 32)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		diff_512h = _mm512_sub_ph(axi_512h, bxi_512h);
+		dist_512h_temp = _mm512_fmadd_ph(diff_512h, diff_512h, dist_512h);
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(dist_512h_temp))
+			break;
+		else
+			dist_512h = dist_512h_temp;
+	}
+	dist_512 = ConvertToFp32Sum(dist_512h);
+
+	/* FP32 computation */
+	for (; i < dim; i += 16)
+	{
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		diff_512 = _mm512_sub_ps(axi_512, bxi_512);
+		dist_512 = _mm512_fmadd_ps(diff_512, diff_512, dist_512);
+	}
+
+	distance = _mm512_reduce_add_ps(dist_512);
+	return distance;
+}
+
+TARGET_AVX512FP16 float
+HalfvecInnerProductAvx512(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	unsigned int mask;
+
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		dist_512h = _mm512_setzero_ph();
+	__m512h		dist_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h		bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		dist_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
+	{
+		if (dim - i < 32)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		dist_512h_temp = _mm512_fmadd_ph(axi_512h, bxi_512h, dist_512h);
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(dist_512h_temp))
+			break;
+		else
+			dist_512h = dist_512h_temp;
+	}
+	dist_512 = ConvertToFp32Sum(dist_512h);
+
+	/* FP32 computation */
+	for (; i < dim; i += 16)
+	{
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		dist_512 = _mm512_fmadd_ps(axi_512, bxi_512, dist_512);
+	}
+
+	distance = _mm512_reduce_add_ps(dist_512);
+	return distance;
+}
+
+TARGET_AVX512FP16 double
+HalfvecCosineSimilarityAvx512(int dim, half * ax, half * bx)
+{
+	float		similarity;
+	float		norma;
+	float		normb;
+	int			i;
+	unsigned int mask;
+
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		sim_512h = _mm512_setzero_ph();
+	__m512h		na_512h = _mm512_setzero_ph();
+	__m512h		nb_512h = _mm512_setzero_ph();
+	__m512h		sim_512h_temp;
+	__m512h		na_512h_temp;
+	__m512h		nb_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h 	bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		sim_512;
+	__m512		na_512;
+	__m512		nb_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
+	{
+		if (dim - i < 32) {
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else {
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		sim_512h_temp = _mm512_fmadd_ph(axi_512h, bxi_512h, sim_512h);
+		na_512h_temp = _mm512_fmadd_ph(axi_512h, axi_512h, na_512h);
+		nb_512h_temp = _mm512_fmadd_ph(bxi_512h, bxi_512h, nb_512h);
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(sim_512h_temp) ||
+			HasInfinity(na_512h_temp) ||
+			HasInfinity(nb_512h_temp))
+			break;
+		else
+		{
+			sim_512h = sim_512h_temp;
+			na_512h = na_512h_temp;
+			nb_512h = nb_512h_temp;
+		}
+	}
+	sim_512 = ConvertToFp32Sum(sim_512h);
+	na_512 = ConvertToFp32Sum(na_512h);
+	nb_512 = ConvertToFp32Sum(nb_512h);
+
+	/* FP32 computation */
+	for (; i < dim; i += 16)
+	{
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		sim_512 = _mm512_fmadd_ps(axi_512, bxi_512, sim_512);
+		na_512 = _mm512_fmadd_ps(axi_512, axi_512, na_512);
+		nb_512 = _mm512_fmadd_ps(bxi_512, bxi_512, nb_512);
+	}
+
+	similarity = _mm512_reduce_add_ps(sim_512);
+	norma = _mm512_reduce_add_ps(na_512);
+	normb = _mm512_reduce_add_ps(nb_512);
+
+	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
+	return (double) similarity / sqrt((double) norma * (double) normb);
+}
+
+TARGET_AVX512FP16 float
+HalfvecL1DistanceAvx512(int dim, half * ax, half * bx)
+{
+	float		distance;
+	int			i;
+	unsigned long mask;
+
+	/* For FP16 computation */
+	__m512h		axi_512h;
+	__m512h		bxi_512h;
+	__m512h		dist_512h = _mm512_setzero_ph();
+	__m512h		dist_512h_temp;
+
+	/* For FP32 computation */
+	__m256h		axi_256h;
+	__m256h		bxi_256h;
+	__m512		axi_512;
+	__m512		bxi_512;
+	__m512		dist_512;
+
+	/* FP16 computation */
+	for (i = 0; i < dim; i += 32)
+	{
+		if (dim - i < 32)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, ax + i));
+			bxi_512h = _mm512_castsi512_ph(_mm512_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_512h = _mm512_loadu_ph(ax + i);
+			bxi_512h = _mm512_loadu_ph(bx + i);
+		}
+		dist_512h_temp = _mm512_add_ph(dist_512h, _mm512_abs_ph(_mm512_sub_ph(axi_512h, bxi_512h)));
+
+		/* if overflow, continue with FP32 */
+		if (HasInfinity(dist_512h_temp))
+			break;
+		else
+			dist_512h = dist_512h_temp;
+	}
+	dist_512 = ConvertToFp32Sum(dist_512h);
+
+	/* FP32 computation */
+	for (; i < dim; i += 16)
+	{
+		if (dim - i < 16)
+		{
+			mask = (1 << (dim - i)) - 1;
+			axi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, ax + i));
+			bxi_256h = _mm256_castsi256_ph(_mm256_maskz_loadu_epi16(mask, bx + i));
+		}
+		else
+		{
+			axi_256h = _mm256_loadu_ph(ax + i);
+			bxi_256h = _mm256_loadu_ph(bx + i);
+		}
+		axi_512 = _mm512_cvtxph_ps(axi_256h);
+		bxi_512 = _mm512_cvtxph_ps(bxi_256h);
+		dist_512 = _mm512_add_ps(dist_512, _mm512_abs_ps(_mm512_sub_ps(axi_512, bxi_512)));
+	}
+
+	distance = _mm512_reduce_add_ps(dist_512);
+
+	return distance;
+}
+
+TARGET_AVX512FP16 void
+Float4ToHalfVectorAvx512(Vector * vec, HalfVector * result)
+{
+	unsigned long mask;
+	__m512		vec_512;
+	__m256h		vec_256h;
+	__mmask16 	vec_512_inf;
+	__mmask16 	vec_256h_inf;
+
+	for (int i = 0; i < vec->dim; i += 16)
+	{
+		if (vec->dim - i < 16)
+		{
+			mask = (1 << (vec->dim - i)) - 1;
+			vec_512 = _mm512_maskz_loadu_ps(mask, vec->x + i);
+			vec_256h = _mm512_cvtxps_ph(vec_512);
+			_mm256_mask_storeu_epi16(result->x + i, mask, _mm256_castph_si256(vec_256h));
+		}
+		else
+		{
+			vec_512 = _mm512_loadu_ps(vec->x + i);
+			vec_256h = _mm512_cvtxps_ph(vec_512);
+			_mm256_storeu_ph(result->x + i, vec_256h);
+		}
+
+		/* Test for positive and negative infinity */
+		vec_512_inf = _mm512_fpclass_ps_mask(vec_512, 0x08 + 0x10);
+		vec_256h_inf = _mm256_fpclass_ph_mask(vec_256h, 0x08 + 0x10);
+		if (unlikely(vec_512_inf != vec_256h_inf))
+		{
+			float num;
+			char* buf;
+
+			__mmask16 diff = _kxor_mask16(vec_512_inf, vec_256h_inf);
+			/* Find first element in vector to overflow after conversion (first bit set) */
+			int count = 0;
+			while (diff % 2 == 0) {
+				diff >>= 1;
+				count++;
+			}
+			num = vec->x[i + count];
+
+			/* TODO Avoid duplicate code in Float4ToHalf */
+			buf = palloc(FLOAT_SHORTEST_DECIMAL_LEN);
+
+			float_to_shortest_decimal_buf(num, buf);
+
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("\"%s\" is out of range for type halfvec", buf)));
+		}
+	}
+}
+
+#define CPU_FEATURE_OSXSAVE     (1 << 27)
+#define CPU_FEATURE_AVX512F     (1 << 16)
+#define CPU_FEATURE_AVX512DQ    (1 << 17)
+#define CPU_FEATURE_AVX512_FP16 (1 << 23)
+#define CPU_FEATURE_AVX512BW    (1 << 30)
+#define CPU_FEATURE_AVX512VL    (1 << 31)
+
+#ifdef _MSC_VER
+#define TARGET_XSAVE
+#else
+#define TARGET_XSAVE __attribute__((target("xsave")))
+#endif
+
+TARGET_XSAVE static bool
+SupportsOsXsave()
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#else
+	__cpuid(exx, 1);
+#endif
+
+	return (exx[2] & CPU_FEATURE_OSXSAVE) == CPU_FEATURE_OSXSAVE;
+}
+
+TARGET_XSAVE bool
+SupportsAvx512Fp16()
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+	/* AVX512 features required:
+	 *  AVX512F : sub/fma/add instructions
+	 *  AVX512DQ: _mm512_extracti32x8_epi32
+	 *  AVX512VL: _mm256_loadu_ph
+	 *  AVX512BW: masked loads
+	 */
+	unsigned int features = CPU_FEATURE_AVX512F |
+	                        CPU_FEATURE_AVX512DQ |
+	                        CPU_FEATURE_AVX512VL |
+	                        CPU_FEATURE_AVX512BW;
+
+	/* Check OS supports XSAVE */
+	if (!SupportsOsXsave())
+		return false;
+
+	/* Check XMM, YMM, and ZMM registers are enabled */
+	if ((_xgetbv(0) & 0xe6) != 0xe6)
+		return false;
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 7, 0);
+#endif
+
+	if ((exx[1] & features) != features)
+		return false;
+
+	return (exx[3] & CPU_FEATURE_AVX512_FP16) == CPU_FEATURE_AVX512_FP16;
+}
+
+#endif
+#endif
diff --git a/src/halfutils_avx512.h b/src/halfutils_avx512.h
new file mode 100644
index 000000000..9f94f11dc
--- /dev/null
+++ b/src/halfutils_avx512.h
@@ -0,0 +1,25 @@
+#ifndef HALFUTILS_AVX512_H
+#define HALFUTILS_AVX512_H
+
+#ifdef USE_AVX512
+#include "postgres.h"
+#include "halfvec.h"
+#include "vector.h"
+
+#if (defined(__GNUC__) && (__GNUC__ >= 12)) || \
+	(defined(__clang__) && (__clang_major__ >= 16)) || \
+	(defined __AVX512FP16__)
+#define HAVE_AVX512FP16
+#endif
+
+#ifdef HAVE_AVX512FP16
+extern float HalfvecL2SquaredDistanceAvx512(int dim, half * ax, half * bx);
+extern float HalfvecInnerProductAvx512(int dim, half * ax, half * bx);
+extern double HalfvecCosineSimilarityAvx512(int dim, half * ax, half * bx);
+extern float HalfvecL1DistanceAvx512(int dim, half * ax, half * bx);
+extern void Float4ToHalfVectorAvx512(Vector * vec, HalfVector * result);
+
+extern bool SupportsAvx512Fp16(void);
+#endif
+#endif
+#endif

From 5655669325693d13db20c82df055127f3cc39434 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 26 Mar 2025 11:56:38 -0700
Subject: [PATCH 06/64] Added note about index options to index build time docs
 - #807 [skip ci]

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a1077cd45..1ee996b94 100644
--- a/README.md
+++ b/README.md
@@ -318,7 +318,9 @@ You can also speed up index creation by increasing the number of parallel worker
 SET max_parallel_maintenance_workers = 7; -- plus leader
 ```
 
-For a large number of workers, you may also need to increase `max_parallel_workers` (8 by default)
+For a large number of workers, you may need to increase `max_parallel_workers` (8 by default)
+
+The [index options](#index-options) also have a significant impact on build time (use the defaults unless seeing low recall)
 
 ### Indexing Progress
 

From 69d03ba2d21754304b8efef4f39792cf29000415 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 5 Apr 2025 11:31:57 -0700
Subject: [PATCH 07/64] Added fields to IndexAmRoutine for Postgres 18 [skip
 ci]

---
 src/hnsw.c    | 13 +++++++++++++
 src/ivfflat.c | 13 +++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/hnsw.c b/src/hnsw.c
index 5bfc6193e..ec7c98852 100644
--- a/src/hnsw.c
+++ b/src/hnsw.c
@@ -259,6 +259,11 @@ hnswhandler(PG_FUNCTION_ARGS)
 	amroutine->amoptsprocnum = 0;
 	amroutine->amcanorder = false;
 	amroutine->amcanorderbyop = true;
+#if PG_VERSION_NUM >= 180000
+	amroutine->amcanhash = false;
+	amroutine->amconsistentequality = false;
+	amroutine->amconsistentordering = false;
+#endif
 	amroutine->amcanbackward = false;	/* can change direction mid-scan */
 	amroutine->amcanunique = false;
 	amroutine->amcanmulticol = false;
@@ -291,6 +296,9 @@ hnswhandler(PG_FUNCTION_ARGS)
 	amroutine->amvacuumcleanup = hnswvacuumcleanup;
 	amroutine->amcanreturn = NULL;
 	amroutine->amcostestimate = hnswcostestimate;
+#if PG_VERSION_NUM >= 180000
+	amroutine->amgettreeheight = NULL;
+#endif
 	amroutine->amoptions = hnswoptions;
 	amroutine->amproperty = NULL;	/* TODO AMPROP_DISTANCE_ORDERABLE */
 	amroutine->ambuildphasename = hnswbuildphasename;
@@ -311,5 +319,10 @@ hnswhandler(PG_FUNCTION_ARGS)
 	amroutine->aminitparallelscan = NULL;
 	amroutine->amparallelrescan = NULL;
 
+#if PG_VERSION_NUM >= 180000
+	amroutine->amtranslatestrategy = NULL;
+	amroutine->amtranslatecmptype = NULL;
+#endif
+
 	PG_RETURN_POINTER(amroutine);
 }
diff --git a/src/ivfflat.c b/src/ivfflat.c
index 9e8370ffe..736d537d1 100644
--- a/src/ivfflat.c
+++ b/src/ivfflat.c
@@ -186,6 +186,11 @@ ivfflathandler(PG_FUNCTION_ARGS)
 	amroutine->amoptsprocnum = 0;
 	amroutine->amcanorder = false;
 	amroutine->amcanorderbyop = true;
+#if PG_VERSION_NUM >= 180000
+	amroutine->amcanhash = false;
+	amroutine->amconsistentequality = false;
+	amroutine->amconsistentordering = false;
+#endif
 	amroutine->amcanbackward = false;	/* can change direction mid-scan */
 	amroutine->amcanunique = false;
 	amroutine->amcanmulticol = false;
@@ -218,6 +223,9 @@ ivfflathandler(PG_FUNCTION_ARGS)
 	amroutine->amvacuumcleanup = ivfflatvacuumcleanup;
 	amroutine->amcanreturn = NULL;	/* tuple not included in heapsort */
 	amroutine->amcostestimate = ivfflatcostestimate;
+#if PG_VERSION_NUM >= 180000
+	amroutine->amgettreeheight = NULL;
+#endif
 	amroutine->amoptions = ivfflatoptions;
 	amroutine->amproperty = NULL;	/* TODO AMPROP_DISTANCE_ORDERABLE */
 	amroutine->ambuildphasename = ivfflatbuildphasename;
@@ -238,5 +246,10 @@ ivfflathandler(PG_FUNCTION_ARGS)
 	amroutine->aminitparallelscan = NULL;
 	amroutine->amparallelrescan = NULL;
 
+#if PG_VERSION_NUM >= 180000
+	amroutine->amtranslatestrategy = NULL;
+	amroutine->amtranslatecmptype = NULL;
+#endif
+
 	PG_RETURN_POINTER(amroutine);
 }

From ad6baf48db7a206ffef842ccd8c58553e9694386 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 5 Apr 2025 12:13:38 -0700
Subject: [PATCH 08/64] Fixed warnings with Postgres 18 [skip ci]

---
 src/hnsw.c    | 3 +++
 src/ivfflat.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/hnsw.c b/src/hnsw.c
index ec7c98852..d0d7086dd 100644
--- a/src/hnsw.c
+++ b/src/hnsw.c
@@ -223,6 +223,9 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 static bytea *
 hnswoptions(Datum reloptions, bool validate)
 {
+#if PG_VERSION_NUM >= 180000
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
 	static const relopt_parse_elt tab[] = {
 		{"m", RELOPT_TYPE_INT, offsetof(HnswOptions, m)},
 		{"ef_construction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)},
diff --git a/src/ivfflat.c b/src/ivfflat.c
index 736d537d1..9d95f1d19 100644
--- a/src/ivfflat.c
+++ b/src/ivfflat.c
@@ -151,6 +151,9 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 static bytea *
 ivfflatoptions(Datum reloptions, bool validate)
 {
+#if PG_VERSION_NUM >= 180000
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
 	static const relopt_parse_elt tab[] = {
 		{"lists", RELOPT_TYPE_INT, offsetof(IvfflatOptions, lists)},
 	};

From de5857d69866dca40ec9d77cd2030bfaafff1e7e Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 5 Apr 2025 12:38:30 -0700
Subject: [PATCH 09/64] Improved warning check [skip ci]

---
 src/hnsw.c    | 2 +-
 src/ivfflat.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hnsw.c b/src/hnsw.c
index d0d7086dd..5d16ae36b 100644
--- a/src/hnsw.c
+++ b/src/hnsw.c
@@ -223,7 +223,7 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 static bytea *
 hnswoptions(Datum reloptions, bool validate)
 {
-#if PG_VERSION_NUM >= 180000
+#if PG_VERSION_NUM >= 180000 && defined(__GNUC__)
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
 #endif
 	static const relopt_parse_elt tab[] = {
diff --git a/src/ivfflat.c b/src/ivfflat.c
index 9d95f1d19..3ff0299cb 100644
--- a/src/ivfflat.c
+++ b/src/ivfflat.c
@@ -151,7 +151,7 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 static bytea *
 ivfflatoptions(Datum reloptions, bool validate)
 {
-#if PG_VERSION_NUM >= 180000
+#if PG_VERSION_NUM >= 180000 && defined(__GNUC__)
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
 #endif
 	static const relopt_parse_elt tab[] = {

From 3d594936c65830c2a215327c129c841e3e51d015 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 5 Apr 2025 12:56:00 -0700
Subject: [PATCH 10/64] Revert "Fixed warnings with Postgres 18 [skip ci]"

This reverts commit 32e95a8598200788bfc13fb1b9b8284b45f07eda.
---
 src/hnsw.c    | 3 ---
 src/ivfflat.c | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/src/hnsw.c b/src/hnsw.c
index 5d16ae36b..ec7c98852 100644
--- a/src/hnsw.c
+++ b/src/hnsw.c
@@ -223,9 +223,6 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 static bytea *
 hnswoptions(Datum reloptions, bool validate)
 {
-#if PG_VERSION_NUM >= 180000 && defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
-#endif
 	static const relopt_parse_elt tab[] = {
 		{"m", RELOPT_TYPE_INT, offsetof(HnswOptions, m)},
 		{"ef_construction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)},
diff --git a/src/ivfflat.c b/src/ivfflat.c
index 3ff0299cb..736d537d1 100644
--- a/src/ivfflat.c
+++ b/src/ivfflat.c
@@ -151,9 +151,6 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 static bytea *
 ivfflatoptions(Datum reloptions, bool validate)
 {
-#if PG_VERSION_NUM >= 180000 && defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
-#endif
 	static const relopt_parse_elt tab[] = {
 		{"lists", RELOPT_TYPE_INT, offsetof(IvfflatOptions, lists)},
 	};

From af3a48276c9f4efff1a9020e710da1b16729f13b Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 5 Apr 2025 13:05:27 -0700
Subject: [PATCH 11/64] Fixed CI for Postgres 18 [skip ci]

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 93cb37134..fcfcf4df4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -28,7 +28,7 @@ jobs:
           dev-files: true
       - run: make
         env:
-          PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
+          PG_CFLAGS: ${{ matrix.postgres == 18 && '-Wno-missing-field-initializers' || '' }} -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
       - run: |
           export PG_CONFIG=`which pg_config`
           sudo --preserve-env=PG_CONFIG make install

From c2544d2a55c23b13dc61f3e4ef6141c59463d5f3 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sun, 13 Apr 2025 13:07:31 -0700
Subject: [PATCH 12/64] Updated readme [skip ci]

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 1ee996b94..ef56d354c 100644
--- a/README.md
+++ b/README.md
@@ -477,8 +477,6 @@ CREATE TABLE items (embedding vector(3), category_id int) PARTITION BY LIST(cate
 
 ## Iterative Index Scans
 
-*Added in 0.8.0*
-
 With approximate indexes, queries with filtering can return less results since filtering is applied *after* the index is scanned. Starting with 0.8.0, you can enable iterative index scans, which will automatically scan more of the index until enough results are found (or it reaches `hnsw.max_scan_tuples` or `ivfflat.max_probes`).
 
 Iterative scans can use strict or relaxed ordering.

From 7c96bbb6ee1016473039afd5c4a6941e914f4040 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 6 May 2025 21:39:51 -0700
Subject: [PATCH 13/64] Updated readme [skip ci]

---
 README.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ef56d354c..d8db12ad0 100644
--- a/README.md
+++ b/README.md
@@ -754,8 +754,6 @@ SELECT query, calls, ROUND((total_plan_time + total_exec_time) / calls) AS avg_t
     FROM pg_stat_statements ORDER BY total_plan_time + total_exec_time DESC LIMIT 20;
 ```
 
-Note: Replace `total_plan_time + total_exec_time` with `total_time` for Postgres < 13
-
 Monitor recall by comparing results from approximate search with exact search.
 
 ```sql
@@ -824,7 +822,7 @@ You can use [half-precision indexing](#half-precision-indexing) to index up to 4
 
 #### Can I store vectors with different dimensions in the same column?
 
-You can use `vector` as the type (instead of `vector(3)`).
+You can use `vector` as the type (instead of `vector(n)`).
 
 ```sql
 CREATE TABLE embeddings (model_id bigint, item_id bigint, embedding vector, PRIMARY KEY (model_id, item_id));

From 23ffa728b2389f867e1a0ba81ed886e3386fc74c Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Mon, 12 May 2025 13:11:43 -0700
Subject: [PATCH 14/64] Updated readme [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d8db12ad0..191e16865 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ Note: The exact path will vary depending on your Visual Studio version and editi
 Then use `nmake` to build:
 
 ```cmd
-set "PGROOT=C:\Program Files\PostgreSQL\16"
+set "PGROOT=C:\Program Files\PostgreSQL\17"
 cd %TEMP%
 git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git
 cd pgvector

From ee12fbd1f77f724774d80f716fd3946cc6a67540 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 4 Jun 2025 14:18:53 -0700
Subject: [PATCH 15/64] Updated Windows installation instructions - #593 [skip
 ci]

---
 README.md | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 191e16865..2fe18907e 100644
--- a/README.md
+++ b/README.md
@@ -33,15 +33,7 @@ You can also install it with [Docker](#docker), [Homebrew](#homebrew), [PGXN](#p
 
 ### Windows
 
-Ensure [C++ support in Visual Studio](https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=msvc-170#download-and-install-the-tools) is installed, and run:
-
-```cmd
-call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"
-```
-
-Note: The exact path will vary depending on your Visual Studio version and edition
-
-Then use `nmake` to build:
+Ensure [C++ support in Visual Studio](https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=msvc-170#download-and-install-the-tools) is installed and run `x64 Native Tools Command Prompt for VS 2022` as administrator. Then use `nmake` to build:
 
 ```cmd
 set "PGROOT=C:\Program Files\PostgreSQL\17"
@@ -1127,7 +1119,7 @@ If compilation fails with `Cannot open include file: 'postgres.h': No such file
 
 ### Mismatched Architecture
 
-If compilation fails with `error C2196: case value '4' already used`, make sure `vcvars64.bat` was called. Then run `nmake /F Makefile.win clean` and re-run the installation instructions.
+If compilation fails with `error C2196: case value '4' already used`, make sure you’re using the `x64 Native Tools Command Prompt`. Then run `nmake /F Makefile.win clean` and re-run the installation instructions.
 
 ### Missing Symbol
 

From fc99a01a0a40bb66f5bbd15b72b9f4e741f9de7b Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 4 Jun 2025 14:29:53 -0700
Subject: [PATCH 16/64] Updated readme [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2fe18907e..344971611 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ You can also install it with [Docker](#docker), [Homebrew](#homebrew), [PGXN](#p
 
 ### Windows
 
-Ensure [C++ support in Visual Studio](https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=msvc-170#download-and-install-the-tools) is installed and run `x64 Native Tools Command Prompt for VS 2022` as administrator. Then use `nmake` to build:
+Ensure [C++ support in Visual Studio](https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=msvc-170#download-and-install-the-tools) is installed and run `x64 Native Tools Command Prompt for VS [version]` as administrator. Then use `nmake` to build:
 
 ```cmd
 set "PGROOT=C:\Program Files\PostgreSQL\17"

From d40a22fecf0f45b84036f2fc87b6f069be25e868 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 18 Jun 2025 10:57:18 -0700
Subject: [PATCH 17/64] Set random seed for IVFFLAT_BENCH [skip ci]

---
 src/ivfbuild.c | 4 ++++
 src/ivfflat.h  | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/src/ivfbuild.c b/src/ivfbuild.c
index 944f07b55..c51f40884 100644
--- a/src/ivfbuild.c
+++ b/src/ivfbuild.c
@@ -1023,6 +1023,10 @@ ivfflatbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	IndexBuildResult *result;
 	IvfflatBuildState buildstate;
 
+#ifdef IVFFLAT_BENCH
+	SeedRandom(42);
+#endif
+
 	BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM);
 
 	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
diff --git a/src/ivfflat.h b/src/ivfflat.h
index c296b6677..76608f334 100644
--- a/src/ivfflat.h
+++ b/src/ivfflat.h
@@ -73,9 +73,11 @@
 #if PG_VERSION_NUM >= 150000
 #define RandomDouble() pg_prng_double(&pg_global_prng_state)
 #define RandomInt() pg_prng_uint32(&pg_global_prng_state)
+#define SeedRandom(seed) pg_prng_seed(&pg_global_prng_state, seed)
 #else
 #define RandomDouble() (((double) random()) / MAX_RANDOM_VALUE)
 #define RandomInt() random()
+#define SeedRandom(seed) srandom(seed)
 #endif
 
 /* Variables */

From fc7a8e8e37ce5517ff23663ceb3f69754208f730 Mon Sep 17 00:00:00 2001
From: Jon Daniel <binarycleric@users.noreply.github.com>
Date: Wed, 18 Jun 2025 19:09:43 -0400
Subject: [PATCH 18/64] vectorize: optimize VectorSumCenter and
 HalfvecSumCenter (#860)

* vectorize: optimize VectorSumCenter and HalfvecSumCenter

The functions VectorSumCenter and HalfvecSumCenter were not being
vectorized by the compiler. A few slight changes will allow these
optimizations to take place and get a performance boost by utilizing
SIMD instructions.

This optimization helps improve performance of vector operations in IVF
index building and updating.

* Removing const, commenting that it is only vectoirzed on ARM
---
 src/ivfutils.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/ivfutils.c b/src/ivfutils.c
index da241ee0a..044fbabf6 100644
--- a/src/ivfutils.c
+++ b/src/ivfutils.c
@@ -295,8 +295,10 @@ static void
 VectorSumCenter(Pointer v, float *x)
 {
 	Vector	   *vec = (Vector *) v;
+	int dim = vec->dim;
 
-	for (int k = 0; k < vec->dim; k++)
+	/* Auto-vectorized */
+	for (int k = 0; k < dim; k++)
 		x[k] += vec->x[k];
 }
 
@@ -304,8 +306,10 @@ static void
 HalfvecSumCenter(Pointer v, float *x)
 {
 	HalfVector *vec = (HalfVector *) v;
+	int dim = vec->dim;
 
-	for (int k = 0; k < vec->dim; k++)
+	/* Auto-vectorized on aarch64 */
+	for (int k = 0; k < dim; k++)
 		x[k] += HalfToFloat4(vec->x[k]);
 }
 

From bb2dc96f66aa1f5d4a8389de5128bdae25b9c3d7 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 18 Jun 2025 16:11:15 -0700
Subject: [PATCH 19/64] Ran pgindent [skip ci]

---
 src/ivfutils.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ivfutils.c b/src/ivfutils.c
index 044fbabf6..3f5535955 100644
--- a/src/ivfutils.c
+++ b/src/ivfutils.c
@@ -295,7 +295,7 @@ static void
 VectorSumCenter(Pointer v, float *x)
 {
 	Vector	   *vec = (Vector *) v;
-	int dim = vec->dim;
+	int			dim = vec->dim;
 
 	/* Auto-vectorized */
 	for (int k = 0; k < dim; k++)
@@ -306,7 +306,7 @@ static void
 HalfvecSumCenter(Pointer v, float *x)
 {
 	HalfVector *vec = (HalfVector *) v;
-	int dim = vec->dim;
+	int			dim = vec->dim;
 
 	/* Auto-vectorized on aarch64 */
 	for (int k = 0; k < dim; k++)

From 19ae89c19588727b7939d2639d1322185bc1fef8 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 18 Jun 2025 16:26:30 -0700
Subject: [PATCH 20/64] Fixed CI [skip ci]

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index fcfcf4df4..9ed7ff4bf 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -89,7 +89,7 @@ jobs:
           postgres-version: ${{ matrix.postgres }}
       - run: make
         env:
-          PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter
+          PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unknown-warning-option
       - run: make install
       - run: make installcheck
       - if: ${{ failure() }}

From 95d7cc40c8c9e1fa0386246c58832fc410f4df47 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 18 Jun 2025 17:32:37 -0700
Subject: [PATCH 21/64] Improved variable names [skip ci]

---
 src/ivfutils.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/ivfutils.c b/src/ivfutils.c
index 3f5535955..9596391be 100644
--- a/src/ivfutils.c
+++ b/src/ivfutils.c
@@ -259,8 +259,8 @@ VectorUpdateCenter(Pointer v, int dimensions, float *x)
 	SET_VARSIZE(vec, VECTOR_SIZE(dimensions));
 	vec->dim = dimensions;
 
-	for (int k = 0; k < dimensions; k++)
-		vec->x[k] = x[k];
+	for (int i = 0; i < dimensions; i++)
+		vec->x[i] = x[i];
 }
 
 static void
@@ -271,8 +271,8 @@ HalfvecUpdateCenter(Pointer v, int dimensions, float *x)
 	SET_VARSIZE(vec, HALFVEC_SIZE(dimensions));
 	vec->dim = dimensions;
 
-	for (int k = 0; k < dimensions; k++)
-		vec->x[k] = Float4ToHalfUnchecked(x[k]);
+	for (int i = 0; i < dimensions; i++)
+		vec->x[i] = Float4ToHalfUnchecked(x[i]);
 }
 
 static void
@@ -284,11 +284,11 @@ BitUpdateCenter(Pointer v, int dimensions, float *x)
 	SET_VARSIZE(vec, VARBITTOTALLEN(dimensions));
 	VARBITLEN(vec) = dimensions;
 
-	for (uint32 k = 0; k < VARBITBYTES(vec); k++)
-		nx[k] = 0;
+	for (uint32 i = 0; i < VARBITBYTES(vec); i++)
+		nx[i] = 0;
 
-	for (int k = 0; k < dimensions; k++)
-		nx[k / 8] |= (x[k] > 0.5 ? 1 : 0) << (7 - (k % 8));
+	for (int i = 0; i < dimensions; i++)
+		nx[i / 8] |= (x[i] > 0.5 ? 1 : 0) << (7 - (i % 8));
 }
 
 static void
@@ -298,8 +298,8 @@ VectorSumCenter(Pointer v, float *x)
 	int			dim = vec->dim;
 
 	/* Auto-vectorized */
-	for (int k = 0; k < dim; k++)
-		x[k] += vec->x[k];
+	for (int i = 0; i < dim; i++)
+		x[i] += vec->x[i];
 }
 
 static void
@@ -309,8 +309,8 @@ HalfvecSumCenter(Pointer v, float *x)
 	int			dim = vec->dim;
 
 	/* Auto-vectorized on aarch64 */
-	for (int k = 0; k < dim; k++)
-		x[k] += HalfToFloat4(vec->x[k]);
+	for (int i = 0; i < dim; i++)
+		x[i] += HalfToFloat4(vec->x[i]);
 }
 
 static void
@@ -318,8 +318,8 @@ BitSumCenter(Pointer v, float *x)
 {
 	VarBit	   *vec = (VarBit *) v;
 
-	for (int k = 0; k < VARBITLEN(vec); k++)
-		x[k] += (float) (((VARBITS(vec)[k / 8]) >> (7 - (k % 8))) & 0x01);
+	for (int i = 0; i < VARBITLEN(vec); i++)
+		x[i] += (float) (((VARBITS(vec)[i / 8]) >> (7 - (i % 8))) & 0x01);
 }
 
 /*

From 61b00f065e7bfbbebf42a940fc80c8e0fc3d0c3b Mon Sep 17 00:00:00 2001
From: Jon Daniel <binarycleric@users.noreply.github.com>
Date: Wed, 18 Jun 2025 23:06:32 -0400
Subject: [PATCH 22/64] Vectorizing vector_concat for improved performance
 (#861)

* Vectorizing vector_concat for improved performance

On an ARM chip this should generate SIMD instructions to copy the two
incoming vectors to the new vector as opposed to doing it all in
software.

* Moving declarations to above CheckDim

* Removing const from dims

* Formatting
---
 src/vector.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/vector.c b/src/vector.c
index a5b2aac36..0ced82bbd 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -916,15 +916,19 @@ vector_concat(PG_FUNCTION_ARGS)
 	Vector	   *b = PG_GETARG_VECTOR_P(1);
 	Vector	   *result;
 	int			dim = a->dim + b->dim;
+	int     dim_a = a->dim;
+	int     dim_b = b->dim;
 
 	CheckDim(dim);
 	result = InitVector(dim);
 
-	for (int i = 0; i < a->dim; i++)
+	/* Auto-vectorized */
+	for (int i = 0; i < dim_a; i++)
 		result->x[i] = a->x[i];
 
-	for (int i = 0; i < b->dim; i++)
-		result->x[i + a->dim] = b->x[i];
+	/* Auto-vectorized */
+	for (int i = 0; i < dim_b; i++)
+		result->x[i + dim_a] = b->x[i];
 
 	PG_RETURN_POINTER(result);
 }

From ec0138f80d9cd3b4356a171082a46fa621d1b47f Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 18 Jun 2025 20:07:46 -0700
Subject: [PATCH 23/64] Ran pgindent [skip ci]

---
 src/vector.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/vector.c b/src/vector.c
index 0ced82bbd..d2312e042 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -916,8 +916,8 @@ vector_concat(PG_FUNCTION_ARGS)
 	Vector	   *b = PG_GETARG_VECTOR_P(1);
 	Vector	   *result;
 	int			dim = a->dim + b->dim;
-	int     dim_a = a->dim;
-	int     dim_b = b->dim;
+	int			dim_a = a->dim;
+	int			dim_b = b->dim;
 
 	CheckDim(dim);
 	result = InitVector(dim);

From 505831a62695ceed9b014ffd8126808181c234a0 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 1 Jul 2025 03:35:26 -0700
Subject: [PATCH 24/64] Updated link [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 344971611..d4d2f49f6 100644
--- a/README.md
+++ b/README.md
@@ -783,7 +783,7 @@ Go | [pgvector-go](https://github.com/pgvector/pgvector-go)
 Haskell | [pgvector-haskell](https://github.com/pgvector/pgvector-haskell)
 Java, Kotlin, Groovy, Scala | [pgvector-java](https://github.com/pgvector/pgvector-java)
 JavaScript, TypeScript | [pgvector-node](https://github.com/pgvector/pgvector-node)
-Julia | [pgvector-julia](https://github.com/pgvector/pgvector-julia)
+Julia | [Pgvector.jl](https://github.com/pgvector/Pgvector.jl)
 Lisp | [pgvector-lisp](https://github.com/pgvector/pgvector-lisp)
 Lua | [pgvector-lua](https://github.com/pgvector/pgvector-lua)
 Nim | [pgvector-nim](https://github.com/pgvector/pgvector-nim)

From 8a2d50219ba6aaa95bf2cd7b4e9e1958fe2e46c8 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 1 Jul 2025 10:11:50 -0700
Subject: [PATCH 25/64] Fixed relaxed results example for Postgres 17 - #862
 [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d4d2f49f6..ab8a06dd3 100644
--- a/README.md
+++ b/README.md
@@ -492,7 +492,7 @@ With relaxed ordering, you can use a [materialized CTE](https://www.postgresql.o
 ```sql
 WITH relaxed_results AS MATERIALIZED (
     SELECT id, embedding <-> '[1,2,3]' AS distance FROM items WHERE category_id = 123 ORDER BY distance LIMIT 5
-) SELECT * FROM relaxed_results ORDER BY distance;
+) SELECT * FROM relaxed_results ORDER BY distance + 0;
 ```
 
 For queries that filter by distance, use a materialized CTE and place the distance filter outside of it for best performance (due to the [current behavior](https://www.postgresql.org/message-id/flat/CAOdR5yGUoMQ6j7M5hNUXrySzaqZVGf_Ne%2B8fwZMRKTFxU1nbJg%40mail.gmail.com) of the Postgres executor)

From 096c7068ae027686528af386196b70dfa72a8343 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 1 Jul 2025 11:13:29 -0700
Subject: [PATCH 26/64] Updated readme [skip ci]

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index ab8a06dd3..43c0430d2 100644
--- a/README.md
+++ b/README.md
@@ -495,6 +495,8 @@ WITH relaxed_results AS MATERIALIZED (
 ) SELECT * FROM relaxed_results ORDER BY distance + 0;
 ```
 
+Note: `+ 0` is needed for Postgres 17+
+
 For queries that filter by distance, use a materialized CTE and place the distance filter outside of it for best performance (due to the [current behavior](https://www.postgresql.org/message-id/flat/CAOdR5yGUoMQ6j7M5hNUXrySzaqZVGf_Ne%2B8fwZMRKTFxU1nbJg%40mail.gmail.com) of the Postgres executor)
 
 ```sql

From db8f3bfac481c9190dede94268866396d023d90d Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 9 Jul 2025 16:22:43 -0700
Subject: [PATCH 27/64] Synced .dockerignore with .gitignore [skip ci]

---
 .dockerignore | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.dockerignore b/.dockerignore
index 6c60e6d5d..49dfd6c71 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,8 +1,15 @@
 /.git/
 /dist/
+/log/
 /results/
 /tmp_check/
 /sql/vector--?.?.?.sql
 regression.*
 *.o
 *.so
+*.bc
+*.dll
+*.dylib
+*.obj
+*.lib
+*.exp

From 2b169b5e30b9256cbbfb416b1b989b1b121eddd6 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 9 Jul 2025 16:25:05 -0700
Subject: [PATCH 28/64] Added OS to Dockerfile [skip ci]

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 936440928..840330258 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 ARG PG_MAJOR=17
-FROM postgres:$PG_MAJOR
+FROM postgres:$PG_MAJOR-bookworm
 ARG PG_MAJOR
 
 COPY . /tmp/pgvector

From ac8f7ed6b9c7fe48bf27fe6af8a39d82a18fb1d0 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 9 Jul 2025 22:01:34 -0700
Subject: [PATCH 29/64] Revert "Added OS to Dockerfile [skip ci]"

This reverts commit 33ca8a61e23c887a129be66acaedfc76c6191cf2.
---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 840330258..936440928 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 ARG PG_MAJOR=17
-FROM postgres:$PG_MAJOR-bookworm
+FROM postgres:$PG_MAJOR
 ARG PG_MAJOR
 
 COPY . /tmp/pgvector

From 6de928242e434364af6e521843a9568abe7d62fb Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sun, 27 Jul 2025 12:34:23 -0700
Subject: [PATCH 30/64] Updated Dockerfile to use release [skip ci]

---
 .dockerignore | 15 ---------------
 Dockerfile    |  4 +++-
 2 files changed, 3 insertions(+), 16 deletions(-)
 delete mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
deleted file mode 100644
index 49dfd6c71..000000000
--- a/.dockerignore
+++ /dev/null
@@ -1,15 +0,0 @@
-/.git/
-/dist/
-/log/
-/results/
-/tmp_check/
-/sql/vector--?.?.?.sql
-regression.*
-*.o
-*.so
-*.bc
-*.dll
-*.dylib
-*.obj
-*.lib
-*.exp
diff --git a/Dockerfile b/Dockerfile
index 936440928..7cc81f02e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,10 @@
+# syntax=docker/dockerfile:1
+
 ARG PG_MAJOR=17
 FROM postgres:$PG_MAJOR
 ARG PG_MAJOR
 
-COPY . /tmp/pgvector
+ADD https://github.com/pgvector/pgvector.git#v0.8.0 /tmp/pgvector
 
 RUN apt-get update && \
 		apt-mark hold locales && \

From 3c33debde6b83118b779a084497e0df357fde681 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sun, 27 Jul 2025 13:14:29 -0700
Subject: [PATCH 31/64] Added Docker images with -bookworm suffix [skip ci]

---
 Dockerfile | 3 ++-
 Makefile   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7cc81f02e..23682129e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,8 @@
 # syntax=docker/dockerfile:1
 
 ARG PG_MAJOR=17
-FROM postgres:$PG_MAJOR
+ARG DEBIAN_CODENAME=bookworm
+FROM postgres:$PG_MAJOR-$DEBIAN_CODENAME
 ARG PG_MAJOR
 
 ADD https://github.com/pgvector/pgvector.git#v0.8.0 /tmp/pgvector
diff --git a/Makefile b/Makefile
index 75d1330cc..56616e744 100644
--- a/Makefile
+++ b/Makefile
@@ -82,4 +82,4 @@ docker:
 .PHONY: docker-release
 
 docker-release:
-	docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 --build-arg PG_MAJOR=$(PG_MAJOR) -t pgvector/pgvector:pg$(PG_MAJOR) -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR) .
+	docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 --build-arg PG_MAJOR=$(PG_MAJOR) --build-arg DEBIAN_CODENAME=bookworm -t pgvector/pgvector:pg$(PG_MAJOR) -t pgvector/pgvector:pg$(PG_MAJOR)-bookworm -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR) -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR)-bookworm .

From 4f1f47cd0080122cb7044f20734ea5a1c9ab9ac9 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sun, 27 Jul 2025 13:25:51 -0700
Subject: [PATCH 32/64] Updated changelog [skip ci]

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 757f998d0..27dcb50cc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.8.1 (unreleased)
+
+- Added support for Postgres 18
+
 ## 0.8.0 (2024-10-30)
 
 - Added support for iterative index scans

From 940961df33542bb7d8c651e647fa7a2cf6ace2a8 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sun, 27 Jul 2025 13:44:59 -0700
Subject: [PATCH 33/64] Added supported Docker tags to readme [skip ci]

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 43c0430d2..536c3d107 100644
--- a/README.md
+++ b/README.md
@@ -1143,6 +1143,14 @@ docker pull pgvector/pgvector:pg17
 
 This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `17` with your Postgres server version, and run it the same way).
 
+Supported tags are:
+
+- `pg17`, `pg17-bookworm`, `0.8.0-pg17`, `0.8.0-pg17-bookworm`
+- `pg16`, `pg16-bookworm`, `0.8.0-pg16`, `0.8.0-pg16-bookworm`
+- `pg15`, `pg15-bookworm`, `0.8.0-pg15`, `0.8.0-pg15-bookworm`
+- `pg14`, `pg14-bookworm`, `0.8.0-pg14`, `0.8.0-pg14-bookworm`
+- `pg13`, `pg13-bookworm`, `0.8.0-pg13`, `0.8.0-pg13-bookworm`
+
 You can also build the image manually:
 
 ```sh

From 60d17bac33f0922c26db9ebb23ac9f74aad25ca7 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sun, 27 Jul 2025 18:19:28 -0700
Subject: [PATCH 34/64] Use consistent style [skip ci]

---
 src/vector.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/vector.c b/src/vector.c
index d2312e042..66db3d2f1 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -916,19 +916,17 @@ vector_concat(PG_FUNCTION_ARGS)
 	Vector	   *b = PG_GETARG_VECTOR_P(1);
 	Vector	   *result;
 	int			dim = a->dim + b->dim;
-	int			dim_a = a->dim;
-	int			dim_b = b->dim;
 
 	CheckDim(dim);
 	result = InitVector(dim);
 
 	/* Auto-vectorized */
-	for (int i = 0; i < dim_a; i++)
+	for (int i = 0, imax = a->dim; i < imax; i++)
 		result->x[i] = a->x[i];
 
 	/* Auto-vectorized */
-	for (int i = 0; i < dim_b; i++)
-		result->x[i + dim_a] = b->x[i];
+	for (int i = 0, imax = b->dim, start = a->dim; i < imax; i++)
+		result->x[i + start] = b->x[i];
 
 	PG_RETURN_POINTER(result);
 }

From 636ab9b15c2d864859946726b6a6514416189699 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sun, 27 Jul 2025 19:50:46 -0700
Subject: [PATCH 35/64] Added Postgres 19 to CI [skip ci]

---
 .github/workflows/build.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 9ed7ff4bf..ff462fb38 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -8,6 +8,8 @@ jobs:
       fail-fast: false
       matrix:
         include:
+          - postgres: 19
+            os: ubuntu-24.04
           - postgres: 18
             os: ubuntu-24.04
           - postgres: 17
@@ -28,7 +30,7 @@ jobs:
           dev-files: true
       - run: make
         env:
-          PG_CFLAGS: ${{ matrix.postgres == 18 && '-Wno-missing-field-initializers' || '' }} -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
+          PG_CFLAGS: ${{ matrix.postgres >= 18 && '-Wno-missing-field-initializers' || '' }} -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
       - run: |
           export PG_CONFIG=`which pg_config`
           sudo --preserve-env=PG_CONFIG make install

From 7c6a90b215844ea4a0186db70dbc22ca56feba3e Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 31 Jul 2025 19:59:27 -0700
Subject: [PATCH 36/64] Switched to PG_MODULE_MAGIC_EXT for Postgres 18+ [skip
 ci]

---
 src/vector.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/vector.c b/src/vector.c
index 66db3d2f1..256d632a6 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -35,7 +35,11 @@
 #define VECTOR_TARGET_CLONES
 #endif
 
+#if PG_VERSION_NUM >= 180000
+PG_MODULE_MAGIC_EXT(.name = "vector", .version = "0.8.0");
+#else
 PG_MODULE_MAGIC;
+#endif
 
 /*
  * Initialize index options and variables

From b5dc0cc6d0bfdb049537cd5d591d4552dc3d6d79 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 9 Aug 2025 12:09:29 -0700
Subject: [PATCH 37/64] Updated FreeBSD package name in readme [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 536c3d107..9f2a930e3 100644
--- a/README.md
+++ b/README.md
@@ -1210,7 +1210,7 @@ Note: Replace `17` with your Postgres server version
 Install the FreeBSD package with:
 
 ```sh
-pkg install postgresql16-pgvector
+pkg install postgresql17-pgvector
 ```
 
 or the port with:

From b2e441ecc2d0eb0a220da6765630f5e3b310f599 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 19 Aug 2025 12:44:54 -0700
Subject: [PATCH 38/64] Ran pgindent [skip ci]

---
 src/vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vector.c b/src/vector.c
index 256d632a6..7da2bf30b 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -36,7 +36,7 @@
 #endif
 
 #if PG_VERSION_NUM >= 180000
-PG_MODULE_MAGIC_EXT(.name = "vector", .version = "0.8.0");
+PG_MODULE_MAGIC_EXT(.name = "vector",.version = "0.8.0");
 #else
 PG_MODULE_MAGIC;
 #endif

From 8c5bbdd8e8a1938ce10979aea6e8fe57cf226316 Mon Sep 17 00:00:00 2001
From: Luca Giacchino <luca.giacchino@intel.com>
Date: Tue, 12 Aug 2025 19:52:49 -0700
Subject: [PATCH 39/64] Add autovectorized implementation of binary quantize

---
 src/vector.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/vector.c b/src/vector.c
index 7da2bf30b..4c1f695e6 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -946,8 +946,18 @@ binary_quantize(PG_FUNCTION_ARGS)
 	float	   *ax = a->x;
 	VarBit	   *result = InitBitVector(a->dim);
 	unsigned char *rx = VARBITS(result);
+	int			i;
+	int			count = (a->dim / 8) * 8;
+	unsigned char result_byte;
 
-	for (int i = 0; i < a->dim; i++)
+	for (i = 0; i < count; i += 8)
+	{
+		result_byte = 0;
+		for (int j = 0; j < 8; j++)
+			result_byte |= (ax[i + j] > 0) << (7 - j);
+		rx[i / 8] = result_byte;
+	}
+	for (; i < a->dim; i++)
 		rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
 
 	PG_RETURN_VARBIT_P(result);

From 8762819a817f0d7709ca2bc7bf8a23446d73aff9 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 19 Aug 2025 13:47:11 -0700
Subject: [PATCH 40/64] Improved performance of binary_quantize function for
 halfvec

---
 CHANGELOG.md  |  1 +
 src/halfvec.c | 15 ++++++++++++++-
 src/vector.c  | 11 +++++++----
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 27dcb50cc..c85bcf03c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 ## 0.8.1 (unreleased)
 
 - Added support for Postgres 18
+- Improved performance of `binary_quantize` function
 
 ## 0.8.0 (2024-10-30)
 
diff --git a/src/halfvec.c b/src/halfvec.c
index 9b85ba741..4db337c7e 100644
--- a/src/halfvec.c
+++ b/src/halfvec.c
@@ -897,8 +897,21 @@ halfvec_binary_quantize(PG_FUNCTION_ARGS)
 	half	   *ax = a->x;
 	VarBit	   *result = InitBitVector(a->dim);
 	unsigned char *rx = VARBITS(result);
+	int			i = 0;
+	int			count = (a->dim / 8) * 8;
 
-	for (int i = 0; i < a->dim; i++)
+	/* Auto-vectorized */
+	for (; i < count; i += 8)
+	{
+		unsigned char result_byte = 0;
+
+		for (int j = 0; j < 8; j++)
+			result_byte |= (HalfToFloat4(ax[i + j]) > 0) << (7 - j);
+
+		rx[i / 8] = result_byte;
+	}
+
+	for (; i < a->dim; i++)
 		rx[i / 8] |= (HalfToFloat4(ax[i]) > 0) << (7 - (i % 8));
 
 	PG_RETURN_VARBIT_P(result);
diff --git a/src/vector.c b/src/vector.c
index 4c1f695e6..0630cf811 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -946,17 +946,20 @@ binary_quantize(PG_FUNCTION_ARGS)
 	float	   *ax = a->x;
 	VarBit	   *result = InitBitVector(a->dim);
 	unsigned char *rx = VARBITS(result);
-	int			i;
+	int			i = 0;
 	int			count = (a->dim / 8) * 8;
-	unsigned char result_byte;
 
-	for (i = 0; i < count; i += 8)
+	/* Auto-vectorized */
+	for (; i < count; i += 8)
 	{
-		result_byte = 0;
+		unsigned char result_byte = 0;
+
 		for (int j = 0; j < 8; j++)
 			result_byte |= (ax[i + j] > 0) << (7 - j);
+
 		rx[i / 8] = result_byte;
 	}
+
 	for (; i < a->dim; i++)
 		rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
 

From a6109ada33f067bbf431c0e61a6aebc0196e3a52 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 19 Aug 2025 14:26:36 -0700
Subject: [PATCH 41/64] Updated comment [skip ci]

---
 src/halfvec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/halfvec.c b/src/halfvec.c
index 4db337c7e..f9a42a559 100644
--- a/src/halfvec.c
+++ b/src/halfvec.c
@@ -900,7 +900,7 @@ halfvec_binary_quantize(PG_FUNCTION_ARGS)
 	int			i = 0;
 	int			count = (a->dim / 8) * 8;
 
-	/* Auto-vectorized */
+	/* Auto-vectorized on aarch64 */
 	for (; i < count; i += 8)
 	{
 		unsigned char result_byte = 0;

From a94212c8a6a25a92988cd948e718f9499433be05 Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouhaud@free.fr>
Date: Sat, 23 Aug 2025 18:31:22 +0800
Subject: [PATCH 42/64] Use NIL for empty lists (#890)

Postgres standard way to check for list emptiness is to compare a pointer to
NIL rather than NULL.
---
 src/hnsw.c    | 2 +-
 src/ivfflat.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hnsw.c b/src/hnsw.c
index ec7c98852..6a6db507f 100644
--- a/src/hnsw.c
+++ b/src/hnsw.c
@@ -130,7 +130,7 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	Relation	index;
 
 	/* Never use index without order */
-	if (path->indexorderbys == NULL)
+	if (path->indexorderbys == NIL)
 	{
 		*indexStartupCost = get_float8_infinity();
 		*indexTotalCost = get_float8_infinity();
diff --git a/src/ivfflat.c b/src/ivfflat.c
index 736d537d1..31c2f7d5e 100644
--- a/src/ivfflat.c
+++ b/src/ivfflat.c
@@ -92,7 +92,7 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	Relation	index;
 
 	/* Never use index without order */
-	if (path->indexorderbys == NULL)
+	if (path->indexorderbys == NIL)
 	{
 		*indexStartupCost = get_float8_infinity();
 		*indexTotalCost = get_float8_infinity();

From b7ad6761e263e683eae1858583c465b400346d94 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 23 Aug 2025 19:34:11 -0700
Subject: [PATCH 43/64] Added Docker images for Debian 13 / Trixie [skip ci]

---
 Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Makefile b/Makefile
index 56616e744..5fb759ff3 100644
--- a/Makefile
+++ b/Makefile
@@ -83,3 +83,8 @@ docker:
 
 docker-release:
 	docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 --build-arg PG_MAJOR=$(PG_MAJOR) --build-arg DEBIAN_CODENAME=bookworm -t pgvector/pgvector:pg$(PG_MAJOR) -t pgvector/pgvector:pg$(PG_MAJOR)-bookworm -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR) -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR)-bookworm .
+
+.PHONY: docker-release-trixie
+
+docker-release-trixie:
+	docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 --build-arg PG_MAJOR=$(PG_MAJOR) --build-arg DEBIAN_CODENAME=trixie -t pgvector/pgvector:pg$(PG_MAJOR)-trixie -t pgvector/pgvector:$(EXTVERSION)-pg$(PG_MAJOR)-trixie .

From a89928839f3745bc424c7ce8f13743948e600998 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 23 Aug 2025 19:46:07 -0700
Subject: [PATCH 44/64] Added new Docker images to readme [skip ci]

---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9f2a930e3..851630d2f 100644
--- a/README.md
+++ b/README.md
@@ -1138,17 +1138,22 @@ If installation fails with `Access is denied`, re-run the installation instructi
 Get the [Docker image](https://hub.docker.com/r/pgvector/pgvector) with:
 
 ```sh
-docker pull pgvector/pgvector:pg17
+docker pull pgvector/pgvector:pg17-trixie
 ```
 
 This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `17` with your Postgres server version, and run it the same way).
 
 Supported tags are:
 
+- `pg17-trixie`, `0.8.0-pg17-trixie`
 - `pg17`, `pg17-bookworm`, `0.8.0-pg17`, `0.8.0-pg17-bookworm`
+- `pg16-trixie`, `0.8.0-pg16-trixie`
 - `pg16`, `pg16-bookworm`, `0.8.0-pg16`, `0.8.0-pg16-bookworm`
+- `pg15-trixie`, `0.8.0-pg15-trixie`
 - `pg15`, `pg15-bookworm`, `0.8.0-pg15`, `0.8.0-pg15-bookworm`
+- `pg14-trixie`, `0.8.0-pg14-trixie`
 - `pg14`, `pg14-bookworm`, `0.8.0-pg14`, `0.8.0-pg14-bookworm`
+- `pg13-trixie`, `0.8.0-pg13-trixie`
 - `pg13`, `pg13-bookworm`, `0.8.0-pg13`, `0.8.0-pg13-bookworm`
 
 You can also build the image manually:

From fc47ad8df8f11f6f088a155c53ec1e312460cdc5 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 23 Aug 2025 19:57:14 -0700
Subject: [PATCH 45/64] Reordered supported Docker tags [skip ci]

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 851630d2f..b8259f4a6 100644
--- a/README.md
+++ b/README.md
@@ -1146,15 +1146,15 @@ This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (r
 Supported tags are:
 
 - `pg17-trixie`, `0.8.0-pg17-trixie`
-- `pg17`, `pg17-bookworm`, `0.8.0-pg17`, `0.8.0-pg17-bookworm`
+- `pg17-bookworm`, `0.8.0-pg17-bookworm`, `pg17`, `0.8.0-pg17`
 - `pg16-trixie`, `0.8.0-pg16-trixie`
-- `pg16`, `pg16-bookworm`, `0.8.0-pg16`, `0.8.0-pg16-bookworm`
+- `pg16-bookworm`, `0.8.0-pg16-bookworm`, `pg16`, `0.8.0-pg16`
 - `pg15-trixie`, `0.8.0-pg15-trixie`
-- `pg15`, `pg15-bookworm`, `0.8.0-pg15`, `0.8.0-pg15-bookworm`
+- `pg15-bookworm`, `0.8.0-pg15-bookworm`, `pg15`, `0.8.0-pg15`
 - `pg14-trixie`, `0.8.0-pg14-trixie`
-- `pg14`, `pg14-bookworm`, `0.8.0-pg14`, `0.8.0-pg14-bookworm`
+- `pg14-bookworm`, `0.8.0-pg14-bookworm`, `pg14`, `0.8.0-pg14`
 - `pg13-trixie`, `0.8.0-pg13-trixie`
-- `pg13`, `pg13-bookworm`, `0.8.0-pg13`, `0.8.0-pg13-bookworm`
+- `pg13-bookworm`, `0.8.0-pg13-bookworm`, `pg13`, `0.8.0-pg13`
 
 You can also build the image manually:
 

From 4bd530a2229f188a81efaee7c0165c9c7a86fbb9 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Fri, 29 Aug 2025 01:09:18 -0700
Subject: [PATCH 46/64] Added another test for binary_quantize function [skip
 ci]

---
 test/expected/halfvec.out     | 6 ++++++
 test/expected/vector_type.out | 6 ++++++
 test/sql/halfvec.sql          | 1 +
 test/sql/vector_type.sql      | 1 +
 4 files changed, 14 insertions(+)

diff --git a/test/expected/halfvec.out b/test/expected/halfvec.out
index 867dd176b..c3856c512 100644
--- a/test/expected/halfvec.out
+++ b/test/expected/halfvec.out
@@ -606,6 +606,12 @@ SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec)
  01001110101
 (1 row)
 
+SELECT binary_quantize('[1,2,3,-4,5,6,-7,8,1,-2,-3,4,5,-6,7,8,-1,2,3]'::halfvec);
+   binary_quantize   
+---------------------
+ 1110110110011011011
+(1 row)
+
 SELECT subvector('[1,2,3,4,5]'::halfvec, 1, 3);
  subvector 
 -----------
diff --git a/test/expected/vector_type.out b/test/expected/vector_type.out
index 674865822..f4c85d035 100644
--- a/test/expected/vector_type.out
+++ b/test/expected/vector_type.out
@@ -576,6 +576,12 @@ SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector);
  01001110101
 (1 row)
 
+SELECT binary_quantize('[1,2,3,-4,5,6,-7,8,1,-2,-3,4,5,-6,7,8,-1,2,3]'::vector);
+   binary_quantize   
+---------------------
+ 1110110110011011011
+(1 row)
+
 SELECT subvector('[1,2,3,4,5]'::vector, 1, 3);
  subvector 
 -----------
diff --git a/test/sql/halfvec.sql b/test/sql/halfvec.sql
index d94518348..744d03868 100644
--- a/test/sql/halfvec.sql
+++ b/test/sql/halfvec.sql
@@ -132,6 +132,7 @@ SELECT l2_normalize('[65504]'::halfvec);
 
 SELECT binary_quantize('[1,0,-1]'::halfvec);
 SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec);
+SELECT binary_quantize('[1,2,3,-4,5,6,-7,8,1,-2,-3,4,5,-6,7,8,-1,2,3]'::halfvec);
 
 SELECT subvector('[1,2,3,4,5]'::halfvec, 1, 3);
 SELECT subvector('[1,2,3,4,5]'::halfvec, 3, 2);
diff --git a/test/sql/vector_type.sql b/test/sql/vector_type.sql
index 088b040aa..086a39bcf 100644
--- a/test/sql/vector_type.sql
+++ b/test/sql/vector_type.sql
@@ -128,6 +128,7 @@ SELECT l2_normalize('[3e38]'::vector);
 
 SELECT binary_quantize('[1,0,-1]'::vector);
 SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector);
+SELECT binary_quantize('[1,2,3,-4,5,6,-7,8,1,-2,-3,4,5,-6,7,8,-1,2,3]'::vector);
 
 SELECT subvector('[1,2,3,4,5]'::vector, 1, 3);
 SELECT subvector('[1,2,3,4,5]'::vector, 3, 2);

From 429c3656428ea33dea520a76a8ad25a2547b1e66 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 4 Sep 2025 15:36:18 -0700
Subject: [PATCH 47/64] Updated changelog [skip ci]

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c85bcf03c..4b2aed351 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 ## 0.8.1 (unreleased)
 
-- Added support for Postgres 18
+- Added support for Postgres 18 rc1
 - Improved performance of `binary_quantize` function
 
 ## 0.8.0 (2024-10-30)

From e1146d4fb04efe3726e454acaf40c739c6a22db7 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 4 Sep 2025 15:58:23 -0700
Subject: [PATCH 48/64] Fixed compilation error with Postgres 19

---
 src/hnsw.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/hnsw.c b/src/hnsw.c
index 6a6db507f..1d56ef6ea 100644
--- a/src/hnsw.c
+++ b/src/hnsw.c
@@ -52,12 +52,20 @@ HnswInitLockTranche(void)
 								  sizeof(int) * 1,
 								  &found);
 	if (!found)
+	{
+#if PG_VERSION_NUM >= 190000
+		tranche_ids[0] = LWLockNewTrancheId("HnswBuild");
+#else
 		tranche_ids[0] = LWLockNewTrancheId();
+#endif
+	}
 	hnsw_lock_tranche_id = tranche_ids[0];
 	LWLockRelease(AddinShmemInitLock);
 
+#if PG_VERSION_NUM < 190000
 	/* Per-backend registration of the tranche ID */
 	LWLockRegisterTranche(hnsw_lock_tranche_id, "HnswBuild");
+#endif
 }
 
 /*

From b02ed17ab3f219fc5800ebd55205d2dbfb6790a9 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 4 Sep 2025 17:51:09 -0700
Subject: [PATCH 49/64] Version bump to 0.8.1 [skip ci]

---
 CHANGELOG.md                 |  2 +-
 Dockerfile                   |  2 +-
 META.json                    |  4 ++--
 Makefile                     |  2 +-
 Makefile.win                 |  2 +-
 README.md                    | 26 +++++++++++++-------------
 sql/vector--0.8.0--0.8.1.sql |  2 ++
 src/vector.c                 |  2 +-
 vector.control               |  2 +-
 9 files changed, 23 insertions(+), 21 deletions(-)
 create mode 100644 sql/vector--0.8.0--0.8.1.sql

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b2aed351..8618d1afd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.8.1 (unreleased)
+## 0.8.1 (2025-09-04)
 
 - Added support for Postgres 18 rc1
 - Improved performance of `binary_quantize` function
diff --git a/Dockerfile b/Dockerfile
index 23682129e..7e06759d5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,7 @@ ARG DEBIAN_CODENAME=bookworm
 FROM postgres:$PG_MAJOR-$DEBIAN_CODENAME
 ARG PG_MAJOR
 
-ADD https://github.com/pgvector/pgvector.git#v0.8.0 /tmp/pgvector
+ADD https://github.com/pgvector/pgvector.git#v0.8.1 /tmp/pgvector
 
 RUN apt-get update && \
 		apt-mark hold locales && \
diff --git a/META.json b/META.json
index b9a68f62a..343518d1c 100644
--- a/META.json
+++ b/META.json
@@ -2,7 +2,7 @@
 	"name": "vector",
 	"abstract": "Open-source vector similarity search for Postgres",
 	"description": "Supports L2 distance, inner product, and cosine distance",
-	"version": "0.8.0",
+	"version": "0.8.1",
 	"maintainer": [
 		"Andrew Kane <andrew@ankane.org>"
 	],
@@ -20,7 +20,7 @@
 		"vector": {
 			"file": "sql/vector.sql",
 			"docfile": "README.md",
-			"version": "0.8.0",
+			"version": "0.8.1",
 			"abstract": "Open-source vector similarity search for Postgres"
 		}
 	},
diff --git a/Makefile b/Makefile
index 5fb759ff3..e89ee1342 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 EXTENSION = vector
-EXTVERSION = 0.8.0
+EXTVERSION = 0.8.1
 
 MODULE_big = vector
 DATA = $(wildcard sql/*--*--*.sql)
diff --git a/Makefile.win b/Makefile.win
index 8c62f9d5f..1fee4a683 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -1,5 +1,5 @@
 EXTENSION = vector
-EXTVERSION = 0.8.0
+EXTVERSION = 0.8.1
 
 DATA_built = sql\$(EXTENSION)--$(EXTVERSION).sql
 OBJS = src\bitutils.obj src\bitvec.obj src\halfutils.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\sparsevec.obj src\vector.obj
diff --git a/README.md b/README.md
index b8259f4a6..dfd16ec64 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Compile and install the extension (supports Postgres 13+)
 
 ```sh
 cd /tmp
-git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git
+git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git
 cd pgvector
 make
 make install # may need sudo
@@ -38,7 +38,7 @@ Ensure [C++ support in Visual Studio](https://learn.microsoft.com/en-us/cpp/buil
 ```cmd
 set "PGROOT=C:\Program Files\PostgreSQL\17"
 cd %TEMP%
-git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git
+git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git
 cd pgvector
 nmake /F Makefile.win
 nmake /F Makefile.win install
@@ -1145,21 +1145,21 @@ This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (r
 
 Supported tags are:
 
-- `pg17-trixie`, `0.8.0-pg17-trixie`
-- `pg17-bookworm`, `0.8.0-pg17-bookworm`, `pg17`, `0.8.0-pg17`
-- `pg16-trixie`, `0.8.0-pg16-trixie`
-- `pg16-bookworm`, `0.8.0-pg16-bookworm`, `pg16`, `0.8.0-pg16`
-- `pg15-trixie`, `0.8.0-pg15-trixie`
-- `pg15-bookworm`, `0.8.0-pg15-bookworm`, `pg15`, `0.8.0-pg15`
-- `pg14-trixie`, `0.8.0-pg14-trixie`
-- `pg14-bookworm`, `0.8.0-pg14-bookworm`, `pg14`, `0.8.0-pg14`
-- `pg13-trixie`, `0.8.0-pg13-trixie`
-- `pg13-bookworm`, `0.8.0-pg13-bookworm`, `pg13`, `0.8.0-pg13`
+- `pg17-trixie`, `0.8.1-pg17-trixie`
+- `pg17-bookworm`, `0.8.1-pg17-bookworm`, `pg17`, `0.8.1-pg17`
+- `pg16-trixie`, `0.8.1-pg16-trixie`
+- `pg16-bookworm`, `0.8.1-pg16-bookworm`, `pg16`, `0.8.1-pg16`
+- `pg15-trixie`, `0.8.1-pg15-trixie`
+- `pg15-bookworm`, `0.8.1-pg15-bookworm`, `pg15`, `0.8.1-pg15`
+- `pg14-trixie`, `0.8.1-pg14-trixie`
+- `pg14-bookworm`, `0.8.1-pg14-bookworm`, `pg14`, `0.8.1-pg14`
+- `pg13-trixie`, `0.8.1-pg13-trixie`
+- `pg13-bookworm`, `0.8.1-pg13-bookworm`, `pg13`, `0.8.1-pg13`
 
 You can also build the image manually:
 
 ```sh
-git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git
+git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git
 cd pgvector
 docker build --pull --build-arg PG_MAJOR=17 -t myuser/pgvector .
 ```
diff --git a/sql/vector--0.8.0--0.8.1.sql b/sql/vector--0.8.0--0.8.1.sql
new file mode 100644
index 000000000..547bd44a3
--- /dev/null
+++ b/sql/vector--0.8.0--0.8.1.sql
@@ -0,0 +1,2 @@
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION vector UPDATE TO '0.8.1'" to load this file. \quit
diff --git a/src/vector.c b/src/vector.c
index 0630cf811..03f70d601 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -36,7 +36,7 @@
 #endif
 
 #if PG_VERSION_NUM >= 180000
-PG_MODULE_MAGIC_EXT(.name = "vector",.version = "0.8.0");
+PG_MODULE_MAGIC_EXT(.name = "vector",.version = "0.8.1");
 #else
 PG_MODULE_MAGIC;
 #endif
diff --git a/vector.control b/vector.control
index 7bfc0f1fa..2ad02286a 100644
--- a/vector.control
+++ b/vector.control
@@ -1,4 +1,4 @@
 comment = 'vector data type and ivfflat and hnsw access methods'
-default_version = '0.8.0'
+default_version = '0.8.1'
 module_pathname = '$libdir/vector'
 relocatable = true

From c6c38de4ce881187f4816abefb2e3af5513b51f0 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Fri, 5 Sep 2025 16:41:22 -0700
Subject: [PATCH 50/64] Added windows-2025 to CI [skip ci]

---
 .github/workflows/build.yml | 14 +++++++++++---
 Makefile.win                |  5 ++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ff462fb38..a52b5fa28 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -113,18 +113,26 @@ jobs:
           LLVM_VERSION: ${{ matrix.os == 'macos-15' && 18 || 15 }}
           PG_CFLAGS: -DUSE_ASSERT_CHECKING
   windows:
-    runs-on: windows-latest
+    runs-on: ${{ matrix.os }}
     if: ${{ !startsWith(github.ref_name, 'mac') }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - postgres: 17
+            os: windows-2025
+          - postgres: 14
+            os: windows-2022
     steps:
       - uses: actions/checkout@v4
       - uses: ankane/setup-postgres@v1
         with:
-          postgres-version: 14
+          postgres-version: ${{ matrix.postgres }}
       - run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && ^
           nmake /NOLOGO /F Makefile.win && ^
           nmake /NOLOGO /F Makefile.win install && ^
-          nmake /NOLOGO /F Makefile.win installcheck && ^
+          nmake /NOLOGO /F Makefile.win installcheck ${{ matrix.postgres != 17 && 'PG_REGRESS=$(PGROOT)\bin\pg_regress' || '' }} && ^
           nmake /NOLOGO /F Makefile.win clean && ^
           nmake /NOLOGO /F Makefile.win uninstall
         shell: cmd
diff --git a/Makefile.win b/Makefile.win
index 1fee4a683..46ab3da64 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -31,6 +31,9 @@ LIBDIR = $(PGROOT)\lib
 PKGLIBDIR = $(PGROOT)\lib
 SHAREDIR = $(PGROOT)\share
 
+# Use $(PGROOT)\bin\pg_regress for Postgres < 17
+PG_REGRESS = $(LIBDIR)\pgxs\src\test\regress\pg_regress
+
 CFLAGS = /nologo /I"$(INCLUDEDIR_SERVER)\port\win32_msvc" /I"$(INCLUDEDIR_SERVER)\port\win32" /I"$(INCLUDEDIR_SERVER)" /I"$(INCLUDEDIR)"
 
 CFLAGS = $(CFLAGS) $(PG_CFLAGS)
@@ -58,7 +61,7 @@ install: all
 	for %f in ($(HEADERS)) do copy %f "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)"
 
 installcheck:
-	"$(BINDIR)\pg_regress" --bindir="$(BINDIR)" $(REGRESS_OPTS) $(REGRESS)
+	"$(PG_REGRESS)" --bindir="$(BINDIR)" $(REGRESS_OPTS) $(REGRESS)
 
 uninstall:
 	del /f "$(PKGLIBDIR)\$(SHLIB)"

From b0d989b0d9e80734e05180c102f0d7c7b5449f15 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Fri, 5 Sep 2025 17:03:37 -0700
Subject: [PATCH 51/64] Updated CI [skip ci]

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index a52b5fa28..7215c0cab 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -104,7 +104,7 @@ jobs:
           tar xf $TAG.tar.gz
           mv postgres-$TAG postgres
         env:
-          TAG: ${{ matrix.postgres == 17 && 'REL_17_2' || 'REL_14_15' }}
+          TAG: ${{ matrix.postgres == 17 && 'REL_17_6' || 'REL_14_19' }}
       - run: make prove_installcheck PROVE_FLAGS="-I ./postgres/src/test/perl -I ./test/perl"
         env:
           PERL5LIB: /Users/runner/perl5/lib/perl5

From 04f3ea2c228601f056fe4435f5c4743ce8c5058d Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Fri, 19 Sep 2025 18:34:51 -0700
Subject: [PATCH 52/64] Test with macos-15-intel on CI [skip ci]

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 7215c0cab..0ebb1fa39 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -83,7 +83,7 @@ jobs:
           - postgres: 17
             os: macos-15
           - postgres: 14
-            os: macos-13
+            os: macos-15-intel
     steps:
       - uses: actions/checkout@v4
       - uses: ankane/setup-postgres@v1

From 6c8d31895d06c9bf8bf46314cded4f5364a1afdb Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Fri, 19 Sep 2025 19:16:08 -0700
Subject: [PATCH 53/64] Updated CI [skip ci]

---
 .github/workflows/build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0ebb1fa39..6032cdfb5 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -81,7 +81,7 @@ jobs:
       matrix:
         include:
           - postgres: 17
-            os: macos-15
+            os: macos-26
           - postgres: 14
             os: macos-15-intel
     steps:
@@ -110,7 +110,7 @@ jobs:
           PERL5LIB: /Users/runner/perl5/lib/perl5
       - run: make clean && $(brew --prefix llvm@$LLVM_VERSION)/bin/scan-build --status-bugs make
         env:
-          LLVM_VERSION: ${{ matrix.os == 'macos-15' && 18 || 15 }}
+          LLVM_VERSION: ${{ matrix.os == 'macos-26' && 20 || 18 }}
           PG_CFLAGS: -DUSE_ASSERT_CHECKING
   windows:
     runs-on: ${{ matrix.os }}

From 0e8b7c46dc4fb6a502c2a126e7c3a4356753dbb7 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Mon, 22 Sep 2025 11:55:01 -0700
Subject: [PATCH 54/64] Updated checkout action [skip ci]

---
 .github/workflows/build.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6032cdfb5..25be06acc 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,7 +23,7 @@ jobs:
           - postgres: 13
             os: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
       - uses: ankane/setup-postgres@v1
         with:
           postgres-version: ${{ matrix.postgres }}
@@ -85,7 +85,7 @@ jobs:
           - postgres: 14
             os: macos-15-intel
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
       - uses: ankane/setup-postgres@v1
         with:
           postgres-version: ${{ matrix.postgres }}
@@ -124,7 +124,7 @@ jobs:
           - postgres: 14
             os: windows-2022
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
       - uses: ankane/setup-postgres@v1
         with:
           postgres-version: ${{ matrix.postgres }}
@@ -165,7 +165,7 @@ jobs:
     if: ${{ !startsWith(github.ref_name, 'mac') && !startsWith(github.ref_name, 'windows') }}
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
       - uses: ankane/setup-postgres-valgrind@v1
         with:
           postgres-version: 17

From 5d70fe2fe27ac0d14ab3a813b8995cb834211e01 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 25 Sep 2025 15:46:28 -0700
Subject: [PATCH 55/64] Updated readme for Postgres 18 [skip ci]

---
 README.md | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index dfd16ec64..f6607151d 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ You can also install it with [Docker](#docker), [Homebrew](#homebrew), [PGXN](#p
 Ensure [C++ support in Visual Studio](https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=msvc-170#download-and-install-the-tools) is installed and run `x64 Native Tools Command Prompt for VS [version]` as administrator. Then use `nmake` to build:
 
 ```cmd
-set "PGROOT=C:\Program Files\PostgreSQL\17"
+set "PGROOT=C:\Program Files\PostgreSQL\18"
 cd %TEMP%
 git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git
 cd pgvector
@@ -1064,7 +1064,7 @@ l2_normalize(sparsevec) → sparsevec | Normalize with Euclidean norm | 0.7.0
 If your machine has multiple Postgres installations, specify the path to [pg_config](https://www.postgresql.org/docs/current/app-pgconfig.html) with:
 
 ```sh
-export PG_CONFIG=/Library/PostgreSQL/17/bin/pg_config
+export PG_CONFIG=/Library/PostgreSQL/18/bin/pg_config
 ```
 
 Then re-run the installation instructions (run `make clean` before `make` if needed). If `sudo` is needed for `make install`, use:
@@ -1075,11 +1075,11 @@ sudo --preserve-env=PG_CONFIG make install
 
 A few common paths on Mac are:
 
-- EDB installer - `/Library/PostgreSQL/17/bin/pg_config`
-- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@17/bin/pg_config`
-- Homebrew (x86-64) - `/usr/local/opt/postgresql@17/bin/pg_config`
+- EDB installer - `/Library/PostgreSQL/18/bin/pg_config`
+- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@18/bin/pg_config`
+- Homebrew (x86-64) - `/usr/local/opt/postgresql@18/bin/pg_config`
 
-Note: Replace `17` with your Postgres server version
+Note: Replace `18` with your Postgres server version
 
 ### Missing Header
 
@@ -1088,10 +1088,10 @@ If compilation fails with `fatal error: postgres.h: No such file or directory`,
 For Ubuntu and Debian, use:
 
 ```sh
-sudo apt install postgresql-server-dev-17
+sudo apt install postgresql-server-dev-18
 ```
 
-Note: Replace `17` with your Postgres server version
+Note: Replace `18` with your Postgres server version
 
 ### Missing SDK
 
@@ -1193,22 +1193,22 @@ pgxn install vector
 Debian and Ubuntu packages are available from the [PostgreSQL APT Repository](https://wiki.postgresql.org/wiki/Apt). Follow the [setup instructions](https://wiki.postgresql.org/wiki/Apt#Quickstart) and run:
 
 ```sh
-sudo apt install postgresql-17-pgvector
+sudo apt install postgresql-18-pgvector
 ```
 
-Note: Replace `17` with your Postgres server version
+Note: Replace `18` with your Postgres server version
 
 ### Yum
 
 RPM packages are available from the [PostgreSQL Yum Repository](https://yum.postgresql.org/). Follow the [setup instructions](https://www.postgresql.org/download/linux/redhat/) for your distribution and run:
 
 ```sh
-sudo yum install pgvector_17
+sudo yum install pgvector_18
 # or
-sudo dnf install pgvector_17
+sudo dnf install pgvector_18
 ```
 
-Note: Replace `17` with your Postgres server version
+Note: Replace `18` with your Postgres server version
 
 ### pkg
 

From 3bb30ed74c155b339d74641c07aa2a6a5aa06ae2 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 25 Sep 2025 15:54:37 -0700
Subject: [PATCH 56/64] Updated CI for Postgres 18 [skip ci]

---
 .github/workflows/build.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 25be06acc..4e72e2a19 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -80,7 +80,7 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - postgres: 17
+          - postgres: 18
             os: macos-26
           - postgres: 14
             os: macos-15-intel
@@ -91,7 +91,7 @@ jobs:
           postgres-version: ${{ matrix.postgres }}
       - run: make
         env:
-          PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unknown-warning-option
+          PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unknown-warning-option ${{ matrix.postgres >= 18 && '-Wno-missing-field-initializers' || '' }}
       - run: make install
       - run: make installcheck
       - if: ${{ failure() }}
@@ -104,7 +104,7 @@ jobs:
           tar xf $TAG.tar.gz
           mv postgres-$TAG postgres
         env:
-          TAG: ${{ matrix.postgres == 17 && 'REL_17_6' || 'REL_14_19' }}
+          TAG: ${{ matrix.postgres == 18 && 'REL_18_0' || 'REL_14_19' }}
       - run: make prove_installcheck PROVE_FLAGS="-I ./postgres/src/test/perl -I ./test/perl"
         env:
           PERL5LIB: /Users/runner/perl5/lib/perl5
@@ -168,7 +168,7 @@ jobs:
       - uses: actions/checkout@v5
       - uses: ankane/setup-postgres-valgrind@v1
         with:
-          postgres-version: 17
+          postgres-version: 18
           check-ub: yes
       - run: make OPTFLAGS=""
       - run: sudo --preserve-env=PG_CONFIG make install

From 410a616a840fbe99f3f6034cd9e48afded395f41 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 25 Sep 2025 15:55:30 -0700
Subject: [PATCH 57/64] Updated style to be consistent with Mac [skip ci]

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 4e72e2a19..9f541f131 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -30,7 +30,7 @@ jobs:
           dev-files: true
       - run: make
         env:
-          PG_CFLAGS: ${{ matrix.postgres >= 18 && '-Wno-missing-field-initializers' || '' }} -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare
+          PG_CFLAGS: -DUSE_ASSERT_CHECKING -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare ${{ matrix.postgres >= 18 && '-Wno-missing-field-initializers' || '' }}
       - run: |
           export PG_CONFIG=`which pg_config`
           sudo --preserve-env=PG_CONFIG make install

From 85fb52ebef58b3f0266eca49f7fef1e8611c458e Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Thu, 25 Sep 2025 20:00:10 -0700
Subject: [PATCH 58/64] Added Docker images for Postgres 18 [skip ci]

---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f6607151d..85e98e09a 100644
--- a/README.md
+++ b/README.md
@@ -1138,13 +1138,15 @@ If installation fails with `Access is denied`, re-run the installation instructi
 Get the [Docker image](https://hub.docker.com/r/pgvector/pgvector) with:
 
 ```sh
-docker pull pgvector/pgvector:pg17-trixie
+docker pull pgvector/pgvector:pg18-trixie
 ```
 
-This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `17` with your Postgres server version, and run it the same way).
+This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `18` with your Postgres server version, and run it the same way).
 
 Supported tags are:
 
+- `pg18-trixie`, `0.8.1-pg18-trixie`
+- `pg18-bookworm`, `0.8.1-pg18-bookworm`, `pg18`, `0.8.1-pg18`
 - `pg17-trixie`, `0.8.1-pg17-trixie`
 - `pg17-bookworm`, `0.8.1-pg17-bookworm`, `pg17`, `0.8.1-pg17`
 - `pg16-trixie`, `0.8.1-pg16-trixie`
@@ -1161,7 +1163,7 @@ You can also build the image manually:
 ```sh
 git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git
 cd pgvector
-docker build --pull --build-arg PG_MAJOR=17 -t myuser/pgvector .
+docker build --pull --build-arg PG_MAJOR=18 -t myuser/pgvector .
 ```
 
 If you increase `maintenance_work_mem`, make sure `--shm-size` is at least that size to avoid an error with parallel HNSW index builds.

From 615beccfe509f941ee91b4242f3acc6828c4a7a5 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Fri, 26 Sep 2025 13:02:37 -0700
Subject: [PATCH 59/64] Updated readme [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 85e98e09a..b8d06279c 100644
--- a/README.md
+++ b/README.md
@@ -1180,7 +1180,7 @@ With Homebrew Postgres, you can use:
 brew install pgvector
 ```
 
-Note: This only adds it to the `postgresql@17` and `postgresql@14` formulas
+Note: This only adds it to the `postgresql@18` and `postgresql@17` formulas
 
 ### PGXN
 

From 990b24771399c48ba6d2fd13d1ddcd0b6fad8a30 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 27 Sep 2025 15:51:32 -0700
Subject: [PATCH 60/64] Removed unused parameters [skip ci]

---
 src/hnswbuild.c  |  8 ++++----
 src/hnswinsert.c |  4 ++--
 src/ivfbuild.c   | 10 +++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index b667478b6..08d8d5641 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -398,7 +398,7 @@ UpdateNeighborsInMemory(char *base, HnswSupport * support, HnswElement e, int m)
  * Update graph in memory
  */
 static void
-UpdateGraphInMemory(HnswSupport * support, HnswElement element, int m, int efConstruction, HnswElement entryPoint, HnswBuildState * buildstate)
+UpdateGraphInMemory(HnswSupport * support, HnswElement element, int m, HnswElement entryPoint, HnswBuildState * buildstate)
 {
 	HnswGraph  *graph = buildstate->graph;
 	char	   *base = buildstate->hnswarea;
@@ -460,7 +460,7 @@ InsertTupleInMemory(HnswBuildState * buildstate, HnswElement element)
 	HnswFindElementNeighbors(base, element, entryPoint, NULL, support, m, efConstruction, false);
 
 	/* Update graph in memory */
-	UpdateGraphInMemory(support, element, m, efConstruction, entryPoint, buildstate);
+	UpdateGraphInMemory(support, element, m, entryPoint, buildstate);
 
 	/* Release entry lock */
 	LWLockRelease(entryLock);
@@ -1054,7 +1054,7 @@ ComputeParallelWorkers(Relation heap, Relation index)
  * Build graph
  */
 static void
-BuildGraph(HnswBuildState * buildstate, ForkNumber forkNum)
+BuildGraph(HnswBuildState * buildstate)
 {
 	int			parallel_workers = 0;
 
@@ -1102,7 +1102,7 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
 
 	InitBuildState(buildstate, heap, index, indexInfo, forkNum);
 
-	BuildGraph(buildstate, forkNum);
+	BuildGraph(buildstate);
 
 	if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
 		log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
diff --git a/src/hnswinsert.c b/src/hnswinsert.c
index a5fac4eda..8adfaff01 100644
--- a/src/hnswinsert.c
+++ b/src/hnswinsert.c
@@ -660,7 +660,7 @@ FindDuplicateOnDisk(Relation index, HnswElement element, bool building)
  * Update graph on disk
  */
 static void
-UpdateGraphOnDisk(Relation index, HnswSupport * support, HnswElement element, int m, int efConstruction, HnswElement entryPoint, bool building)
+UpdateGraphOnDisk(Relation index, HnswSupport * support, HnswElement element, int m, HnswElement entryPoint, bool building)
 {
 	BlockNumber newInsertPage = InvalidBlockNumber;
 
@@ -728,7 +728,7 @@ HnswInsertTupleOnDisk(Relation index, HnswSupport * support, Datum value, ItemPo
 	HnswFindElementNeighbors(base, element, entryPoint, index, support, m, efConstruction, false);
 
 	/* Update graph on disk */
-	UpdateGraphOnDisk(index, support, element, m, efConstruction, entryPoint, building);
+	UpdateGraphOnDisk(index, support, element, m, entryPoint, building);
 
 	/* Release lock */
 	UnlockPage(index, HNSW_UPDATE_LOCK, lockmode);
diff --git a/src/ivfbuild.c b/src/ivfbuild.c
index c51f40884..80433d832 100644
--- a/src/ivfbuild.c
+++ b/src/ivfbuild.c
@@ -138,7 +138,7 @@ SampleRows(IvfflatBuildState * buildstate)
  * Add tuple to sort
  */
 static void
-AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState * buildstate)
+AddTupleToSort(ItemPointer tid, Datum *values, IvfflatBuildState * buildstate)
 {
 	double		distance;
 	double		minDistance = DBL_MAX;
@@ -215,7 +215,7 @@ BuildCallback(Relation index, ItemPointer tid, Datum *values,
 	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
 
 	/* Add tuple to sort */
-	AddTupleToSort(index, tid, values, buildstate);
+	AddTupleToSort(tid, values, buildstate);
 
 	/* Reset memory context */
 	MemoryContextSwitchTo(oldCtx);
@@ -470,8 +470,8 @@ CreateMetaPage(Relation index, int dimensions, int lists, ForkNumber forkNum)
  * Create list pages
  */
 static void
-CreateListPages(Relation index, VectorArray centers, int dimensions,
-				int lists, ForkNumber forkNum, ListInfo * *listInfo)
+CreateListPages(Relation index, VectorArray centers, int lists,
+				ForkNumber forkNum, ListInfo * *listInfo)
 {
 	Buffer		buf;
 	Page		page;
@@ -1004,7 +1004,7 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
 
 	/* Create pages */
 	CreateMetaPage(index, buildstate->dimensions, buildstate->lists, forkNum);
-	CreateListPages(index, buildstate->centers, buildstate->dimensions, buildstate->lists, forkNum, &buildstate->listInfo);
+	CreateListPages(index, buildstate->centers, buildstate->lists, forkNum, &buildstate->listInfo);
 	CreateEntryPages(buildstate, forkNum);
 
 	/* Write WAL for initialization fork since GenericXLog functions do not */

From e11534985e5e55312d3770318b1f2c435cf2e6cc Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Sat, 27 Sep 2025 15:58:18 -0700
Subject: [PATCH 61/64] Removed unused parameter [skip ci]

---
 src/ivfinsert.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ivfinsert.c b/src/ivfinsert.c
index 014c9be82..ebc61e49d 100644
--- a/src/ivfinsert.c
+++ b/src/ivfinsert.c
@@ -65,7 +65,7 @@ FindInsertPage(Relation index, Datum *values, BlockNumber *insertPage, ListInfo
  * Insert a tuple into the index
  */
 static void
-InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel)
+InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid)
 {
 	const		IvfflatTypeInfo *typeInfo = IvfflatGetTypeInfo(index);
 	IndexTuple	itup;
@@ -204,7 +204,7 @@ ivfflatinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid,
 	oldCtx = MemoryContextSwitchTo(insertCtx);
 
 	/* Insert tuple */
-	InsertTuple(index, values, isnull, heap_tid, heap);
+	InsertTuple(index, values, isnull, heap_tid);
 
 	/* Delete memory context */
 	MemoryContextSwitchTo(oldCtx);

From 2393c55b8ae5678f16105fedfef9b25582275ad0 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 22 Oct 2025 11:47:11 -0700
Subject: [PATCH 62/64] Fixed Index Searches in EXPLAIN output for Postgres 18

---
 CHANGELOG.md   | 4 ++++
 src/hnswscan.c | 4 ++++
 src/ivfscan.c  | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8618d1afd..a3515593f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.8.2 (unreleased)
+
+- Fixed `Index Searches` in `EXPLAIN` output for Postgres 18
+
 ## 0.8.1 (2025-09-04)
 
 - Added support for Postgres 18 rc1
diff --git a/src/hnswscan.c b/src/hnswscan.c
index 955998a52..5c526f4a6 100644
--- a/src/hnswscan.c
+++ b/src/hnswscan.c
@@ -193,6 +193,10 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
 
 		/* Count index scan for stats */
 		pgstat_count_index_scan(scan->indexRelation);
+#if PG_VERSION_NUM >= 180000
+		if (scan->instrument)
+			scan->instrument->nsearches++;
+#endif
 
 		/* Safety check */
 		if (scan->orderByData == NULL)
diff --git a/src/ivfscan.c b/src/ivfscan.c
index 6cc5d2efd..b42ada15d 100644
--- a/src/ivfscan.c
+++ b/src/ivfscan.c
@@ -355,6 +355,10 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
 
 		/* Count index scan for stats */
 		pgstat_count_index_scan(scan->indexRelation);
+#if PG_VERSION_NUM >= 180000
+		if (scan->instrument)
+			scan->instrument->nsearches++;
+#endif
 
 		/* Safety check */
 		if (scan->orderByData == NULL)

From 80027b669f2dae1fcdb597cd286fcfc3cc67fe7e Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 22 Oct 2025 11:56:05 -0700
Subject: [PATCH 63/64] Added varatt headers for Postgres 16+

---
 src/hnswbuild.c  | 4 ++++
 src/hnswinsert.c | 4 ++++
 src/hnswutils.c  | 4 ++++
 src/hnswvacuum.c | 4 ++++
 src/ivfbuild.c   | 4 ++++
 src/ivfflat.h    | 4 ++++
 src/ivfkmeans.c  | 4 ++++
 7 files changed, 28 insertions(+)

diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index 08d8d5641..03f0ef4a4 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -54,6 +54,10 @@
 #include "utils/datum.h"
 #include "utils/memutils.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
 #if PG_VERSION_NUM >= 140000
 #include "utils/backend_progress.h"
 #else
diff --git a/src/hnswinsert.c b/src/hnswinsert.c
index 8adfaff01..a4d288506 100644
--- a/src/hnswinsert.c
+++ b/src/hnswinsert.c
@@ -9,6 +9,10 @@
 #include "utils/datum.h"
 #include "utils/memutils.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
 /*
  * Get the insert page
  */
diff --git a/src/hnswutils.c b/src/hnswutils.c
index c52d2c78a..8e2a42c1d 100644
--- a/src/hnswutils.c
+++ b/src/hnswutils.c
@@ -15,6 +15,10 @@
 #include "utils/memdebug.h"
 #include "utils/rel.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
 #if PG_VERSION_NUM < 170000
 static inline uint64
 murmurhash64(uint64 data)
diff --git a/src/hnswvacuum.c b/src/hnswvacuum.c
index 2f7b2f372..3a8ee26c0 100644
--- a/src/hnswvacuum.c
+++ b/src/hnswvacuum.c
@@ -9,6 +9,10 @@
 #include "storage/lmgr.h"
 #include "utils/memutils.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
 #if PG_VERSION_NUM >= 180000
 #define vacuum_delay_point() vacuum_delay_point(false)
 #endif
diff --git a/src/ivfbuild.c b/src/ivfbuild.c
index 80433d832..7166b7da3 100644
--- a/src/ivfbuild.c
+++ b/src/ivfbuild.c
@@ -20,6 +20,10 @@
 #include "utils/memutils.h"
 #include "vector.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
 #if PG_VERSION_NUM >= 140000
 #include "utils/backend_progress.h"
 #else
diff --git a/src/ivfflat.h b/src/ivfflat.h
index 76608f334..c4e64e1dc 100644
--- a/src/ivfflat.h
+++ b/src/ivfflat.h
@@ -13,6 +13,10 @@
 #include "utils/tuplesort.h"
 #include "vector.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
 #if PG_VERSION_NUM >= 150000
 #include "common/pg_prng.h"
 #endif
diff --git a/src/ivfkmeans.c b/src/ivfkmeans.c
index 4b6d14f1a..9c79e64ad 100644
--- a/src/ivfkmeans.c
+++ b/src/ivfkmeans.c
@@ -13,6 +13,10 @@
 #include "utils/memutils.h"
 #include "vector.h"
 
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
 /*
  * Initialize with kmeans++
  *

From 832b4b79b987c094a17fb420a54230cc9e4f69e2 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 22 Oct 2025 13:10:58 -0700
Subject: [PATCH 64/64] Updated readme [skip ci]

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b8d06279c..0f7ee7a51 100644
--- a/README.md
+++ b/README.md
@@ -812,7 +812,7 @@ Yes, pgvector uses the write-ahead log (WAL), which allows for replication and p
 
 #### What if I want to index vectors with more than 2,000 dimensions?
 
-You can use [half-precision indexing](#half-precision-indexing) to index up to 4,000 dimensions or [binary quantization](#binary-quantization) to index up to 64,000 dimensions. Another option is [dimensionality reduction](https://en.wikipedia.org/wiki/Dimensionality_reduction).
+You can use [half-precision vectors](#half-precision-vectors) or [half-precision indexing](#half-precision-indexing) to index up to 4,000 dimensions or [binary quantization](#binary-quantization) to index up to 64,000 dimensions. Other options are [indexing subvectors](#indexing-subvectors) (for models that support it) or [dimensionality reduction](https://en.wikipedia.org/wiki/Dimensionality_reduction).
 
 #### Can I store vectors with different dimensions in the same column?