Skip to content

Commit d8fc1bb

Browse files
authored
Merge pull request #665 from utopia-php/feat/postgresql-vector-support
Add vector attribute support for PostgreSQL with pgvector extension
2 parents 141338a + f06ef55 commit d8fc1bb

27 files changed

+3567
-270
lines changed

postgres.dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ RUN apt-get update \
44
&& apt-get install -y --no-install-recommends \
55
postgresql-16-postgis-3 \
66
postgresql-16-postgis-3-scripts \
7+
postgresql-16-pgvector \
78
&& rm -rf /var/lib/apt/lists/*

src/Database/Adapter.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,13 @@ abstract public function getSupportForGetConnectionId(): bool;
10211021
*/
10221022
abstract public function getSupportForUpserts(): bool;
10231023

1024+
/**
 * Is vector type supported?
 *
 * Declared abstract, so every concrete adapter must state whether it
 * supports the vector attribute type.
 *
 * @return bool True when the adapter supports vectors, false otherwise.
 */
1029+
abstract public function getSupportForVectors(): bool;
1030+
10241031
/**
10251032
* Is Cache Fallback supported?
10261033
*

src/Database/Adapter/MariaDB.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public function createCollection(string $name, array $attributes = [], array $in
139139
$indexAttributes[$nested] = "`{$indexAttribute}`{$indexLength} {$indexOrder}";
140140

141141
if (!empty($hash[$indexAttribute]['array']) && $this->getSupportForCastIndexArray()) {
142-
$indexAttributes[$nested] = '(CAST(`' . $indexAttribute . '` AS char(' . Database::ARRAY_INDEX_LENGTH . ') ARRAY))';
142+
$indexAttributes[$nested] = '(CAST(`' . $indexAttribute . '` AS char(' . Database::MAX_ARRAY_INDEX_LENGTH . ') ARRAY))';
143143
}
144144
}
145145

@@ -746,7 +746,7 @@ public function createIndex(string $collection, string $id, string $type, array
746746
$attributes[$i] = "`{$attr}`{$length} {$order}";
747747

748748
if ($this->getSupportForCastIndexArray() && !empty($attribute['array'])) {
749-
$attributes[$i] = '(CAST(`' . $attr . '` AS char(' . Database::ARRAY_INDEX_LENGTH . ') ARRAY))';
749+
$attributes[$i] = '(CAST(`' . $attr . '` AS char(' . Database::MAX_ARRAY_INDEX_LENGTH . ') ARRAY))';
750750
}
751751
}
752752

@@ -1890,7 +1890,7 @@ public function getSupportForDistanceBetweenMultiDimensionGeometryInMeters(): bo
18901890

18911891
public function getSpatialSQLType(string $type, bool $required): string
18921892
{
1893-
$srid = Database::SRID;
1893+
$srid = Database::DEFAULT_SRID;
18941894
$nullability = '';
18951895

18961896
if (!$this->getSupportForSpatialIndexNull()) {

src/Database/Adapter/MySQL.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ protected function handleDistanceSpatialQueries(Query $query, array &$binds, str
117117
}
118118

119119
if ($useMeters) {
120-
$attr = "ST_SRID({$alias}.{$attribute}, " . Database::SRID . ")";
120+
$attr = "ST_SRID({$alias}.{$attribute}, " . Database::DEFAULT_SRID . ")";
121121
$geom = $this->getSpatialGeomFromText(":{$placeholder}_0", null);
122122
return "ST_Distance({$attr}, {$geom}, 'metre') {$operator} :{$placeholder}_1";
123123
}

src/Database/Adapter/Pool.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,11 @@ public function getSupportForUpserts(): bool
405405
return $this->delegate(__FUNCTION__, \func_get_args());
406406
}
407407

408+
/**
 * Is vector type supported?
 *
 * Forwards the call to delegate(), passing this method's name and the
 * arguments it was invoked with.
 *
 * @return bool
 */
public function getSupportForVectors(): bool
{
    $arguments = \func_get_args();

    return $this->delegate(__FUNCTION__, $arguments);
}
412+
408413
public function getSupportForCacheSkipOnFailure(): bool
409414
{
410415
return $this->delegate(__FUNCTION__, \func_get_args());

src/Database/Adapter/Postgres.php

Lines changed: 99 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -147,15 +147,16 @@ public function create(string $name): bool
147147
->prepare($sql)
148148
->execute();
149149

150-
// extension for supporting spatial types
151-
$this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS postgis;')->execute();
150+
// Enable extensions
151+
$this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS postgis')->execute();
152+
$this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS vector')->execute();
152153

153154
$collation = "
154155
CREATE COLLATION IF NOT EXISTS utf8_ci_ai (
155156
provider = icu,
156157
locale = 'und-u-ks-level1',
157158
deterministic = false
158-
);
159+
)
159160
";
160161
$this->getPDO()->prepare($collation)->execute();
161162
return $dbCreation;
@@ -193,9 +194,6 @@ public function createCollection(string $name, array $attributes = [], array $in
193194
$namespace = $this->getNamespace();
194195
$id = $this->filter($name);
195196

196-
/** @var array<string> $attributeStrings */
197-
$attributeStrings = [];
198-
199197
/** @var array<string> $attributeStrings */
200198
$attributeStrings = [];
201199
foreach ($attributes as $attribute) {
@@ -443,6 +441,16 @@ public function analyzeCollection(string $collection): bool
443441
*/
444442
public function createAttribute(string $collection, string $id, string $type, int $size, bool $signed = true, bool $array = false, bool $required = false): bool
445443
{
444+
// Validate vector dimensions up front so an invalid size fails before the column type is built
445+
if ($type === Database::VAR_VECTOR) {
446+
if ($size <= 0) {
447+
throw new DatabaseException('Vector dimensions must be a positive integer');
448+
}
449+
if ($size > Database::MAX_VECTOR_DIMENSIONS) {
450+
throw new DatabaseException('Vector dimensions cannot exceed ' . Database::MAX_VECTOR_DIMENSIONS);
451+
}
452+
}
453+
446454
$name = $this->filter($collection);
447455
$id = $this->filter($id);
448456
$type = $this->getSQLType($type, $size, $signed, $array, $required);
@@ -543,7 +551,23 @@ public function updateAttribute(string $collection, string $id, string $type, in
543551
$name = $this->filter($collection);
544552
$id = $this->filter($id);
545553
$newKey = empty($newKey) ? null : $this->filter($newKey);
546-
$type = $this->getSQLType($type, $size, $signed, $array, $required);
554+
555+
if ($type === Database::VAR_VECTOR) {
556+
if ($size <= 0) {
557+
throw new DatabaseException('Vector dimensions must be a positive integer');
558+
}
559+
if ($size > Database::MAX_VECTOR_DIMENSIONS) {
560+
throw new DatabaseException('Vector dimensions cannot exceed ' . Database::MAX_VECTOR_DIMENSIONS);
561+
}
562+
}
563+
564+
$type = $this->getSQLType(
565+
$type,
566+
$size,
567+
$signed,
568+
$array,
569+
$required,
570+
);
547571

548572
if ($type == 'TIMESTAMP(3)') {
549573
$type = "TIMESTAMP(3) without time zone USING TO_TIMESTAMP(\"$id\", 'YYYY-MM-DD HH24:MI:SS.MS')";
@@ -841,7 +865,6 @@ public function createIndex(string $collection, string $id, string $type, array
841865
$collection = $this->filter($collection);
842866
$id = $this->filter($id);
843867

844-
845868
foreach ($attributes as $i => $attr) {
846869
$order = empty($orders[$i]) || Database::INDEX_FULLTEXT === $type ? '' : $orders[$i];
847870

@@ -857,29 +880,33 @@ public function createIndex(string $collection, string $id, string $type, array
857880

858881
$sqlType = match ($type) {
859882
Database::INDEX_KEY,
860-
Database::INDEX_FULLTEXT => 'INDEX',
883+
Database::INDEX_FULLTEXT,
884+
Database::INDEX_SPATIAL,
885+
Database::INDEX_HNSW_EUCLIDEAN,
886+
Database::INDEX_HNSW_COSINE,
887+
Database::INDEX_HNSW_DOT => 'INDEX',
861888
Database::INDEX_UNIQUE => 'UNIQUE INDEX',
862-
Database::INDEX_SPATIAL => 'INDEX',
863-
default => throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL),
889+
default => throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . Database::INDEX_HNSW_DOT),
864890
};
865891

866892
$key = "\"{$this->getNamespace()}_{$this->tenant}_{$collection}_{$id}\"";
867893
$attributes = \implode(', ', $attributes);
868894

869-
// Spatial indexes can't include _tenant because GIST indexes require all columns to have compatible operator classes
870-
if ($this->sharedTables && $type !== Database::INDEX_FULLTEXT && $type !== Database::INDEX_SPATIAL) {
895+
if ($this->sharedTables && \in_array($type, [Database::INDEX_KEY, Database::INDEX_UNIQUE])) {
871896
// Add tenant as first index column for best performance
872897
$attributes = "_tenant, {$attributes}";
873898
}
874899

875900
$sql = "CREATE {$sqlType} {$key} ON {$this->getSQLTable($collection)}";
876901

877-
// Add USING GIST for spatial indexes
878-
if ($type === Database::INDEX_SPATIAL) {
879-
$sql .= " USING GIST";
880-
}
881-
882-
$sql .= " ({$attributes});";
902+
// Add USING clause for special index types
903+
$sql .= match ($type) {
904+
Database::INDEX_SPATIAL => " USING GIST ({$attributes})",
905+
Database::INDEX_HNSW_EUCLIDEAN => " USING HNSW ({$attributes} vector_l2_ops)",
906+
Database::INDEX_HNSW_COSINE => " USING HNSW ({$attributes} vector_cosine_ops)",
907+
Database::INDEX_HNSW_DOT => " USING HNSW ({$attributes} vector_ip_ops)",
908+
default => " ({$attributes})",
909+
};
883910

884911
$sql = $this->trigger(Database::EVENT_INDEX_CREATE, $sql);
885912

@@ -1480,7 +1507,7 @@ protected function handleDistanceSpatialQueries(Query $query, array &$binds, str
14801507

14811508
if ($meters) {
14821509
$attr = "({$alias}.{$attribute}::geography)";
1483-
$geom = "ST_SetSRID(" . $this->getSpatialGeomFromText(":{$placeholder}_0", null) . ", " . Database::SRID . ")::geography";
1510+
$geom = "ST_SetSRID(" . $this->getSpatialGeomFromText(":{$placeholder}_0", null) . ", " . Database::DEFAULT_SRID . ")::geography";
14841511
return "ST_Distance({$attr}, {$geom}) {$operator} :{$placeholder}_1";
14851512
}
14861513

@@ -1605,6 +1632,11 @@ protected function getSQLCondition(Query $query, array &$binds): string
16051632
$binds[":{$placeholder}_0"] = $this->getFulltextValue($query->getValue());
16061633
return "NOT (to_tsvector(regexp_replace({$attribute}, '[^\w]+',' ','g')) @@ websearch_to_tsquery(:{$placeholder}_0))";
16071634

1635+
case Query::TYPE_VECTOR_DOT:
1636+
case Query::TYPE_VECTOR_COSINE:
1637+
case Query::TYPE_VECTOR_EUCLIDEAN:
1638+
return ''; // Handled in ORDER BY clause
1639+
16081640
case Query::TYPE_BETWEEN:
16091641
$binds[":{$placeholder}_0"] = $query->getValues()[0];
16101642
$binds[":{$placeholder}_1"] = $query->getValues()[1];
@@ -1623,8 +1655,6 @@ protected function getSQLCondition(Query $query, array &$binds): string
16231655
case Query::TYPE_NOT_CONTAINS:
16241656
if ($query->onArray()) {
16251657
$operator = '@>';
1626-
} else {
1627-
$operator = null;
16281658
}
16291659

16301660
// no break
@@ -1665,6 +1695,37 @@ protected function getSQLCondition(Query $query, array &$binds): string
16651695
}
16661696
}
16671697

1698+
/**
 * Get vector distance calculation for ORDER BY clause
 *
 * Builds the distance expression between a vector column and the query
 * vector, and registers the JSON-encoded query vector in $binds under a
 * unique ":vector_*" placeholder.
 *
 * @param Query $query Query whose first value is the vector to compare against
 * @param array<string, mixed> $binds Bind map, passed by reference; only modified when an expression is returned
 * @param string $alias Table alias used to qualify the column
 * @return string|null Distance expression, or null when the query method is not a vector method
 * @throws DatabaseException When the query vector is missing, empty, not an array, or cannot be encoded
 */
protected function getVectorDistanceOrder(Query $query, array &$binds, string $alias): ?string
{
    $query->setAttribute($this->getInternalKeyForAttribute($query->getAttribute()));

    $attribute = $this->filter($query->getAttribute());
    $attribute = $this->quote($attribute);
    $alias = $this->quote($alias);

    // Resolve the operator before touching $binds, so a non-vector query
    // returns null without leaving an unused placeholder in the bind map.
    $operator = match ($query->getMethod()) {
        Query::TYPE_VECTOR_DOT => '<#>',
        Query::TYPE_VECTOR_COSINE => '<=>',
        Query::TYPE_VECTOR_EUCLIDEAN => '<->',
        default => null,
    };

    if ($operator === null) {
        return null;
    }

    $vectorArray = $query->getValues()[0] ?? null;
    if (!\is_array($vectorArray) || $vectorArray === []) {
        throw new DatabaseException('Vector query value must be a non-empty array of numbers');
    }

    try {
        // array_values() forces a JSON list ("[1.0,2.0]") even when the input keys are not sequential
        $vector = \json_encode(
            \array_map(\floatval(...), \array_values($vectorArray)),
            \JSON_THROW_ON_ERROR,
        );
    } catch (\JsonException $e) {
        // INF / NAN components cannot be JSON encoded
        throw new DatabaseException('Vector query value could not be encoded: ' . $e->getMessage());
    }

    $placeholder = ID::unique();
    $binds[":vector_{$placeholder}"] = $vector;

    return "({$alias}.{$attribute} {$operator} :vector_{$placeholder}::vector)";
}
1728+
16681729
/**
16691730
* @param string $value
16701731
* @return string
@@ -1732,15 +1793,17 @@ protected function getSQLType(string $type, int $size, bool $signed = true, bool
17321793
case Database::VAR_DATETIME:
17331794
return 'TIMESTAMP(3)';
17341795

1735-
// in all other DB engines, 4326 is the default SRID
17361796
case Database::VAR_POINT:
1737-
return 'GEOMETRY(POINT,' . Database::SRID . ')';
1797+
return 'GEOMETRY(POINT,' . Database::DEFAULT_SRID . ')';
17381798

17391799
case Database::VAR_LINESTRING:
1740-
return 'GEOMETRY(LINESTRING,' . Database::SRID . ')';
1800+
return 'GEOMETRY(LINESTRING,' . Database::DEFAULT_SRID . ')';
17411801

17421802
case Database::VAR_POLYGON:
1743-
return 'GEOMETRY(POLYGON,' . Database::SRID . ')';
1803+
return 'GEOMETRY(POLYGON,' . Database::DEFAULT_SRID . ')';
1804+
1805+
case Database::VAR_VECTOR:
1806+
return "VECTOR({$size})";
17441807

17451808
default:
17461809
throw new DatabaseException('Unknown Type: ' . $type . '. Must be one of ' . Database::VAR_STRING . ', ' . Database::VAR_INTEGER . ', ' . Database::VAR_FLOAT . ', ' . Database::VAR_BOOLEAN . ', ' . Database::VAR_DATETIME . ', ' . Database::VAR_RELATIONSHIP . ', ' . Database::VAR_POINT . ', ' . Database::VAR_LINESTRING . ', ' . Database::VAR_POLYGON);
@@ -1889,6 +1952,16 @@ public function getSupportForUpserts(): bool
18891952
return true;
18901953
}
18911954

1955+
/**
 * Is vector type supported?
 *
 * This adapter always reports vector support.
 *
 * @return bool Always true.
 */
public function getSupportForVectors(): bool
{
    return true;
}
1964+
18921965
/**
18931966
* @return string
18941967
*/

0 commit comments

Comments
 (0)