diff --git a/java/pom.xml b/java/pom.xml index 747f4f86182..c3392e0c76a 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -106,9 +106,9 @@ 33.3.1-jre - com.lancedb - lance-namespace-core - 0.0.20 + org.lance + lance-namespace-apache-client + 0.1.0 com.fasterxml.jackson.core diff --git a/java/src/main/java/org/lance/OpenDatasetBuilder.java b/java/src/main/java/org/lance/OpenDatasetBuilder.java index 3154924afb9..ae082e14ceb 100644 --- a/java/src/main/java/org/lance/OpenDatasetBuilder.java +++ b/java/src/main/java/org/lance/OpenDatasetBuilder.java @@ -13,11 +13,11 @@ */ package org.lance; +import org.lance.namespace.LanceNamespace; import org.lance.namespace.LanceNamespaceStorageOptionsProvider; +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; -import com.lancedb.lance.namespace.LanceNamespace; -import com.lancedb.lance.namespace.model.DescribeTableRequest; -import com.lancedb.lance.namespace.model.DescribeTableResponse; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.Preconditions; diff --git a/java/src/main/java/org/lance/WriteDatasetBuilder.java b/java/src/main/java/org/lance/WriteDatasetBuilder.java index 03e82c358aa..74f8c298fe8 100644 --- a/java/src/main/java/org/lance/WriteDatasetBuilder.java +++ b/java/src/main/java/org/lance/WriteDatasetBuilder.java @@ -14,13 +14,13 @@ package org.lance; import org.lance.io.StorageOptionsProvider; +import org.lance.namespace.LanceNamespace; import org.lance.namespace.LanceNamespaceStorageOptionsProvider; +import org.lance.namespace.model.CreateEmptyTableRequest; +import org.lance.namespace.model.CreateEmptyTableResponse; +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; -import com.lancedb.lance.namespace.LanceNamespace; -import com.lancedb.lance.namespace.model.CreateEmptyTableRequest; -import com.lancedb.lance.namespace.model.CreateEmptyTableResponse; -import com.lancedb.lance.namespace.model.DescribeTableRequest; -import com.lancedb.lance.namespace.model.DescribeTableResponse; import org.apache.arrow.c.ArrowArrayStream; import org.apache.arrow.c.Data; import org.apache.arrow.memory.BufferAllocator; diff --git a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java index 554e1a5c248..19de6d0a4bf 100644 --- a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java +++ b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java @@ -14,11 +14,10 @@ package org.lance.namespace; import org.lance.JniLoader; +import org.lance.namespace.model.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.lancedb.lance.namespace.LanceNamespace; -import com.lancedb.lance.namespace.model.*; import org.apache.arrow.memory.BufferAllocator; import java.io.Closeable; diff --git a/java/src/main/java/org/lance/namespace/LanceNamespace.java b/java/src/main/java/org/lance/namespace/LanceNamespace.java new file mode 100644 index 00000000000..e0c713532bf --- /dev/null +++ b/java/src/main/java/org/lance/namespace/LanceNamespace.java @@ -0,0 +1,427 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace; + +import org.lance.namespace.model.*; + +import org.apache.arrow.memory.BufferAllocator; + +import java.lang.reflect.Constructor; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Interface for LanceDB namespace operations. + * + *

A namespace provides hierarchical organization for tables and supports various storage + * backends (local filesystem, S3, Azure, GCS) with optional credential vending for cloud providers. + * + *

Implementations of this interface can provide different storage backends: + * + *

+ * + *

External libraries can implement this interface to provide integration with catalog systems + * like AWS Glue, Hive Metastore, or Databricks Unity Catalog. + * + *

Most methods have default implementations that throw {@link UnsupportedOperationException}. + * Implementations should override the methods they support. + * + *

Use {@link #connect(String, Map, BufferAllocator)} to create namespace instances, and {@link + * #registerNamespaceImpl(String, String)} to register external implementations. + */ +public interface LanceNamespace { + + // ========== Static Registry and Factory Methods ========== + + /** Native implementations (Rust-backed). */ + Map NATIVE_IMPLS = + Collections.unmodifiableMap( + new HashMap() { + { + put("dir", "org.lance.namespace.DirectoryNamespace"); + put("rest", "org.lance.namespace.RestNamespace"); + } + }); + + /** Plugin registry for external implementations. Thread-safe for concurrent access. */ + Map REGISTERED_IMPLS = new ConcurrentHashMap<>(); + + /** + * Register a namespace implementation with a short name. + * + *

External libraries can use this to register their implementations, allowing users to use + * short names like "glue" instead of full class paths. + * + * @param name Short name for the implementation (e.g., "glue", "hive2", "unity") + * @param className Full class name (e.g., "org.lance.namespace.glue.GlueNamespace") + */ + static void registerNamespaceImpl(String name, String className) { + REGISTERED_IMPLS.put(name, className); + } + + /** + * Unregister a previously registered namespace implementation. + * + * @param name Short name of the implementation to unregister + * @return true if an implementation was removed, false if it wasn't registered + */ + static boolean unregisterNamespaceImpl(String name) { + return REGISTERED_IMPLS.remove(name) != null; + } + + /** + * Check if an implementation is registered with the given name. + * + * @param name Short name or class name to check + * @return true if the implementation is available + */ + static boolean isRegistered(String name) { + return NATIVE_IMPLS.containsKey(name) || REGISTERED_IMPLS.containsKey(name); + } + + /** + * Connect to a Lance namespace implementation. + * + *

This factory method creates namespace instances based on implementation aliases or full + * class names. It provides a unified way to instantiate different namespace backends. + * + * @param impl Implementation alias or full class name. Built-in aliases: "dir" for + * DirectoryNamespace, "rest" for RestNamespace. External libraries can register additional + * aliases using {@link #registerNamespaceImpl(String, String)}. + * @param properties Configuration properties passed to the namespace + * @param allocator Arrow buffer allocator for memory management + * @return The connected namespace instance + * @throws IllegalArgumentException If the implementation class cannot be loaded or does not + * implement LanceNamespace interface + */ + static LanceNamespace connect( + String impl, Map properties, BufferAllocator allocator) { + // Check native impls first, then registered plugins, then treat as full class name + String className = NATIVE_IMPLS.get(impl); + if (className == null) { + className = REGISTERED_IMPLS.get(impl); + } + if (className == null) { + className = impl; + } + + try { + Class clazz = Class.forName(className); + + if (!LanceNamespace.class.isAssignableFrom(clazz)) { + throw new IllegalArgumentException( + "Class " + className + " does not implement LanceNamespace interface"); + } + + @SuppressWarnings("unchecked") + Class namespaceClass = (Class) clazz; + + Constructor constructor = namespaceClass.getConstructor(); + LanceNamespace namespace = constructor.newInstance(); + namespace.initialize(properties, allocator); + + return namespace; + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("Namespace implementation class not found: " + className); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException( + "Namespace implementation class " + className + " must have a no-arg constructor"); + } catch (Exception e) { + throw new IllegalArgumentException( + "Failed to construct namespace impl " + className + ": " + e.getMessage(), e); + } + } + + // ========== Instance Methods ========== + + /** + * Initialize the namespace with configuration properties. + * + * @param configProperties Configuration properties (e.g., root path, storage options) + * @param allocator Arrow buffer allocator for memory management + */ + void initialize(Map configProperties, BufferAllocator allocator); + + /** + * Return a human-readable unique identifier for this namespace instance. + * + *

This is used for equality comparison and caching. Two namespace instances with the same ID + * are considered equal and will share cached resources. + * + * @return A human-readable unique identifier string + */ + String namespaceId(); + + // Namespace operations + + /** + * List namespaces. + * + * @param request The list namespaces request + * @return The list namespaces response + */ + default ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { + throw new UnsupportedOperationException("Not supported: listNamespaces"); + } + + /** + * Describe a namespace. + * + * @param request The describe namespace request + * @return The describe namespace response + */ + default DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest request) { + throw new UnsupportedOperationException("Not supported: describeNamespace"); + } + + /** + * Create a new namespace. + * + * @param request The create namespace request + * @return The create namespace response + */ + default CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { + throw new UnsupportedOperationException("Not supported: createNamespace"); + } + + /** + * Drop a namespace. + * + * @param request The drop namespace request + * @return The drop namespace response + */ + default DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { + throw new UnsupportedOperationException("Not supported: dropNamespace"); + } + + /** + * Check if a namespace exists. + * + * @param request The namespace exists request + * @throws RuntimeException if the namespace does not exist + */ + default void namespaceExists(NamespaceExistsRequest request) { + throw new UnsupportedOperationException("Not supported: namespaceExists"); + } + + // Table operations + + /** + * List tables in a namespace. + * + * @param request The list tables request + * @return The list tables response + */ + default ListTablesResponse listTables(ListTablesRequest request) { + throw new UnsupportedOperationException("Not supported: listTables"); + } + + /** + * Describe a table. + * + * @param request The describe table request + * @return The describe table response + */ + default DescribeTableResponse describeTable(DescribeTableRequest request) { + throw new UnsupportedOperationException("Not supported: describeTable"); + } + + /** + * Register a table. + * + * @param request The register table request + * @return The register table response + */ + default RegisterTableResponse registerTable(RegisterTableRequest request) { + throw new UnsupportedOperationException("Not supported: registerTable"); + } + + /** + * Check if a table exists. + * + * @param request The table exists request + * @throws RuntimeException if the table does not exist + */ + default void tableExists(TableExistsRequest request) { + throw new UnsupportedOperationException("Not supported: tableExists"); + } + + /** + * Drop a table. + * + * @param request The drop table request + * @return The drop table response + */ + default DropTableResponse dropTable(DropTableRequest request) { + throw new UnsupportedOperationException("Not supported: dropTable"); + } + + /** + * Deregister a table. + * + * @param request The deregister table request + * @return The deregister table response + */ + default DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { + throw new UnsupportedOperationException("Not supported: deregisterTable"); + } + + /** + * Count rows in a table. + * + * @param request The count table rows request + * @return The row count + */ + default Long countTableRows(CountTableRowsRequest request) { + throw new UnsupportedOperationException("Not supported: countTableRows"); + } + + // Data operations + + /** + * Create a new table with data from Arrow IPC stream. + * + * @param request The create table request + * @param requestData Arrow IPC stream data + * @return The create table response + */ + default CreateTableResponse createTable(CreateTableRequest request, byte[] requestData) { + throw new UnsupportedOperationException("Not supported: createTable"); + } + + /** + * Create an empty table (metadata only operation). + * + * @param request The create empty table request + * @return The create empty table response + */ + default CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { + throw new UnsupportedOperationException("Not supported: createEmptyTable"); + } + + /** + * Insert data into a table. + * + * @param request The insert into table request + * @param requestData Arrow IPC stream data + * @return The insert into table response + */ + default InsertIntoTableResponse insertIntoTable( + InsertIntoTableRequest request, byte[] requestData) { + throw new UnsupportedOperationException("Not supported: insertIntoTable"); + } + + /** + * Merge insert data into a table. + * + * @param request The merge insert into table request + * @param requestData Arrow IPC stream data + * @return The merge insert into table response + */ + default MergeInsertIntoTableResponse mergeInsertIntoTable( + MergeInsertIntoTableRequest request, byte[] requestData) { + throw new UnsupportedOperationException("Not supported: mergeInsertIntoTable"); + } + + /** + * Update a table. + * + * @param request The update table request + * @return The update table response + */ + default UpdateTableResponse updateTable(UpdateTableRequest request) { + throw new UnsupportedOperationException("Not supported: updateTable"); + } + + /** + * Delete from a table. + * + * @param request The delete from table request + * @return The delete from table response + */ + default DeleteFromTableResponse deleteFromTable(DeleteFromTableRequest request) { + throw new UnsupportedOperationException("Not supported: deleteFromTable"); + } + + /** + * Query a table. + * + * @param request The query table request + * @return Arrow IPC stream data containing query results + */ + default byte[] queryTable(QueryTableRequest request) { + throw new UnsupportedOperationException("Not supported: queryTable"); + } + + // Index operations + + /** + * Create a table index. + * + * @param request The create table index request + * @return The create table index response + */ + default CreateTableIndexResponse createTableIndex(CreateTableIndexRequest request) { + throw new UnsupportedOperationException("Not supported: createTableIndex"); + } + + /** + * List table indices. + * + * @param request The list table indices request + * @return The list table indices response + */ + default ListTableIndicesResponse listTableIndices(ListTableIndicesRequest request) { + throw new UnsupportedOperationException("Not supported: listTableIndices"); + } + + /** + * Describe table index statistics. + * + * @param request The describe table index stats request + * @param indexName The name of the index + * @return The describe table index stats response + */ + default DescribeTableIndexStatsResponse describeTableIndexStats( + DescribeTableIndexStatsRequest request, String indexName) { + throw new UnsupportedOperationException("Not supported: describeTableIndexStats"); + } + + // Transaction operations + + /** + * Describe a transaction. + * + * @param request The describe transaction request + * @return The describe transaction response + */ + default DescribeTransactionResponse describeTransaction(DescribeTransactionRequest request) { + throw new UnsupportedOperationException("Not supported: describeTransaction"); + } + + /** + * Alter a transaction. + * + * @param request The alter transaction request + * @return The alter transaction response + */ + default AlterTransactionResponse alterTransaction(AlterTransactionRequest request) { + throw new UnsupportedOperationException("Not supported: alterTransaction"); + } +} diff --git a/java/src/main/java/org/lance/namespace/LanceNamespaceStorageOptionsProvider.java b/java/src/main/java/org/lance/namespace/LanceNamespaceStorageOptionsProvider.java index 3869c8fcc08..f8a92936666 100644 --- a/java/src/main/java/org/lance/namespace/LanceNamespaceStorageOptionsProvider.java +++ b/java/src/main/java/org/lance/namespace/LanceNamespaceStorageOptionsProvider.java @@ -14,9 +14,8 @@ package org.lance.namespace; import org.lance.io.StorageOptionsProvider; - -import com.lancedb.lance.namespace.model.DescribeTableRequest; -import com.lancedb.lance.namespace.model.DescribeTableResponse; +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; import java.util.List; import java.util.Map; @@ -57,7 +56,7 @@ */ public class LanceNamespaceStorageOptionsProvider implements StorageOptionsProvider { - private final com.lancedb.lance.namespace.LanceNamespace namespace; + private final LanceNamespace namespace; private final List tableId; /** @@ -66,8 +65,7 @@ public class LanceNamespaceStorageOptionsProvider implements StorageOptionsProvi * @param namespace The namespace instance to fetch storage options from * @param tableId The table identifier (e.g., ["workspace", "table_name"]) */ - public LanceNamespaceStorageOptionsProvider( - com.lancedb.lance.namespace.LanceNamespace namespace, List tableId) { + public LanceNamespaceStorageOptionsProvider(LanceNamespace namespace, List tableId) { this.namespace = namespace; this.tableId = tableId; } diff --git a/java/src/main/java/org/lance/namespace/RestNamespace.java b/java/src/main/java/org/lance/namespace/RestNamespace.java index 834a7a9f73b..995c53c4b92 100644 --- a/java/src/main/java/org/lance/namespace/RestNamespace.java +++ b/java/src/main/java/org/lance/namespace/RestNamespace.java @@ -14,11 +14,10 @@ package org.lance.namespace; import org.lance.JniLoader; +import org.lance.namespace.model.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.lancedb.lance.namespace.LanceNamespace; -import com.lancedb.lance.namespace.model.*; import org.apache.arrow.memory.BufferAllocator; import java.io.Closeable; diff --git a/java/src/test/java/org/lance/NamespaceIntegrationTest.java b/java/src/test/java/org/lance/NamespaceIntegrationTest.java index 744953edb76..d2ea43f5e53 100644 --- a/java/src/test/java/org/lance/NamespaceIntegrationTest.java +++ b/java/src/test/java/org/lance/NamespaceIntegrationTest.java @@ -14,14 +14,14 @@ package org.lance; import org.lance.namespace.DirectoryNamespace; +import org.lance.namespace.LanceNamespace; import org.lance.namespace.LanceNamespaceStorageOptionsProvider; +import org.lance.namespace.model.CreateEmptyTableRequest; +import org.lance.namespace.model.CreateEmptyTableResponse; +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; import org.lance.operation.Append; -import com.lancedb.lance.namespace.LanceNamespace; -import com.lancedb.lance.namespace.model.CreateEmptyTableRequest; -import com.lancedb.lance.namespace.model.CreateEmptyTableResponse; -import com.lancedb.lance.namespace.model.DescribeTableRequest; -import com.lancedb.lance.namespace.model.DescribeTableResponse; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; diff --git a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java index beec2844e13..7d6c4741ad8 100644 --- a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java @@ -13,7 +13,8 @@ */ package org.lance.namespace; -import com.lancedb.lance.namespace.model.*; +import org.lance.namespace.model.*; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; diff --git a/java/src/test/java/org/lance/namespace/RestNamespaceTest.java b/java/src/test/java/org/lance/namespace/RestNamespaceTest.java index c13d29cb833..3e861de44e4 100644 --- a/java/src/test/java/org/lance/namespace/RestNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/RestNamespaceTest.java @@ -13,7 +13,8 @@ */ package org.lance.namespace; -import com.lancedb.lance.namespace.model.*; +import org.lance.namespace.model.*; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; diff --git a/python/pyproject.toml b/python/pyproject.toml index 99e531547e2..bfbcd0c3a9e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "pylance" dynamic = ["version"] -dependencies = ["pyarrow>=14", "numpy>=1.22", "lance_namespace>=0.0.21"] +dependencies = ["pyarrow>=14", "numpy>=1.22", "lance_namespace_urllib3_client>=0.1.0"] description = "python wrapper for Lance columnar format" authors = [{ name = "Lance Devs", email = "dev@lance.org" }] license = { file = "LICENSE" } diff --git a/python/python/lance/__init__.py b/python/python/lance/__init__.py index b637b7a362f..aa05c70286d 100644 --- a/python/python/lance/__init__.py +++ b/python/python/lance/__init__.py @@ -8,8 +8,6 @@ import warnings from typing import TYPE_CHECKING, Dict, List, Optional, Union -from lance_namespace import DescribeTableRequest, LanceNamespace - from . import io, log from .blob import BlobColumn, BlobFile from .dataset import ( @@ -34,7 +32,11 @@ bytes_read_counter, iops_counter, ) -from .namespace import LanceNamespaceStorageOptionsProvider +from .namespace import ( + DescribeTableRequest, + LanceNamespace, + LanceNamespaceStorageOptionsProvider, +) from .schema import json_to_schema, schema_to_json from .util import sanitize_ts @@ -155,7 +157,7 @@ def dataset( across multiple datasets. namespace : optional, LanceNamespace A namespace instance from which to fetch table location and storage options. - Use lance_namespace.connect() from the lance_namespace package. + Use lance.namespace.connect() to create a namespace instance. Must be provided together with `table_id`. Cannot be used with `uri`. When provided, the table location will be fetched automatically from the namespace via describe_table(). diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 396a0d02ae2..801e809a7ef 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -71,9 +71,10 @@ from .util import _target_partition_size_to_num_partitions, td_to_micros if TYPE_CHECKING: - from lance_namespace import LanceNamespace from pyarrow._compute import Expression + from lance.namespace import LanceNamespace + from .commit import CommitLock from .io import StorageOptionsProvider from .lance.indices import IndexDescription @@ -5368,9 +5369,11 @@ def write_dataset( # - APPEND/OVERWRITE mode: calls namespace.describe_table() # - Both modes: create storage options provider and merge storage options - from lance_namespace import CreateEmptyTableRequest, DescribeTableRequest - - from .namespace import LanceNamespaceStorageOptionsProvider + from .namespace import ( + CreateEmptyTableRequest, + DescribeTableRequest, + LanceNamespaceStorageOptionsProvider, + ) # Determine which namespace method to call based on mode if mode == "create": diff --git a/python/python/lance/namespace.py b/python/python/lance/namespace.py index 29a65acab74..da3f1c4ea04 100644 --- a/python/python/lance/namespace.py +++ b/python/python/lance/namespace.py @@ -4,38 +4,61 @@ """LanceNamespace storage options integration and implementations. This module provides: -1. Native Rust-backed namespace implementations (DirectoryNamespace) -2. Storage options integration with LanceNamespace for automatic credential refresh +1. LanceNamespace ABC interface for namespace implementations +2. Native Rust-backed namespace implementations (DirectoryNamespace, RestNamespace) +3. Storage options integration with LanceNamespace for automatic credential refresh +4. Plugin registry for external namespace implementations """ +import importlib +from abc import ABC, abstractmethod from typing import Dict, List -from lance_namespace import ( +from lance_namespace_urllib3_client.models import ( + AlterTransactionRequest, + AlterTransactionResponse, + CountTableRowsRequest, CreateEmptyTableRequest, CreateEmptyTableResponse, CreateNamespaceRequest, CreateNamespaceResponse, + CreateTableIndexRequest, + CreateTableIndexResponse, CreateTableRequest, CreateTableResponse, + DeleteFromTableRequest, + DeleteFromTableResponse, DeregisterTableRequest, DeregisterTableResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, + DescribeTableIndexStatsRequest, + DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse, + DescribeTransactionRequest, + DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, DropTableResponse, - LanceNamespace, + InsertIntoTableRequest, + InsertIntoTableResponse, ListNamespacesRequest, ListNamespacesResponse, + ListTableIndicesRequest, + ListTableIndicesResponse, ListTablesRequest, ListTablesResponse, + MergeInsertIntoTableRequest, + MergeInsertIntoTableResponse, NamespaceExistsRequest, + QueryTableRequest, RegisterTableRequest, RegisterTableResponse, TableExistsRequest, + UpdateTableRequest, + UpdateTableResponse, ) from .io import StorageOptionsProvider @@ -52,13 +75,218 @@ PyRestAdapter = None __all__ = [ + # Interface and factory + "LanceNamespace", + "connect", + "register_namespace_impl", + # Implementations "DirectoryNamespace", "RestNamespace", "RestAdapter", "LanceNamespaceStorageOptionsProvider", + # Request/Response types (re-exported from lance_namespace_urllib3_client) + "AlterTransactionRequest", + "AlterTransactionResponse", + "CountTableRowsRequest", + "CreateEmptyTableRequest", + "CreateEmptyTableResponse", + "CreateNamespaceRequest", + "CreateNamespaceResponse", + "CreateTableIndexRequest", + "CreateTableIndexResponse", + "CreateTableRequest", + "CreateTableResponse", + "DeleteFromTableRequest", + "DeleteFromTableResponse", + "DeregisterTableRequest", + "DeregisterTableResponse", + "DescribeNamespaceRequest", + "DescribeNamespaceResponse", + "DescribeTableIndexStatsRequest", + "DescribeTableIndexStatsResponse", + "DescribeTableRequest", + "DescribeTableResponse", + "DescribeTransactionRequest", + "DescribeTransactionResponse", + "DropNamespaceRequest", + "DropNamespaceResponse", + "DropTableRequest", + "DropTableResponse", + "InsertIntoTableRequest", + "InsertIntoTableResponse", + "ListNamespacesRequest", + "ListNamespacesResponse", + "ListTableIndicesRequest", + "ListTableIndicesResponse", + "ListTablesRequest", + "ListTablesResponse", + "MergeInsertIntoTableRequest", + "MergeInsertIntoTableResponse", + "NamespaceExistsRequest", + "QueryTableRequest", + "RegisterTableRequest", + "RegisterTableResponse", + "TableExistsRequest", + "UpdateTableRequest", + "UpdateTableResponse", ] +class LanceNamespace(ABC): + """Base interface for Lance Namespace implementations. + + This abstract base class defines the contract for namespace implementations + that manage Lance tables. Implementations can provide different storage backends + (directory-based, REST API, cloud catalogs, etc.). + + To create a custom namespace implementation, subclass this ABC and implement + at least the `namespace_id()` method. Other methods have default implementations + that raise `NotImplementedError`. + """ + + @abstractmethod + def namespace_id(self) -> str: + """Return a human-readable unique identifier for this namespace instance. + + This is used for equality comparison and hashing when the namespace is + used as part of a storage options provider. Two namespace instances with + the same ID are considered equal and will share cached resources. + + The ID should be human-readable for debugging and logging purposes. + For example: + - REST namespace: "RestNamespace { uri: 'https://api.example.com' }" + - Directory namespace: "DirectoryNamespace { root: '/path/to/data' }" + + Returns + ------- + str + A human-readable unique identifier string + """ + pass + + def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: + """List namespaces.""" + raise NotImplementedError("Not supported: list_namespaces") + + def describe_namespace( + self, request: DescribeNamespaceRequest + ) -> DescribeNamespaceResponse: + """Describe a namespace.""" + raise NotImplementedError("Not supported: describe_namespace") + + def create_namespace( + self, request: CreateNamespaceRequest + ) -> CreateNamespaceResponse: + """Create a new namespace.""" + raise NotImplementedError("Not supported: create_namespace") + + def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse: + """Drop a namespace.""" + raise NotImplementedError("Not supported: drop_namespace") + + def namespace_exists(self, request: NamespaceExistsRequest) -> None: + """Check if a namespace exists.""" + raise NotImplementedError("Not supported: namespace_exists") + + def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: + """List tables in a namespace.""" + raise NotImplementedError("Not supported: list_tables") + + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: + """Describe a table.""" + raise NotImplementedError("Not supported: describe_table") + + def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse: + """Register a table.""" + raise NotImplementedError("Not supported: register_table") + + def table_exists(self, request: TableExistsRequest) -> None: + """Check if a table exists.""" + raise NotImplementedError("Not supported: table_exists") + + def drop_table(self, request: DropTableRequest) -> DropTableResponse: + """Drop a table.""" + raise NotImplementedError("Not supported: drop_table") + + def deregister_table( + self, request: DeregisterTableRequest + ) -> DeregisterTableResponse: + """Deregister a table.""" + raise NotImplementedError("Not supported: deregister_table") + + def count_table_rows(self, request: CountTableRowsRequest) -> int: + """Count rows in a table.""" + raise NotImplementedError("Not supported: count_table_rows") + + def create_table( + self, request: CreateTableRequest, request_data: bytes + ) -> CreateTableResponse: + """Create a new table with data from Arrow IPC stream.""" + raise NotImplementedError("Not supported: create_table") + + def create_empty_table( + self, request: CreateEmptyTableRequest + ) -> CreateEmptyTableResponse: + """Create an empty table (metadata only operation).""" + raise NotImplementedError("Not supported: create_empty_table") + + def insert_into_table( + self, request: InsertIntoTableRequest, request_data: bytes + ) -> InsertIntoTableResponse: + """Insert data into a table.""" + raise NotImplementedError("Not supported: insert_into_table") + + def merge_insert_into_table( + self, request: MergeInsertIntoTableRequest, request_data: bytes + ) -> MergeInsertIntoTableResponse: + """Merge insert data into a table.""" + raise NotImplementedError("Not supported: merge_insert_into_table") + + def update_table(self, request: UpdateTableRequest) -> UpdateTableResponse: + """Update a table.""" + raise NotImplementedError("Not supported: update_table") + + def delete_from_table( + self, request: DeleteFromTableRequest + ) -> DeleteFromTableResponse: + """Delete from a table.""" + raise NotImplementedError("Not supported: delete_from_table") + + def query_table(self, request: QueryTableRequest) -> bytes: + """Query a table.""" + raise NotImplementedError("Not supported: query_table") + + def create_table_index( + self, request: CreateTableIndexRequest + ) -> CreateTableIndexResponse: + """Create a table index.""" + raise NotImplementedError("Not supported: create_table_index") + + def list_table_indices( + self, request: ListTableIndicesRequest + ) -> ListTableIndicesResponse: + """List table indices.""" + raise NotImplementedError("Not supported: list_table_indices") + + def describe_table_index_stats( + self, request: DescribeTableIndexStatsRequest + ) -> DescribeTableIndexStatsResponse: + """Describe table index statistics.""" + raise NotImplementedError("Not supported: describe_table_index_stats") + + def describe_transaction( + self, request: DescribeTransactionRequest + ) -> DescribeTransactionResponse: + """Describe a transaction.""" + raise NotImplementedError("Not supported: describe_transaction") + + def alter_transaction( + self, request: AlterTransactionRequest + ) -> AlterTransactionResponse: + """Alter a transaction.""" + raise NotImplementedError("Not supported: alter_transaction") + + class DirectoryNamespace(LanceNamespace): """Directory-based Lance Namespace implementation backed by Rust. @@ -96,9 +324,9 @@ class DirectoryNamespace(LanceNamespace): ... **{"storage.region": "us-west-2"} ... ) >>> - >>> # Compatible with lance_namespace.connect() - >>> import lance_namespace - >>> ns = lance_namespace.connect("dir", {"root": "memory://test"}) + >>> # Using the connect() factory function + >>> import lance.namespace + >>> ns = lance.namespace.connect("dir", {"root": "memory://test"}) """ def __init__(self, session=None, **properties): @@ -213,9 +441,9 @@ class RestNamespace(LanceNamespace): ... **{"header.Authorization": "Bearer token"} ... ) >>> - >>> # Compatible with lance_namespace.connect() - >>> import lance_namespace - >>> ns = lance_namespace.connect("rest", {"uri": "http://localhost:4099"}) + >>> # Using the connect() factory function + >>> import lance.namespace + >>> ns = lance.namespace.connect("rest", {"uri": "http://localhost:4099"}) """ def __init__(self, **properties): @@ -404,7 +632,7 @@ class LanceNamespaceStorageOptionsProvider(StorageOptionsProvider): ---------- namespace : LanceNamespace The namespace instance to fetch storage options from. Use - lance_namespace.connect() from the lance_namespace PyPI package. + lance.namespace.connect() to create a namespace instance. table_id : List[str] The table identifier (e.g., ["workspace", "table_name"]) @@ -415,10 +643,10 @@ class LanceNamespaceStorageOptionsProvider(StorageOptionsProvider): .. code-block:: python import lance - import lance_namespace + import lance.namespace - # Connect to a namespace (using the lance_namespace package) - namespace = lance_namespace.connect("http://localhost:4099") + # Connect to a namespace + namespace = lance.namespace.connect("rest", {"uri": "http://localhost:4099"}) # Create storage options provider provider = lance.LanceNamespaceStorageOptionsProvider( @@ -504,3 +732,76 @@ def provider_id(self) -> str: f"LanceNamespaceStorageOptionsProvider {{ " f"namespace: {namespace_id}, table_id: {self._table_id!r} }}" ) + + +# Native implementations (Rust-backed) +NATIVE_IMPLS = { + "rest": "lance.namespace.RestNamespace", + "dir": "lance.namespace.DirectoryNamespace", +} + +# Plugin registry for external implementations +_REGISTERED_IMPLS: Dict[str, str] = {} + + +def register_namespace_impl(name: str, class_path: str) -> None: + """Register a namespace implementation with a short name. + + External libraries can use this to register their implementations, + allowing users to use short names like "glue" instead of full class paths. + + Parameters + ---------- + name : str + Short name for the implementation (e.g., "glue", "hive2", "unity") + class_path : str + Full class path (e.g., "lance_glue.GlueNamespace") + """ + _REGISTERED_IMPLS[name] = class_path + + +def connect(impl: str, properties: Dict[str, str]) -> LanceNamespace: + """Connect to a Lance namespace implementation. + + This factory function creates namespace instances based on implementation + aliases or full class paths. It provides a unified way to instantiate + different namespace backends. + + Parameters + ---------- + impl : str + Implementation alias or full class path. Built-in aliases: + - "rest": RestNamespace (REST API client) + - "dir": DirectoryNamespace (local/cloud filesystem) + You can also use full class paths like "my.custom.Namespace" + External libraries can register additional aliases using + `register_namespace_impl()`. + properties : Dict[str, str] + Configuration properties passed to the namespace constructor + + Returns + ------- + LanceNamespace + The connected namespace instance + + Raises + ------ + ValueError + If the implementation class cannot be loaded or does not + implement LanceNamespace interface + """ + # Check native impls first, then registered plugins, then treat as full class path + impl_class = NATIVE_IMPLS.get(impl) or _REGISTERED_IMPLS.get(impl) or impl + try: + module_name, class_name = impl_class.rsplit(".", 1) + module = importlib.import_module(module_name) + namespace_class = getattr(module, class_name) + + if not issubclass(namespace_class, LanceNamespace): + raise ValueError( + f"Class {impl_class} does not implement LanceNamespace interface" + ) + + return namespace_class(**properties) + except Exception as e: + raise ValueError(f"Failed to construct namespace impl {impl_class}: {e}") diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index 1ff4c641c55..9158acee54e 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -17,7 +17,7 @@ import lance.namespace import pyarrow as pa import pytest -from lance_namespace import ( +from lance.namespace import ( CreateEmptyTableRequest, CreateNamespaceRequest, CreateTableRequest, @@ -31,6 +31,7 @@ NamespaceExistsRequest, RegisterTableRequest, TableExistsRequest, + connect, ) @@ -58,22 +59,18 @@ def table_to_ipc_bytes(table): @pytest.fixture def temp_namespace(): """Create a temporary DirectoryNamespace for testing.""" - import lance_namespace - with tempfile.TemporaryDirectory() as tmpdir: - # Use lance_namespace.connect() for consistency - ns = lance_namespace.connect("dir", {"root": f"file://{tmpdir}"}) + # Use lance.namespace.connect() for consistency + ns = connect("dir", {"root": f"file://{tmpdir}"}) yield ns @pytest.fixture def memory_namespace(): """Create a memory-based DirectoryNamespace for testing.""" - import lance_namespace - unique_id = uuid.uuid4().hex[:8] - # Use lance_namespace.connect() for consistency - ns = lance_namespace.connect("dir", {"root": f"memory://test_{unique_id}"}) + # Use lance.namespace.connect() for consistency + ns = connect("dir", {"root": f"memory://test_{unique_id}"}) yield ns @@ -676,14 +673,12 @@ def test_list_namespaces(self, memory_namespace): class TestLanceNamespaceConnect: - """Tests for lance_namespace.connect integration.""" + """Tests for lance.namespace.connect integration.""" def test_connect_with_properties(self): - """Test creating DirectoryNamespace via lance_namespace.connect().""" + """Test creating DirectoryNamespace via lance.namespace.connect().""" import uuid - import lance_namespace - unique_id = uuid.uuid4().hex[:8] properties = { "root": f"memory://test_connect_{unique_id}", @@ -691,9 +686,9 @@ def test_connect_with_properties(self): "dir_listing_enabled": "true", } - # Connect via lance_namespace.connect + # Connect via lance.namespace.connect # should use lance.namespace.DirectoryNamespace - ns = lance_namespace.connect("dir", properties) + ns = connect("dir", properties) # Verify it's a DirectoryNamespace instance assert isinstance(ns, lance.namespace.DirectoryNamespace) @@ -716,8 +711,6 @@ def test_connect_with_storage_options(self): """Test creating DirectoryNamespace with storage options via connect().""" import uuid - import lance_namespace - unique_id = uuid.uuid4().hex[:8] properties = { "root": f"memory://test_storage_{unique_id}", @@ -725,5 +718,5 @@ def test_connect_with_storage_options(self): } # This should work without errors - ns = lance_namespace.connect("dir", properties) + ns = connect("dir", properties) assert isinstance(ns, lance.namespace.DirectoryNamespace) diff --git a/python/python/tests/test_namespace_integration.py b/python/python/tests/test_namespace_integration.py index 12e101a51e7..592bbd2c3ef 100644 --- a/python/python/tests/test_namespace_integration.py +++ b/python/python/tests/test_namespace_integration.py @@ -19,7 +19,7 @@ import lance import pyarrow as pa import pytest -from lance_namespace import ( +from lance.namespace import ( CreateEmptyTableRequest, CreateEmptyTableResponse, DescribeTableRequest, @@ -434,8 +434,6 @@ def test_namespace_distributed_write(s3_bucket: str): table_name = uuid.uuid4().hex table_id = ["test_ns", table_name] - from lance_namespace import CreateEmptyTableRequest - request = CreateEmptyTableRequest(id=table_id, location=None, properties=None) response = namespace.create_empty_table(request) diff --git a/python/python/tests/test_namespace_rest.py b/python/python/tests/test_namespace_rest.py index d70a4cd91e6..9410cf89e4c 100644 --- a/python/python/tests/test_namespace_rest.py +++ b/python/python/tests/test_namespace_rest.py @@ -17,7 +17,7 @@ import lance.namespace import pyarrow as pa import pytest -from lance_namespace import ( +from lance.namespace import ( CreateEmptyTableRequest, CreateNamespaceRequest, CreateTableRequest, @@ -31,6 +31,7 @@ NamespaceExistsRequest, RegisterTableRequest, TableExistsRequest, + connect, ) @@ -58,18 +59,14 @@ def table_to_ipc_bytes(table): @pytest.fixture def rest_namespace(): """Create a REST namespace with adapter for testing.""" - import lance_namespace - unique_id = uuid.uuid4().hex[:8] with tempfile.TemporaryDirectory() as tmpdir: backend_config = {"root": tmpdir} port = 4000 + hash(unique_id) % 10000 with lance.namespace.RestAdapter("dir", backend_config, port=port): - # Use lance_namespace.connect() for consistency - client = lance_namespace.connect( - "rest", {"uri": f"http://127.0.0.1:{port}"} - ) + # Use lance.namespace.connect() for consistency + client = connect("rest", {"uri": f"http://127.0.0.1:{port}"}) yield client @@ -644,21 +641,19 @@ def test_list_namespaces(self, rest_namespace): class TestLanceNamespaceConnect: - """Tests for lance_namespace.connect integration.""" + """Tests for lance.namespace.connect integration.""" def test_connect_with_rest(self): - """Test creating RestNamespace via lance_namespace.connect().""" - import lance_namespace - + """Test creating RestNamespace via lance.namespace.connect().""" unique_id = uuid.uuid4().hex[:8] with tempfile.TemporaryDirectory() as tmpdir: backend_config = {"root": tmpdir} port = 4000 + hash(unique_id) % 10000 with lance.namespace.RestAdapter("dir", backend_config, port=port): - # Connect via lance_namespace.connect + # Connect via lance.namespace.connect properties = {"uri": f"http://127.0.0.1:{port}"} - ns = lance_namespace.connect("rest", properties) + ns = connect("rest", properties) # Verify it's a RestNamespace instance assert isinstance(ns, lance.namespace.RestNamespace) @@ -678,8 +673,6 @@ def test_connect_with_rest(self): def test_connect_with_custom_delimiter(self): """Test creating RestNamespace with custom delimiter via connect().""" - import lance_namespace - unique_id = uuid.uuid4().hex[:8] with tempfile.TemporaryDirectory() as tmpdir: backend_config = {"root": tmpdir} @@ -692,7 +685,7 @@ def test_connect_with_custom_delimiter(self): "uri": f"http://127.0.0.1:{port}", "delimiter": "@", } - ns = lance_namespace.connect("rest", properties) + ns = connect("rest", properties) # Verify it's a RestNamespace instance assert isinstance(ns, lance.namespace.RestNamespace)