diff --git a/python/pylance/__init__.py b/python/pylance/__init__.py
new file mode 100644
index 00000000000..1e542f1e11d
--- /dev/null
+++ b/python/pylance/__init__.py
@@ -0,0 +1,21 @@
+import pyarrow.dataset as ds
+from pylance.lib import LanceFileFormat
+
+
+def dataset(uri: str):
+    """
+    Open the Lance data at ``uri`` as a pyarrow Dataset.
+
+    Parameters
+    ----------
+    uri : str
+        Location of the Lance data; forwarded unchanged to
+        ``pyarrow.dataset.dataset``.
+
+    Returns
+    -------
+    The object returned by ``pyarrow.dataset.dataset`` when called with a
+    ``LanceFileFormat`` as its ``format``.
+    """
+    # A fresh format instance is created for every call.
+    return ds.dataset(uri, format=LanceFileFormat())
diff --git a/python/pylance/_lib.pyx b/python/pylance/_lib.pyx
new file mode 100644
index 00000000000..6161d5d9a5d
--- /dev/null
+++ b/python/pylance/_lib.pyx
@@ -0,0 +1,35 @@
+from libcpp.memory cimport shared_ptr
+from pyarrow.includes.libarrow_dataset cimport CFileFormat
+from pyarrow._dataset cimport FileFormat
+
+
+# C++ declaration of the Lance file format, taken from the Lance header.
+cdef extern from "lance/arrow/file_lance.h" namespace "lance" nogil:
+
+    cdef cppclass CLanceFileFormat "::lance::arrow::LanceFileFormat"(
+            CFileFormat):
+        pass
+
+
+cdef class LanceFileFormat(FileFormat):
+    """Python-visible file format backed by the C++ ``LanceFileFormat``."""
+
+    def __init__(self):
+        # Hand a newly allocated C++ format object to the base-class init().
+        cdef shared_ptr[CFileFormat] wrapped
+        wrapped = shared_ptr[CFileFormat](new CLanceFileFormat())
+        self.init(wrapped)
+
+    def equals(self, LanceFileFormat other):
+        """Always return True; the format is constructed without options."""
+        return True
+
+    @property
+    def default_extname(self):
+        """Extension name reported for this format: ``"lance"``."""
+        return "lance"
+
+    def __reduce__(self):
+        """Pickle as a no-argument call to ``LanceFileFormat``."""
+        return LanceFileFormat, ()
+