Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions cpp/src/arrow/filesystem/path_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,8 @@ std::string ConcatAbstractPath(const std::string& base, const std::string& stem)
DCHECK(!stem.empty());
if (base.empty()) {
return stem;
} else if (base.back() == kSep) {
return base + stem;
} else {
return base + kSep + stem;
}
return EnsureTrailingSlash(base) + RemoveLeadingSlash(stem).to_string();
}

std::string EnsureTrailingSlash(util::string_view v) {
Expand Down
11 changes: 10 additions & 1 deletion cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,16 @@ struct S3Path {
out->bucket = std::string(src.substr(0, first_sep));
out->key = std::string(src.substr(first_sep + 1));
out->key_parts = internal::SplitAbstractPath(out->key);
return internal::ValidateAbstractPathParts(out->key_parts);
return Validate(out);
}

// Validate the key components of `path`. When validation fails, the error is
// re-issued as Status::Invalid with " in path <full_path>" appended so the
// message identifies the offending path; otherwise the status is returned
// unchanged.
static Status Validate(S3Path* path) {
  auto parts_status = internal::ValidateAbstractPathParts(path->key_parts);
  if (parts_status.ok()) {
    return parts_status;
  }
  return Status::Invalid(parts_status.message(), " in path ", path->full_path);
}

Aws::String ToURLEncodedAwsString() const {
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def parse_git(root, **kwargs):
Field,
Schema,
schema,
unify_schemas,
Array, Tensor,
array, chunked_array, record_batch, table,
SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
Expand Down
27 changes: 20 additions & 7 deletions python/pyarrow/_dataset.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ cdef class Dataset:
shared_ptr[CDataset] wrapped
CDataset* dataset

def __init__(self, children, Schema schema not None):
def __init__(self):
_forbid_instantiation(self.__class__)

cdef void init(self, const shared_ptr[CDataset]& sp):
Expand Down Expand Up @@ -985,12 +985,12 @@ cdef class DatasetFactory:
# Constructor stub: accepts a `children` list that it never reads and hands
# this instance's class to _forbid_instantiation (presumably to reject direct
# construction from Python -- confirm against that helper's definition).
def __init__(self, list children):
_forbid_instantiation(self.__class__)

cdef init(self, shared_ptr[CDatasetFactory]& sp):
cdef init(self, const shared_ptr[CDatasetFactory]& sp):
self.wrapped = sp
self.factory = sp.get()

@staticmethod
cdef wrap(shared_ptr[CDatasetFactory]& sp):
cdef wrap(const shared_ptr[CDatasetFactory]& sp):
cdef DatasetFactory self = \
DatasetFactory.__new__(DatasetFactory)
self.init(sp)
Expand Down Expand Up @@ -1030,8 +1030,9 @@ cdef class DatasetFactory:
-------
Schema
"""
cdef CResult[shared_ptr[CSchema]] result
cdef CInspectOptions options
cdef:
CInspectOptions options
CResult[shared_ptr[CSchema]] result
with nogil:
result = self.factory.Inspect(options)
return pyarrow_wrap_schema(GetResultValue(result))
Expand All @@ -1054,13 +1055,15 @@ cdef class DatasetFactory:
cdef:
shared_ptr[CSchema] sp_schema
CResult[shared_ptr[CDataset]] result

if schema is not None:
sp_schema = pyarrow_unwrap_schema(schema)
with nogil:
result = self.factory.FinishWithSchema(sp_schema)
else:
with nogil:
result = self.factory.Finish()

return Dataset.wrap(GetResultValue(result))


Expand Down Expand Up @@ -1093,8 +1096,14 @@ cdef class FileSystemFactoryOptions:

__slots__ = () # avoid mistakenly creating attributes

def __init__(self, partition_base_dir=None, exclude_invalid_files=None,
def __init__(self, partition_base_dir=None, partitioning=None,
exclude_invalid_files=None,
list selector_ignore_prefixes=None):
if isinstance(partitioning, PartitioningFactory):
self.partitioning_factory = partitioning
elif isinstance(partitioning, Partitioning):
self.partitioning = partitioning

if partition_base_dir is not None:
self.partition_base_dir = partition_base_dir
if exclude_invalid_files is not None:
Expand Down Expand Up @@ -1245,7 +1254,7 @@ cdef class UnionDatasetFactory(DatasetFactory):
"""

cdef:
CDatasetFactory* union_factory
CUnionDatasetFactory* union_factory

def __init__(self, list factories):
cdef:
Expand All @@ -1255,6 +1264,10 @@ cdef class UnionDatasetFactory(DatasetFactory):
c_factories.push_back(factory.unwrap())
self.init(GetResultValue(CUnionDatasetFactory.Make(c_factories)))

# Run the base DatasetFactory.init on `sp`, then additionally keep the raw
# pointer obtained from sp.get(), cast to CUnionDatasetFactory*, in
# self.union_factory.
cdef init(self, const shared_ptr[CDatasetFactory]& sp):
DatasetFactory.init(self, sp)
self.union_factory = <CUnionDatasetFactory*> sp.get()


cdef class ScanTask:
"""Read record batches from a range of a single data fragment.
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/_fs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,15 @@ cdef class SubTreeFileSystem(FileSystem):
FileSystem.wrap(self.subtreefs.base_fs())
)

# Property exposing the value of subtreefs.base_path() after it is passed
# through frombytes (presumably a bytes -> str conversion; confirm against
# that helper).
@property
def base_path(self):
return frombytes(self.subtreefs.base_path())

# Property exposing the result of subtreefs.base_fs() wrapped with
# FileSystem.wrap.
@property
def base_fs(self):
return FileSystem.wrap(self.subtreefs.base_fs())


cdef class _MockFileSystem(FileSystem):

def __init__(self, datetime current_time=None):
Expand Down
Loading