diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh index 145525c2389..d263d51dc86 100755 --- a/ci/scripts/r_windows_build.sh +++ b/ci/scripts/r_windows_build.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -set -x +set -ex : ${ARROW_HOME:=$(pwd)} # Make sure it is absolute and exported diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index dbcc5b6ff01..b9d2e5b9ff8 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -138,9 +138,15 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_BUILD_BENCHMARKS_REFERENCE "Build the Arrow micro reference benchmarks" OFF) + if(ARROW_BUILD_SHARED) + set(ARROW_TEST_LINKAGE_DEFAULT "shared") + else() + set(ARROW_TEST_LINKAGE_DEFAULT "static") + endif() + define_option_string(ARROW_TEST_LINKAGE "Linkage of Arrow libraries with unit tests executables." - "shared" + "${ARROW_TEST_LINKAGE_DEFAULT}" "shared" "static") diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc index 2e17cc3ec67..37d0da0bbc5 100644 --- a/cpp/src/arrow/dataset/dataset.cc +++ b/cpp/src/arrow/dataset/dataset.cc @@ -135,6 +135,12 @@ InMemoryDataset::InMemoryDataset(std::shared_ptr table) : Dataset(table->schema()), get_batches_(new TableRecordBatchGenerator(std::move(table))) {} +Result> InMemoryDataset::ReplaceSchema( + std::shared_ptr schema) const { + RETURN_NOT_OK(CheckProjectable(*schema_, *schema)); + return std::make_shared(std::move(schema), get_batches_); +} + FragmentIterator InMemoryDataset::GetFragmentsImpl( std::shared_ptr scan_options) { auto schema = this->schema(); @@ -175,6 +181,17 @@ Result> UnionDataset::Make(std::shared_ptr new UnionDataset(std::move(schema), std::move(children))); } +Result> UnionDataset::ReplaceSchema( + std::shared_ptr schema) const { + auto children = children_; + for (auto& child : children) { + ARROW_ASSIGN_OR_RAISE(child, child->ReplaceSchema(schema)); + } + + return std::shared_ptr( + new UnionDataset(std::move(schema), std::move(children))); +} + FragmentIterator UnionDataset::GetFragmentsImpl(std::shared_ptr options) { return GetFragmentsFromDatasets(children_, options); } diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h index 9f0e26feabe..740263df3f8 100644 --- a/cpp/src/arrow/dataset/dataset.h +++ b/cpp/src/arrow/dataset/dataset.h @@ -122,6 +122,13 @@ class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this { /// \brief The name identifying the kind of Dataset virtual std::string type_name() const = 0; + /// \brief Return a copy of this Dataset with a different schema. + /// + /// The copy will view the same Fragments. If the new schema is not compatible with the + /// original dataset's schema then an error will be raised. + virtual Result> ReplaceSchema( + std::shared_ptr schema) const = 0; + virtual ~Dataset() = default; protected: @@ -155,7 +162,7 @@ class ARROW_DS_EXPORT InMemoryDataset : public Dataset { }; InMemoryDataset(std::shared_ptr schema, - std::unique_ptr get_batches) + std::shared_ptr get_batches) : Dataset(std::move(schema)), get_batches_(std::move(get_batches)) {} // Convenience constructor taking a fixed list of batches @@ -163,12 +170,15 @@ class ARROW_DS_EXPORT InMemoryDataset : public Dataset { explicit InMemoryDataset(std::shared_ptr
table); - FragmentIterator GetFragmentsImpl(std::shared_ptr options) override; - std::string type_name() const override { return "in-memory"; } - private: - std::unique_ptr get_batches_; + Result> ReplaceSchema( + std::shared_ptr schema) const override; + + protected: + FragmentIterator GetFragmentsImpl(std::shared_ptr options) override; + + std::shared_ptr get_batches_; }; /// \brief A Dataset wrapping child Datasets. @@ -182,13 +192,16 @@ class ARROW_DS_EXPORT UnionDataset : public Dataset { static Result> Make(std::shared_ptr schema, DatasetVector children); - FragmentIterator GetFragmentsImpl(std::shared_ptr options) override; - const DatasetVector& children() const { return children_; } std::string type_name() const override { return "union"; } + Result> ReplaceSchema( + std::shared_ptr schema) const override; + protected: + FragmentIterator GetFragmentsImpl(std::shared_ptr options) override; + explicit UnionDataset(std::shared_ptr schema, DatasetVector children) : Dataset(std::move(schema)), children_(std::move(children)) {} diff --git a/cpp/src/arrow/dataset/dataset_test.cc b/cpp/src/arrow/dataset/dataset_test.cc index 01c5b5439b2..7470efac20a 100644 --- a/cpp/src/arrow/dataset/dataset_test.cc +++ b/cpp/src/arrow/dataset/dataset_test.cc @@ -52,6 +52,35 @@ TEST_F(TestInMemoryFragment, Scan) { class TestInMemoryDataset : public DatasetFixtureMixin {}; +TEST_F(TestInMemoryDataset, ReplaceSchema) { + constexpr int64_t kBatchSize = 1; + constexpr int64_t kNumberBatches = 1; + + SetSchema({field("i32", int32()), field("f64", float64())}); + auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_); + auto reader = ConstantArrayGenerator::Repeat(kNumberBatches, batch); + + auto dataset = std::make_shared( + schema_, RecordBatchVector{static_cast(kNumberBatches), batch}); + + // drop field + ASSERT_OK(dataset->ReplaceSchema(schema({field("i32", int32())})).status()); + // add field (will be materialized as null during projection) + ASSERT_OK(dataset->ReplaceSchema(schema({field("str", utf8())})).status()); + // incompatible type + ASSERT_RAISES(TypeError, + dataset->ReplaceSchema(schema({field("i32", utf8())})).status()); + // incompatible nullability + ASSERT_RAISES( + TypeError, + dataset->ReplaceSchema(schema({field("f64", float64(), /*nullable=*/false)})) + .status()); + // add non-nullable field + ASSERT_RAISES(TypeError, + dataset->ReplaceSchema(schema({field("str", utf8(), /*nullable=*/false)})) + .status()); +} + TEST_F(TestInMemoryDataset, GetFragments) { constexpr int64_t kBatchSize = 1024; constexpr int64_t kNumberBatches = 16; @@ -60,8 +89,6 @@ TEST_F(TestInMemoryDataset, GetFragments) { auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_); auto reader = ConstantArrayGenerator::Repeat(kNumberBatches, batch); - // It is safe to copy fragment multiple time since Scan() does not consume - // the internal array. auto dataset = std::make_shared( schema_, RecordBatchVector{static_cast(kNumberBatches), batch}); @@ -70,6 +97,45 @@ TEST_F(TestInMemoryDataset, GetFragments) { class TestUnionDataset : public DatasetFixtureMixin {}; +TEST_F(TestUnionDataset, ReplaceSchema) { + constexpr int64_t kBatchSize = 1; + constexpr int64_t kNumberBatches = 1; + + SetSchema({field("i32", int32()), field("f64", float64())}); + auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_); + + std::vector> batches{static_cast(kNumberBatches), + batch}; + + DatasetVector children = { + std::make_shared(schema_, batches), + std::make_shared(schema_, batches), + }; + + const int64_t total_batches = children.size() * kNumberBatches; + auto reader = ConstantArrayGenerator::Repeat(total_batches, batch); + + ASSERT_OK_AND_ASSIGN(auto dataset, UnionDataset::Make(schema_, children)); + AssertDatasetEquals(reader.get(), dataset.get()); + + // drop field + ASSERT_OK(dataset->ReplaceSchema(schema({field("i32", int32())})).status()); + // add nullable field (will be materialized as null during projection) + ASSERT_OK(dataset->ReplaceSchema(schema({field("str", utf8())})).status()); + // incompatible type + ASSERT_RAISES(TypeError, + dataset->ReplaceSchema(schema({field("i32", utf8())})).status()); + // incompatible nullability + ASSERT_RAISES( + TypeError, + dataset->ReplaceSchema(schema({field("f64", float64(), /*nullable=*/false)})) + .status()); + // add non-nullable field + ASSERT_RAISES(TypeError, + dataset->ReplaceSchema(schema({field("str", utf8(), /*nullable=*/false)})) + .status()); +} + TEST_F(TestUnionDataset, GetFragments) { constexpr int64_t kBatchSize = 1024; constexpr int64_t kChildPerNode = 2; @@ -105,9 +171,7 @@ TEST_F(TestUnionDataset, GetFragments) { AssertDatasetEquals(reader.get(), root_dataset.get()); } -class TestDataset : public DatasetFixtureMixin {}; - -TEST_F(TestDataset, TrivialScan) { +TEST_F(TestUnionDataset, TrivialScan) { constexpr int64_t kNumberBatches = 16; constexpr int64_t kBatchSize = 1024; @@ -129,6 +193,57 @@ TEST_F(TestDataset, TrivialScan) { AssertDatasetEquals(reader.get(), dataset.get()); } +TEST(TestProjector, CheckProjectable) { + struct Assert { + explicit Assert(FieldVector from) : from_(from) {} + Schema from_; + + void ProjectableTo(FieldVector to) { + ARROW_EXPECT_OK(CheckProjectable(from_, Schema(to))); + } + + void NotProjectableTo(FieldVector to, std::string substr = "") { + EXPECT_RAISES_WITH_MESSAGE_THAT(TypeError, testing::HasSubstr(substr), + CheckProjectable(from_, Schema(to))); + } + }; + + auto i8 = field("i8", int8()); + auto u16 = field("u16", uint16()); + auto str = field("str", utf8()); + auto i8_req = field("i8", int8(), false); + auto u16_req = field("u16", uint16(), false); + auto str_req = field("str", utf8(), false); + + // trivial + Assert({}).ProjectableTo({}); + Assert({i8}).ProjectableTo({i8}); + Assert({i8, u16_req}).ProjectableTo({i8, u16_req}); + + // reorder + Assert({i8, u16}).ProjectableTo({u16, i8}); + Assert({i8, str, u16}).ProjectableTo({u16, i8, str}); + + // drop field(s) + Assert({i8}).ProjectableTo({}); + + // add field(s) + Assert({}).ProjectableTo({i8}); + Assert({}).ProjectableTo({i8, u16}); + Assert({}).NotProjectableTo({u16_req}, + "is not nullable and does not exist in origin schema"); + Assert({i8}).NotProjectableTo({u16_req, i8}); + + // change nullability + Assert({i8}).NotProjectableTo({i8_req}, + "not nullable but is not required in origin schema"); + Assert({i8_req}).ProjectableTo({i8}); + + // change field type + Assert({i8}).NotProjectableTo({field("i8", utf8())}, + "fields had matching names but differing types"); +} + TEST(TestProjector, MismatchedType) { constexpr int64_t kBatchSize = 1024; @@ -229,8 +344,8 @@ TEST(TestProjector, NonTrivial) { AssertBatchesEqual(*expected_batch, *reconciled_batch); } -class TestEndToEnd : public TestDataset { - void SetUp() { +class TestEndToEnd : public TestUnionDataset { + void SetUp() override { bool nullable = false; SetSchema({ field("region", utf8(), nullable), @@ -377,9 +492,9 @@ TEST_F(TestEndToEnd, EndToEndSingleDataset) { ASSERT_OK(scanner_builder->Project(columns)); // An optional filter expression may also be specified. The filter expression - // is evaluated against input rows. Only rows for which the filter evaluates to true are - // yielded. Predicate pushdown optimizations are applied using partition information if - // available. + // is evaluated against input rows. Only rows for which the filter evaluates to true + // are yielded. Predicate pushdown optimizations are applied using partition + // information if available. // // This API decouples predicate pushdown from the Dataset implementation // and partition discovery. @@ -413,7 +528,7 @@ inline std::shared_ptr SchemaFromNames(const std::vector na return schema(fields); } -class TestSchemaUnification : public TestDataset { +class TestSchemaUnification : public TestUnionDataset { public: using i32 = util::optional; using PathAndContent = std::vector>; @@ -487,7 +602,8 @@ class TestSchemaUnification : public TestDataset { ASSERT_OK_AND_ASSIGN(auto ds1, get_source("/dataset/alpha", {ds1_df1, ds1_df2})); ASSERT_OK_AND_ASSIGN(auto ds2, get_source("/dataset/beta", {ds2_df1, ds2_df2})); - // FIXME(bkietz) this is a hack: allow differing schemas for the purposes of this test + // FIXME(bkietz) this is a hack: allow differing schemas for the purposes of this + // test class DisparateSchemasUnionDataset : public UnionDataset { public: DisparateSchemasUnionDataset(std::shared_ptr schema, DatasetVector children) diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc index 66e0e1dd68d..fee471d975f 100644 --- a/cpp/src/arrow/dataset/file_base.cc +++ b/cpp/src/arrow/dataset/file_base.cc @@ -118,6 +118,14 @@ Result> FileSystemDataset::Make( std::move(filesystem), std::move(forest), std::move(partitions))); } +Result> FileSystemDataset::ReplaceSchema( + std::shared_ptr schema) const { + RETURN_NOT_OK(CheckProjectable(*schema_, *schema)); + return std::shared_ptr( + new FileSystemDataset(std::move(schema), partition_expression_, format_, + filesystem_, forest_, partitions_)); +} + std::vector FileSystemDataset::files() const { std::vector files; diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h index 157a4256e1c..e6d893193ff 100644 --- a/cpp/src/arrow/dataset/file_base.h +++ b/cpp/src/arrow/dataset/file_base.h @@ -244,6 +244,9 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset { std::string type_name() const override { return "filesystem"; } + Result> ReplaceSchema( + std::shared_ptr schema) const override; + const std::shared_ptr& format() const { return format_; } std::vector files() const; diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc index 4d6d6f6348c..699ba989587 100644 --- a/cpp/src/arrow/dataset/file_test.cc +++ b/cpp/src/arrow/dataset/file_test.cc @@ -92,6 +92,30 @@ TEST_F(TestFileSystemDataset, Basic) { AssertFilesAre(dataset_, {"A/a", "A/B/b"}); } +TEST_F(TestFileSystemDataset, ReplaceSchema) { + auto schm = schema({field("i32", int32()), field("f64", float64())}); + auto format = std::make_shared(schm); + ASSERT_OK_AND_ASSIGN(auto dataset, + FileSystemDataset::Make(schm, scalar(true), format, fs_, {})); + + // drop field + ASSERT_OK(dataset->ReplaceSchema(schema({field("i32", int32())})).status()); + // add nullable field (will be materialized as null during projection) + ASSERT_OK(dataset->ReplaceSchema(schema({field("str", utf8())})).status()); + // incompatible type + ASSERT_RAISES(TypeError, + dataset->ReplaceSchema(schema({field("i32", utf8())})).status()); + // incompatible nullability + ASSERT_RAISES( + TypeError, + dataset->ReplaceSchema(schema({field("f64", float64(), /*nullable=*/false)})) + .status()); + // add non-nullable field + ASSERT_RAISES(TypeError, + dataset->ReplaceSchema(schema({field("str", utf8(), /*nullable=*/false)})) + .status()); +} + TEST_F(TestFileSystemDataset, RootPartitionPruning) { auto root_partition = ("a"_ == 5).Copy(); MakeDataset({fs::File("a"), fs::File("b")}, root_partition); diff --git a/cpp/src/arrow/dataset/projector.cc b/cpp/src/arrow/dataset/projector.cc index 531c4a56694..9ce90ad0ed3 100644 --- a/cpp/src/arrow/dataset/projector.cc +++ b/cpp/src/arrow/dataset/projector.cc @@ -34,6 +34,33 @@ namespace arrow { namespace dataset { +Status CheckProjectable(const Schema& from, const Schema& to) { + for (const auto& to_field : to.fields()) { + ARROW_ASSIGN_OR_RAISE(auto from_field, FieldRef(to_field->name()).GetOneOrNone(from)); + + if (from_field == nullptr) { + if (to_field->nullable()) continue; + + return Status::TypeError("field ", to_field->ToString(), + " is not nullable and does not exist in origin schema ", + from); + } + + if (!from_field->type()->Equals(to_field->type())) { + return Status::TypeError("fields had matching names but differing types. From: ", + from_field->ToString(), " To: ", to_field->ToString()); + } + + if (from_field->nullable() && !to_field->nullable()) { + return Status::TypeError("field ", to_field->ToString(), + " is not nullable but is not required in origin schema ", + from); + } + } + + return Status::OK(); +} + RecordBatchProjector::RecordBatchProjector(std::shared_ptr to) : to_(std::move(to)), missing_columns_(to_->num_fields(), nullptr), @@ -86,32 +113,23 @@ Result> RecordBatchProjector::Project( Status RecordBatchProjector::SetInputSchema(std::shared_ptr from, MemoryPool* pool) { + RETURN_NOT_OK(CheckProjectable(*from, *to_)); from_ = std::move(from); for (int i = 0; i < to_->num_fields(); ++i) { - const auto& field = to_->field(i); - FieldRef ref(field->name()); - auto matches = ref.FindAll(*from_); + ARROW_ASSIGN_OR_RAISE(auto match, + FieldRef(to_->field(i)->name()).FindOneOrNone(*from_)); - if (matches.empty()) { + if (match.indices().empty()) { // Mark column i as missing by setting missing_columns_[i] // to a non-null placeholder. ARROW_ASSIGN_OR_RAISE(missing_columns_[i], MakeArrayOfNull(to_->field(i)->type(), 0, pool)); column_indices_[i] = kNoMatch; } else { - RETURN_NOT_OK(ref.CheckNonMultiple(matches, *from_)); - int matching_index = matches[0].indices()[0]; - - if (!from_->field(matching_index)->Equals(field, /*check_metadata=*/false)) { - return Status::TypeError("fields had matching names but were not equivalent ", - from_->field(matching_index)->ToString(), " vs ", - field->ToString()); - } - // Mark column i as not missing by setting missing_columns_[i] to nullptr missing_columns_[i] = nullptr; - column_indices_[i] = matching_index; + column_indices_[i] = match.indices()[0]; } } return Status::OK(); diff --git a/cpp/src/arrow/dataset/projector.h b/cpp/src/arrow/dataset/projector.h index 13a0ffb1938..8fd157f7ece 100644 --- a/cpp/src/arrow/dataset/projector.h +++ b/cpp/src/arrow/dataset/projector.h @@ -27,6 +27,8 @@ namespace arrow { namespace dataset { +ARROW_DS_EXPORT Status CheckProjectable(const Schema& from, const Schema& to); + /// \brief Project a RecordBatch to a given schema. /// /// Projected record batches will reorder columns from input record batches when possible, diff --git a/cpp/src/arrow/result.h b/cpp/src/arrow/result.h index 13dd3870b1a..b492057a8b2 100644 --- a/cpp/src/arrow/result.h +++ b/cpp/src/arrow/result.h @@ -45,7 +45,7 @@ ARROW_EXPORT void InvalidValueOrDie(const Status& st); } // namespace internal -// A class for representing either a usable value, or an error. +/// A class for representing either a usable value, or an error. /// /// A Result object either contains a value of type `T` or a Status object /// explaining why such a value is not present. The type `T` must be @@ -98,7 +98,7 @@ ARROW_EXPORT void InvalidValueOrDie(const Status& st); /// arrow::Result CalculateFoo(); /// ``` template -class Result : public util::EqualityComparable> { +class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable> { template friend class Result; diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h index 195ed71cce9..aa1f2e151e5 100644 --- a/cpp/src/arrow/status.h +++ b/cpp/src/arrow/status.h @@ -95,11 +95,6 @@ enum class StatusCode : char { AlreadyExists = 45 }; -#if defined(__clang__) -// Only clang supports warn_unused_result as a type annotation. -class ARROW_MUST_USE_RESULT ARROW_EXPORT Status; -#endif - /// \brief An opaque class that allows subsystems to retain /// additional information inside the Status. class ARROW_EXPORT StatusDetail { @@ -124,8 +119,8 @@ class ARROW_EXPORT StatusDetail { /// /// Additionally, if an error occurred, a specific error message is generally /// attached. -class ARROW_EXPORT Status : public util::EqualityComparable, - public util::ToStringOstreamable { +class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public util::EqualityComparable, + public util::ToStringOstreamable { public: // Create a success status. Status() noexcept : state_(NULLPTR) {} diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h index 6119e0f0fc7..14da481f0bb 100644 --- a/cpp/src/arrow/testing/gtest_util.h +++ b/cpp/src/arrow/testing/gtest_util.h @@ -97,11 +97,12 @@ class Result; #define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr)) -#define ARROW_EXPECT_OK(expr) \ - do { \ - auto _res = (expr); \ - ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \ - EXPECT_TRUE(_st.ok()); \ +#define ARROW_EXPECT_OK(expr) \ + do { \ + auto _res = (expr); \ + ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \ + EXPECT_TRUE(_st.ok()) << "'" ARROW_STRINGIFY(expr) "' failed with " \ + << _st.ToString(); \ } while (false) #define ABORT_NOT_OK(expr) \ diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index edb03d31c14..ae8d56d098b 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -68,6 +68,13 @@ #define ARROW_MUST_USE_RESULT #endif +#if defined(__clang__) +// Only clang supports warn_unused_result as a type annotation. +#define ARROW_MUST_USE_TYPE ARROW_MUST_USE_RESULT +#else +#define ARROW_MUST_USE_TYPE +#endif + // ---------------------------------------------------------------------- // C++/CLI support macros (see ARROW-1134) diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 556faa74539..ab42aa7d99a 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -62,7 +62,7 @@ cdef class Dataset: self.dataset = sp.get() @staticmethod - cdef wrap(shared_ptr[CDataset]& sp): + cdef wrap(const shared_ptr[CDataset]& sp): cdef Dataset self typ = frombytes(sp.get().type_name()) @@ -92,6 +92,18 @@ cdef class Dataset: else: return Expression.wrap(expr) + def replace_schema(self, Schema schema not None): + """ + Return a copy of this Dataset with a different schema. + + The copy will view the same Fragments. If the new schema is not + compatible with the original dataset's schema then an error will + be raised. + """ + cdef shared_ptr[CDataset] copy = GetResultValue( + self.dataset.ReplaceSchema(pyarrow_unwrap_schema(schema))) + return Dataset.wrap(move(copy)) + def get_fragments(self, columns=None, filter=None): """Returns an iterator over the fragments in this dataset. diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index 467a84c9319..53d5cde9dc9 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -187,6 +187,8 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: const shared_ptr[CExpression] & partition_expression() c_string type_name() + CResult[shared_ptr[CDataset]] ReplaceSchema(shared_ptr[CSchema]) + CResult[shared_ptr[CScannerBuilder]] NewScanWithContext "NewScan"( shared_ptr[CScanContext] context) CResult[shared_ptr[CScannerBuilder]] NewScan()