Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 11 additions & 13 deletions cpp/src/arrow/dataset/scanner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,19 +141,17 @@ Result<std::shared_ptr<Schema>> GetProjectedSchemaFromExpression(
if (call->function_name != "make_struct") {
return Status::Invalid("Top level projection expression call must be make_struct");
}
for (const compute::Expression& arg : call->arguments) {
if (auto field_ref = arg.field_ref()) {
if (field_ref->IsName()) {
field_names.emplace(*field_ref->name());
} else if (field_ref->IsNested()) {
// We keep the top-level field name.
auto nested_field_refs = *field_ref->nested_refs();
field_names.emplace(*nested_field_refs[0].name());
} else {
return Status::Invalid(
"No projected schema was supplied and we could not infer the projected "
"schema from the projection expression.");
}
for (auto field_ref : compute::FieldsInExpression(projection)) {
if (field_ref.IsName()) {
field_names.emplace(*field_ref.name());
} else if (field_ref.IsNested()) {
// We keep the top-level field name.
auto nested_field_refs = *field_ref.nested_refs();
field_names.emplace(*nested_field_refs[0].name());
} else {
return Status::Invalid(
"No projected schema was supplied and we could not infer the projected "
"schema from the projection expression.");
}
}
}
Expand Down
22 changes: 19 additions & 3 deletions r/tests/testthat/test-dplyr-query.R
Original file line number Diff line number Diff line change
Expand Up @@ -740,12 +740,19 @@ test_that("Can use nested field refs", {
collect(),
nested_data
)
})

# Now with Dataset: make sure column pushdown in ScanNode works
test_that("Can use nested field refs with Dataset", {
skip_if_not_available("dataset")
# Now with Dataset: make sure column pushdown in ScanNode works
nested_data <- tibble(int = 1:5, df_col = tibble(a = 6:10, b = 11:15))
tf <- tempfile()
dir.create(tf)
write_dataset(nested_data, tf)
ds <- open_dataset(tf)

expect_equal(
nested_data %>%
InMemoryDataset$create() %>%
ds %>%
mutate(
nested = df_col$a,
times2 = df_col$a * 2
Expand All @@ -759,6 +766,15 @@ test_that("Can use nested field refs", {
) %>%
filter(nested > 7)
)
# Issue #34519: error when projecting same name, but only on file dataset
expect_equal(
ds %>%
mutate(int = as.numeric(int)) %>%
collect(),
nested_data %>%
mutate(int = as.numeric(int)) %>%
collect()
)
})

test_that("Use struct_field for $ on non-field-ref", {
Expand Down