diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 4e679eea661..fcefeda582b 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -141,19 +141,19 @@ Result> GetProjectedSchemaFromExpression(
     if (call->function_name != "make_struct") {
       return Status::Invalid("Top level projection expression call must be make_struct");
     }
-    for (const compute::Expression& arg : call->arguments) {
-      if (auto field_ref = arg.field_ref()) {
-        if (field_ref->IsName()) {
-          field_names.emplace(*field_ref->name());
-        } else if (field_ref->IsNested()) {
-          // We keep the top-level field name.
-          auto nested_field_refs = *field_ref->nested_refs();
-          field_names.emplace(*nested_field_refs[0].name());
-        } else {
-          return Status::Invalid(
-              "No projected schema was supplied and we could not infer the projected "
-              "schema from the projection expression.");
-        }
+    // Visit each field reference that compute::FieldsInExpression reports for the
+    // projection. Bind by const reference so no FieldRef (or its nested vector) is copied.
+    for (const auto& field_ref : compute::FieldsInExpression(projection)) {
+      if (field_ref.IsName()) {
+        field_names.emplace(*field_ref.name());
+      } else if (field_ref.IsNested()) {
+        // We keep the top-level field name.
+        const auto& nested_field_refs = *field_ref.nested_refs();
+        field_names.emplace(*nested_field_refs[0].name());
+      } else {
+        return Status::Invalid(
+            "No projected schema was supplied and we could not infer the projected "
+            "schema from the projection expression.");
       }
     }
   }
diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index 469bcd10aa6..00a9784e801 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -740,12 +740,19 @@ test_that("Can use nested field refs", {
       collect(),
     nested_data
   )
+})
 
-  # Now with Dataset: make sure column pushdown in ScanNode works
+test_that("Can use nested field refs with Dataset", {
   skip_if_not_available("dataset")
+  # Now with Dataset: make sure column pushdown in ScanNode works
+  nested_data <- tibble(int = 1:5, df_col = tibble(a = 6:10, b = 11:15))
+  tf <- tempfile()
+  dir.create(tf)
+  write_dataset(nested_data, tf)
+  ds <- open_dataset(tf)
+
   expect_equal(
-    nested_data %>%
-      InMemoryDataset$create() %>%
+    ds %>%
       mutate(
         nested = df_col$a,
         times2 = df_col$a * 2
@@ -759,6 +766,15 @@ test_that("Can use nested field refs", {
       ) %>%
       filter(nested > 7)
   )
+  # Issue #34519: error when projecting same name, but only on file dataset
+  expect_equal(
+    ds %>%
+      mutate(int = as.numeric(int)) %>%
+      collect(),
+    nested_data %>%
+      mutate(int = as.numeric(int)) %>%
+      collect()
+  )
 })
 
 test_that("Use struct_field for $ on non-field-ref", {