@@ -1066,17 +1066,29 @@ std::string FieldPath::ToString() const {
10661066 return repr;
10671067}
10681068
1069- static Status NonStructError () {
1070- return Status::NotImplemented (" Get child data of non-struct array" );
1071- }
1069+ struct NestedSelectorUtil {
1070+ static Status NonStructError () {
1071+ return Status::NotImplemented (" Get child data of non-struct array" );
1072+ }
1073+
1074+ template <typename T>
1075+ static const DataType* GetType (const T& input) {
1076+ if constexpr (std::is_same_v<T, ArrayData>) {
1077+ return input.type .get ();
1078+ } else {
1079+ return input.type ().get ();
1080+ }
1081+ }
1082+ };
10721083
1073- // Utility class for retrieving a child field/column from a top-level Field, Array, or
1074- // ChunkedArray. The "root" value can either be a single parent or a vector of its
1075- // children.
1084+ // Utility class for retrieving a child field/column from a top-level Field, Array,
1085+ // ArrayData, or ChunkedArray. The "root" value can either be a single parent or a vector
1086+ // of its children.
10761087template <typename T, bool IsFlattening = false >
10771088class NestedSelector {
10781089 public:
10791090 using ArrowType = T;
1091+ using Util = NestedSelectorUtil;
10801092
10811093 explicit NestedSelector (const std::vector<std::shared_ptr<T>>& children)
10821094 : parent_or_children_(&children) {}
@@ -1095,7 +1107,18 @@ class NestedSelector {
10951107 Result<NestedSelector> GetChild (int i) const {
10961108 std::shared_ptr<T> child;
10971109 if (auto parent = get_parent ()) {
1098- ARROW_ASSIGN_OR_RAISE (child, GetChild (*parent, i, pool_));
1110+ const DataType* type = Util::GetType (*parent);
1111+ // We avoid this check for schema fields since it's inconsequential (plus there are
1112+ // tests elsewhere that rely on it not happening)
1113+ if constexpr (!std::is_same_v<T, Field>) {
1114+ if (ARROW_PREDICT_FALSE (type->id () != Type::STRUCT)) {
1115+ return Util::NonStructError ();
1116+ }
1117+ }
1118+ // Bounds-check the index *once* using the parent's type
1119+ if (ARROW_PREDICT_TRUE (i >= 0 && i < type->num_fields ())) {
1120+ ARROW_ASSIGN_OR_RAISE (child, GetChild (*parent, i, pool_));
1121+ }
10991122 } else if (auto children = get_children ()) {
11001123 if (ARROW_PREDICT_TRUE (i >= 0 && static_cast <size_t >(i) < children->size ())) {
11011124 child = (*children)[i];
@@ -1129,10 +1152,10 @@ class NestedSelector {
11291152 *os << " column types: { " ;
11301153 if (auto children = get_children ()) {
11311154 for (const auto & child : *children) {
1132- *os << *child-> type ( ) << " , " ;
1155+ *os << *Util::GetType (*child ) << " , " ;
11331156 }
11341157 } else if (auto parent = get_parent ()) {
1135- for (const auto & field : parent-> type ( )->fields ()) {
1158+ for (const auto & field : Util::GetType (*parent )->fields ()) {
11361159 *os << *field->type () << " , " ;
11371160 }
11381161 }
@@ -1155,21 +1178,33 @@ class NestedSelector {
11551178 }
11561179
11571180 static Result<std::shared_ptr<Field>> GetChild (const Field& field, int i, MemoryPool*) {
1158- if (ARROW_PREDICT_FALSE (i < 0 || i >= field.type ()->num_fields ())) {
1159- return nullptr ;
1160- }
11611181 return field.type ()->field (i);
11621182 }
11631183
1164- static Result<std::shared_ptr<Array>> GetChild (const Array& array, int i,
1165- MemoryPool* pool) {
1166- if (ARROW_PREDICT_FALSE (array.type_id () != Type::STRUCT)) {
1167- return NonStructError ();
1168- }
1169- if (ARROW_PREDICT_FALSE (i < 0 || i >= array.num_fields ())) {
1170- return nullptr ;
1184+ static Result<std::shared_ptr<ArrayData>> GetChild (const ArrayData& data, int i,
1185+ MemoryPool* pool) {
1186+ std::shared_ptr<ArrayData> child_data;
1187+ if constexpr (IsFlattening) {
1188+ // First, convert to an Array so we can use StructArray::GetFlattenedField
1189+ auto array = MakeArray (data.Copy ());
1190+ ARROW_ASSIGN_OR_RAISE (auto child_array, GetChild (*array, i, pool));
1191+ child_data = child_array->data ();
1192+ } else {
1193+ // We could achieve the same result by converting to an Array (via MakeArray),
1194+ // calling StructArray::field(i), and pulling out the new ArrayData. However, this
1195+ // process can be very expensive when there are many columns - so we just
1196+ // reimplement the functionality that we need
1197+ child_data = data.child_data [i];
1198+ if (data.offset != 0 || data.child_data [i]->length != data.length ) {
1199+ child_data = child_data->Slice (data.offset , data.length );
1200+ }
11711201 }
11721202
1203+ return std::move (child_data);
1204+ }
1205+
1206+ static Result<std::shared_ptr<Array>> GetChild (const Array& array, int i,
1207+ MemoryPool* pool) {
11731208 const auto & struct_array = checked_cast<const StructArray&>(array);
11741209 if constexpr (IsFlattening) {
11751210 return struct_array.GetFlattenedField (i, pool);
@@ -1181,22 +1216,15 @@ class NestedSelector {
11811216 static Result<std::shared_ptr<ChunkedArray>> GetChild (const ChunkedArray& chunked_array,
11821217 int i, MemoryPool* pool) {
11831218 const auto & type = *chunked_array.type ();
1184- if (ARROW_PREDICT_FALSE (type.id () != Type::STRUCT)) {
1185- return NonStructError ();
1186- }
1187- if (ARROW_PREDICT_FALSE (i < 0 || i >= type.num_fields ())) {
1188- return nullptr ;
1189- }
11901219
11911220 ArrayVector chunks;
11921221 chunks.reserve (chunked_array.num_chunks ());
11931222 for (const auto & parent_chunk : chunked_array.chunks ()) {
11941223 ARROW_ASSIGN_OR_RAISE (auto chunk, GetChild (*parent_chunk, i, pool));
1195- if (!chunk) return nullptr ;
11961224 chunks.push_back (std::move (chunk));
11971225 }
11981226
1199- return ChunkedArray::Make (std::move (chunks), type.field (i)->type ());
1227+ return std::make_shared<ChunkedArray> (std::move (chunks), type.field (i)->type ());
12001228 }
12011229
12021230 std::shared_ptr<T> owned_parent_;
@@ -1289,7 +1317,11 @@ Result<std::shared_ptr<Schema>> FieldPath::GetAll(const Schema& schm,
12891317}
12901318
12911319Result<std::shared_ptr<Array>> FieldPath::Get (const RecordBatch& batch) const {
1292- return FieldPathGetImpl::Get (this , ZeroCopySelector<Array>(batch.columns ()));
1320+ // Deliberately calling `column_data` here because `RecordBatch::columns` is nontrivial
1321+ ARROW_ASSIGN_OR_RAISE (
1322+ auto data,
1323+ FieldPathGetImpl::Get (this , ZeroCopySelector<ArrayData>(batch.column_data ())));
1324+ return MakeArray (data);
12931325}
12941326
12951327Result<std::shared_ptr<ChunkedArray>> FieldPath::Get (const Table& table) const {
@@ -1301,11 +1333,7 @@ Result<std::shared_ptr<Array>> FieldPath::Get(const Array& array) const {
13011333}
13021334
13031335Result<std::shared_ptr<ArrayData>> FieldPath::Get (const ArrayData& data) const {
1304- // We indirect from ArrayData to Array rather than vice-versa because, when selecting a
1305- // nested column, the StructArray::field method does the work of adjusting the data's
1306- // offset/length if necessary.
1307- ARROW_ASSIGN_OR_RAISE (auto array, Get (*MakeArray (data.Copy ())));
1308- return array->data ();
1336+ return FieldPathGetImpl::Get (this , ZeroCopySelector<ArrayData>(data));
13091337}
13101338
13111339Result<std::shared_ptr<ChunkedArray>> FieldPath::Get (
@@ -1320,8 +1348,7 @@ Result<std::shared_ptr<Array>> FieldPath::GetFlattened(const Array& array,
13201348
13211349Result<std::shared_ptr<ArrayData>> FieldPath::GetFlattened (const ArrayData& data,
13221350 MemoryPool* pool) const {
1323- ARROW_ASSIGN_OR_RAISE (auto array, GetFlattened (*MakeArray (data.Copy ()), pool));
1324- return array->data ();
1351+ return FieldPathGetImpl::Get (this , FlatteningSelector<ArrayData>(data, pool));
13251352}
13261353
13271354Result<std::shared_ptr<ChunkedArray>> FieldPath::GetFlattened (
@@ -1332,7 +1359,10 @@ Result<std::shared_ptr<ChunkedArray>> FieldPath::GetFlattened(
13321359
13331360Result<std::shared_ptr<Array>> FieldPath::GetFlattened (const RecordBatch& batch,
13341361 MemoryPool* pool) const {
1335- return FieldPathGetImpl::Get (this , FlatteningSelector<Array>(batch.columns (), pool));
1362+ ARROW_ASSIGN_OR_RAISE (
1363+ auto data, FieldPathGetImpl::Get (
1364+ this , FlatteningSelector<ArrayData>(batch.column_data (), pool)));
1365+ return MakeArray (data);
13361366}
13371367
13381368Result<std::shared_ptr<ChunkedArray>> FieldPath::GetFlattened (const Table& table,
0 commit comments