Skip to content

[Python] Running a substrait plan that includes an extension type returns results where the column.chunks attribute has an AttributeError #32291

@asfimport

Description

@asfimport

SQL

SELECT l_returnflag, l_linestatus FROM lineitem

 

substrait plan type info for l_returnflag:

{
  "fixedChar": {
    "length": 1,
    "typeVariationReference": 0,
    "nullability": "NULLABILITY_NULLABLE"
  }
}

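fixedChar is represented as an Arrow extension type when the plan is consumed, so the result column's type is a `pyarrow.lib.BaseExtensionType` (see the error below).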

 

Error:

pyarrow/table.pxi:1223: in pyarrow.lib.ChunkedArray.chunks.__get__
    ???
pyarrow/table.pxi:1241: in iterchunks
    ???
pyarrow/table.pxi:1185: in pyarrow.lib.ChunkedArray.chunk
    ???
pyarrow/public-api.pxi:200: in pyarrow.lib.pyarrow_wrap_array
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
>   ???
E   AttributeError: 'pyarrow.lib.BaseExtensionType' object has no attribute '__arrow_ext_class__'

 

Reproduction Steps:

import os

import pyarrow as pa
import pyarrow.substrait as substrait

from pyarrow import json as pyarrow_json
from pyarrow.lib import tobytes


substrait_query = <code block below>

json_file_path = os.path.join(<path>, 'lineitem.json')
arrow_data_path_ipc = os.path.join(<path>, 'substrait_data.arrow')
substrait_query = tobytes(substrait_query.replace("FILENAME_PLACEHOLDER", arrow_data_path_ipc))


# Save lineitem.json into IPC arrow binary file
table = pyarrow_json.read_json(json_file_path)

with pa.ipc.RecordBatchFileWriter(arrow_data_path_ipc, schema=table.schema) as writer:
    writer.write_table(table)


# Run the substrait query plan
buf = pa._substrait._parse_json_plan(substrait_query)
reader = substrait.run_query(buf)
result = reader.read_all()

print(result.columns[0].chunks)

lineitem.json is attached

substrait query plan:

"""
{
  "extensionUris": [],
  "extensions": [],
  "relations": [{
    "root": {
      "input": {
        "project": {
          "common": {
          },
          "input": {
            "read": {
              "common": {
                "direct": {
                }
              },
              "baseSchema": {
                "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"],
                "struct": {
                  "types": [{
                    "i64": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "i64": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "i64": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "i32": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 1,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 1,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "date": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "date": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "date": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 25,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 10,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "varchar": {
                      "length": 44,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }],
                  "typeVariationReference": 0,
                  "nullability": "NULLABILITY_REQUIRED"
                }
              },
             "local_files": {
                 "items": [
                 {
                     "uri_file": "file://FILENAME_PLACEHOLDER"
                 }
                 ]
             }
            }
          },
          "expressions": [{
            "selection": {
              "directReference": {
                "structField": {
                  "field": 8
                }
              },
              "rootReference": {
              }
            }
          }, {
            "selection": {
              "directReference": {
                "structField": {
                  "field": 9
                }
              },
              "rootReference": {
              }
            }
          }]
        }
      },
      "names": ["L_RETURNFLAG", "L_LINESTATUS"]
    }
  }],
  "expectedTypeUrls": []
}
"""

 

Reporter: Richard Tia / @richtia

Original Issue Attachments:

Note: This issue was originally created as ARROW-16975. Please see the migration documentation for further details.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions