Question
I'm using pyiceberg==0.7.1.
I inserted a row with no value for a struct field into an Iceberg table (Glue catalog) using the "append" function, but a non-null value was written instead of null.
It seems to occur only with struct fields.
Why does this occur? How can I avoid it?
my script:
from pyiceberg.catalog import load_catalog
import pyarrow as pa
# Reproduction script: create an Iceberg table in a Glue catalog from a
# PyArrow schema and append two rows, the second of which sets only
# "string_field_1".

# Type of the nested struct column.
struct_type = pa.struct(
    [
        pa.field("string_nested_1", pa.string()),
        pa.field("int_item_2", pa.int32()),
        pa.field("float_item_2", pa.float32()),
    ]
)

# Column definitions: three scalars, one struct, three lists, three maps.
schema_fields = [
    pa.field("string_field_1", pa.string(), nullable=True),
    pa.field("int_field_1", pa.int32(), nullable=True),
    pa.field("float_field_1", pa.float32(), nullable=True),
    pa.field("struct_field_1", struct_type),
    pa.field("list_field_1", pa.list_(pa.string())),
    pa.field("list_field_2", pa.list_(pa.int32())),
    pa.field("list_field_3", pa.list_(pa.float32())),
    pa.field("map_field_1", pa.map_(pa.string(), pa.string())),
    pa.field("map_field_2", pa.map_(pa.string(), pa.int32())),
    pa.field("map_field_3", pa.map_(pa.string(), pa.float32())),
]
schema = pa.schema(schema_fields)

# Row with a value for every column.
full_record = {
    "string_field_1": "field_1",
    "int_field_1": 123,
    "float_field_1": 1.23,
    "struct_field_1": {
        "string_nested_1": "nest_1",
        "int_item_2": 1234,
        "float_item_2": 1.234,
    },
    "list_field_1": ["a", "b", "c"],
    "list_field_2": [1, 2, 3],
    "list_field_3": [0.1, 0.2, 0.3],
    "map_field_1": {"a": "b", "b": "c"},
    "map_field_2": {"a": 1, "b": 2},
    "map_field_3": {"a": 0.1, "b": 0.2},
}

# Row that sets only the first column; all other columns, including the
# struct column under investigation, are left unset.
sparse_record = {
    "string_field_1": "field_1_b",
}

records = [full_record, sparse_record]

# The property keys contain dots, so they have to be passed via ** unpacking.
catalog_properties = {
    "type": "glue",
    "glue.region": "us-west-2",
    "s3.region": "us-west-2",
}
catalog = load_catalog("glue", **catalog_properties)

table_name = "iceberg_test"
location = f"s3://tmp_bucket/test/iceberg/{table_name}"
table_identifier = f"test.{table_name}"

# Drop the table before re-creating it so each run starts from scratch.
catalog.drop_table(table_identifier)
table = catalog.create_table(
    table_identifier,
    schema,
    location=location,
)

# Build the Arrow table against the explicit schema and append it.
pyarrow_table: pa.Table = pa.Table.from_pylist(records, schema=schema)
table.append(pyarrow_table)
athena result:
"string_field_1","int_field_1","float_field_1","struct_field_1","list_field_1","list_field_2","list_field_3","map_field_1","map_field_2","map_field_3"
"field_1","123","1.23","{string_nested_1=nest_1, int_item_2=1234, float_item_2=1.234}","[a, b, c]","[1, 2, 3]","[0.1, 0.2, 0.3]","{a=b, b=c}","{a=1, b=2}","{a=0.1, b=0.2}"
"field_1_b",,,"{string_nested_1=, int_item_2=0, float_item_2=0.0}",,,,,,
Question
I'm using pyiceberg==0.7.1.
I inserted a row with no value for a struct field into an Iceberg table (Glue catalog) using the "append" function, but a non-null value was written instead of null.
It seems to occur only with struct fields.
Why does this occur? How can I avoid it?
my script:
athena result: