diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
index 9263f1042246..75fe44315faa 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
@@ -1232,11 +1232,6 @@ void IcebergStorageSink::consume(Chunk & chunk)
         {
             auto [data_filename, data_filename_in_storage] = filename_generator.generateDataFileName();
             data_filenames[partition_key] = data_filename;
-            if (!statistics.contains(partition_key))
-            {
-                statistics.emplace(partition_key, current_schema->getArray(Iceberg::f_fields));
-            }
-            statistics.at(partition_key).update(part_chunk);
 
             auto buffer = object_storage->writeObject(
                 StoredObject(data_filename_in_storage), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings());
@@ -1252,6 +1247,12 @@ void IcebergStorageSink::consume(Chunk & chunk)
                 configuration->getFormat(), *write_buffers[partition_key], *sample_block, context, format_settings);
         }
 
+        if (!statistics.contains(partition_key))
+        {
+            statistics.emplace(partition_key, current_schema->getArray(Iceberg::f_fields));
+        }
+        statistics.at(partition_key).update(part_chunk);
+
         writers[partition_key]->write(getHeader().cloneWithColumns(part_chunk.getColumns()));
     }
 }
diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py
index a5f08571c862..37a591c7cdbb 100644
--- a/tests/integration/test_storage_iceberg/test.py
+++ b/tests/integration/test_storage_iceberg/test.py
@@ -3850,6 +3850,7 @@ def test_system_tables_partition_sorting_keys(started_cluster, storage_type):
     assert res == '"bucket(16, id), day(ts)","id desc, hour(ts) asc"'
 
+
 @pytest.mark.parametrize(
     "storage_type",
     ["s3", "azure", "local"],
 )
@@ -3916,3 +3917,19 @@ def check_validity_and_get_prunned_files(select_expression):
         )
         == 3
     )
+
+
+
+def test_iceberg_write_minmax(started_cluster):
+    instance = started_cluster.instances["node1"]
+    TABLE_NAME = "test_iceberg_write_minmax_" + get_uuid_str()
+
+    create_iceberg_table("local", instance, TABLE_NAME, started_cluster, "(x Int32, y Int32)", partition_by="identity(x)")
+
+    instance.query(f"INSERT INTO {TABLE_NAME} VALUES (1, 1), (1, 2)", settings={"allow_experimental_insert_into_iceberg": 1})
+
+    res = instance.query(f"SELECT x,y FROM {TABLE_NAME} WHERE y=1 ORDER BY ALL").strip()
+    assert res == "1\t1"
+
+    res = instance.query(f"SELECT x,y FROM {TABLE_NAME} WHERE y=2 ORDER BY ALL").strip()
+    assert res == "1\t2"
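The change moves the per-partition min/max statistics update out of the branch that lazily creates a data file and writer, so it now runs for every chunk written to a partition rather than only the first. Below is a minimal, self-contained sketch of that ordering bug; MinMaxStats, consumePart, and the writers map are hypothetical stand-ins for illustration, not ClickHouse's actual types:

// Toy model of IcebergStorageSink::consume's statistics handling (C++20 for
// std::map::contains). Pre-patch, the stats update sat inside the branch that
// lazily creates the writer, so only the first chunk per partition key was
// reflected in min/max; later chunks were silently skipped.
#include <algorithm>
#include <cassert>
#include <climits>
#include <map>
#include <string>
#include <vector>

struct MinMaxStats
{
    int min_v = INT_MAX;
    int max_v = INT_MIN;
    void update(const std::vector<int> & values)
    {
        for (int v : values)
        {
            min_v = std::min(min_v, v);
            max_v = std::max(max_v, v);
        }
    }
};

std::map<std::string, bool> writers;            // stand-in for the real writer map
std::map<std::string, MinMaxStats> statistics;  // per-partition min/max

void consumePart(const std::string & partition_key, const std::vector<int> & values)
{
    if (!writers.contains(partition_key))
    {
        writers[partition_key] = true;  // lazily "create" the writer
        // BUG (pre-patch): updating statistics only inside this branch
        // misses every chunk after the first for this partition key.
    }

    // FIX (post-patch): update statistics unconditionally, outside the
    // writer-creation branch, so min/max covers all rows of the partition.
    statistics[partition_key].update(values);
}

int main()
{
    consumePart("x=1", {1});  // first chunk creates the writer
    consumePart("x=1", {2});  // second chunk must still widen min/max
    assert(statistics["x=1"].min_v == 1 && statistics["x=1"].max_v == 2);
}

The new integration test exercises the same scenario: two rows landing in one partition (identity(x) with x = 1), where filtering on y=1 and y=2 must both return rows, which fails if the written min/max stats only describe the first chunk and prune the file incorrectly.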