From 44faec6542e2a0d8c7d22558aa6525410e4d93b6 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Fri, 4 Apr 2025 09:44:31 -0300
Subject: [PATCH 1/8] implement clear parquet metadata cache command, no tests for cluster yet

---
 src/Access/Common/AccessType.h                |  1 +
 src/Interpreters/InterpreterSystemQuery.cpp   | 13 +++++++++++
 src/Parsers/ASTSystemQuery.cpp                |  1 +
 src/Parsers/ASTSystemQuery.h                  |  1 +
 ...et_object_storage_metadata_cache.reference |  3 +++
 ..._parquet_object_storage_metadata_cache.sql | 22 +++++++++++++++++++
 6 files changed, 41 insertions(+)

diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index facb71134244..dfe0fb800374 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -178,6 +178,7 @@ enum class AccessType : uint8_t
     M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
+    M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET METADATA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
     M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
     M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index ac5f03e3b243..9f47020e4b92 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -74,6 +74,10 @@
 #include
 #endif
 
+#if USE_PARQUET
+#include
+#endif
+
 #if USE_AWS_S3
 #include
 #endif
@@ -410,6 +414,14 @@ BlockIO InterpreterSystemQuery::execute()
             break;
         }
+        case Type::DROP_PARQUET_METADATA_CACHE:
+        {
+#if USE_PARQUET
+            getContext()->checkAccess(AccessType::SYSTEM_DROP_PARQUET_METADATA_CACHE);
+            ParquetFileMetaDataCache::instance()->clear();
+#endif
+            break;
+        }
         case Type::DROP_COMPILED_EXPRESSION_CACHE:
 #if USE_EMBEDDED_COMPILER
             getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE);
@@ -1423,6 +1435,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
         case Type::DROP_PAGE_CACHE:
         case Type::DROP_SCHEMA_CACHE:
         case Type::DROP_FORMAT_SCHEMA_CACHE:
+        case Type::DROP_PARQUET_METADATA_CACHE:
         case Type::DROP_S3_CLIENT_CACHE:
         {
             required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE);
diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp
index 6702770f9186..8e754cc5f0ec 100644
--- a/src/Parsers/ASTSystemQuery.cpp
+++ b/src/Parsers/ASTSystemQuery.cpp
@@ -416,6 +416,7 @@ void ASTSystemQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & setti
         case Type::DROP_INDEX_UNCOMPRESSED_CACHE:
         case Type::DROP_COMPILED_EXPRESSION_CACHE:
         case Type::DROP_S3_CLIENT_CACHE:
+        case Type::DROP_PARQUET_METADATA_CACHE:
         case Type::RESET_COVERAGE:
         case Type::RESTART_REPLICAS:
         case Type::JEMALLOC_PURGE:
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index bd3279d5faad..1faf1d78a91f 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -39,6 +39,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
         DROP_SCHEMA_CACHE,
         DROP_FORMAT_SCHEMA_CACHE,
         DROP_S3_CLIENT_CACHE,
+        DROP_PARQUET_METADATA_CACHE,
         STOP_LISTEN,
         START_LISTEN,
         RESTART_REPLICAS,
diff --git a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference b/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference
index f5c1b1de44a4..c87ad9008b60 100644
--- a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference
+++ b/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.reference
@@ -3,3 +3,6 @@
 10
 10
 10
+10
+0
+10
diff --git a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql b/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql
index 6153ad30b332..2a1934e7c963 100644
--- a/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql
+++ b/tests/queries/0_stateless/03299_parquet_object_storage_metadata_cache.sql
@@ -36,4 +36,26 @@ AND type = 'QueryFinish'
 ORDER BY event_time desc
 LIMIT 1;
 
+SYSTEM DROP PARQUET METADATA CACHE;
+
+SELECT COUNT(*)
+FROM s3(s3_conn, filename = 'test_03262_*', format = Parquet)
+SETTINGS input_format_parquet_use_metadata_cache=1, optimize_count_from_files=0, log_comment='test_03262_parquet_metadata_cache_cache_empty';
+
+SYSTEM FLUSH LOGS;
+
+SELECT ProfileEvents['ParquetMetaDataCacheHits']
+FROM system.query_log
+where log_comment = 'test_03262_parquet_metadata_cache_cache_empty'
+AND type = 'QueryFinish'
+ORDER BY event_time desc
+LIMIT 1;
+
+SELECT ProfileEvents['ParquetMetaDataCacheMisses']
+FROM system.query_log
+where log_comment = 'test_03262_parquet_metadata_cache_cache_empty'
+AND type = 'QueryFinish'
+ORDER BY event_time desc
+LIMIT 1;
+
 DROP TABLE t_parquet_03262;
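Note for reviewers: the stateless test above exercises the new command end to end. A minimal interactive sketch of the same flow (assuming the test's `s3_conn` named collection and `test_03262_*` Parquet files already exist; illustrative only, not part of the patch):

    -- Populate the Parquet metadata cache with a cached-metadata read.
    SELECT COUNT(*)
    FROM s3(s3_conn, filename = 'test_03262_*', format = Parquet)
    SETTINGS input_format_parquet_use_metadata_cache = 1;

    -- New command introduced by this patch; gated behind the SYSTEM DROP CACHE grant.
    SYSTEM DROP PARQUET METADATA CACHE;

    -- After SYSTEM FLUSH LOGS, re-running the SELECT records ParquetMetaDataCacheMisses = 1
    -- and ParquetMetaDataCacheHits = 0 for that query in system.query_log.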
From 6971379b437e71743df59166496ec7bf05abb647 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Sun, 6 Apr 2025 12:35:14 -0300
Subject: [PATCH 2/8] add cluster tests

---
 .../__init__.py                               |  0
 .../config.d/cluster.xml                      | 20 ++++++
 .../test_parquet_drop_metadata_cache/test.py  | 69 +++++++++++++++++++
 3 files changed, 89 insertions(+)
 create mode 100644 tests/integration/test_parquet_drop_metadata_cache/__init__.py
 create mode 100644 tests/integration/test_parquet_drop_metadata_cache/config.d/cluster.xml
 create mode 100644 tests/integration/test_parquet_drop_metadata_cache/test.py

diff --git a/tests/integration/test_parquet_drop_metadata_cache/__init__.py b/tests/integration/test_parquet_drop_metadata_cache/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/integration/test_parquet_drop_metadata_cache/config.d/cluster.xml b/tests/integration/test_parquet_drop_metadata_cache/config.d/cluster.xml
new file mode 100644
index 000000000000..1388c0788fc0
--- /dev/null
+++ b/tests/integration/test_parquet_drop_metadata_cache/config.d/cluster.xml
@@ -0,0 +1,20 @@
+<clickhouse>
+    <remote_servers>
+        <parquet_clear_cache_cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node3</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </parquet_clear_cache_cluster>
+    </remote_servers>
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_parquet_drop_metadata_cache/test.py b/tests/integration/test_parquet_drop_metadata_cache/test.py
new file mode 100644
index 000000000000..b775f7d8389e
--- /dev/null
+++ b/tests/integration/test_parquet_drop_metadata_cache/test.py
@@ -0,0 +1,69 @@
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node1 = cluster.add_instance("node1", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
+node2 = cluster.add_instance("node2", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
+node3 = cluster.add_instance("node3", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_clear_cache_on_cluster(started_cluster):
+    node1.query("INSERT INTO TABLE FUNCTION s3(s3_conn, filename='test_clear_cache/{_partition_id}.parquet', format=Parquet) PARTITION BY number SELECT number FROM numbers(3)")
+
+    node1.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/1.parquet', format=Parquet) SETTINGS log_comment='cold_cache'")
+    node2.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/2.parquet', format=Parquet) SETTINGS log_comment='cold_cache'")
+    node3.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/3.parquet', format=Parquet) SETTINGS log_comment='cold_cache'")
+
+    node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
+
+    cold_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    cold_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    cold_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+
+    assert(cold_cache_result_n1 == cold_cache_result_n2 == cold_cache_result_n3)
+    assert(cold_cache_result_n1 == '0')
+
+
+    node1.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/1.parquet', format=Parquet) SETTINGS log_comment='hot_cache'")
+    node2.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/2.parquet', format=Parquet) SETTINGS log_comment='hot_cache'")
+    node3.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/3.parquet', format=Parquet) SETTINGS log_comment='hot_cache'")
+
+    node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
+
+    warm_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    warm_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    warm_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+
+    assert(warm_cache_result_n1 == warm_cache_result_n2 == warm_cache_result_n3)
+    assert(warm_cache_result_n1 == '1')
+
+    node1.query("SYSTEM DROP PARQUET METADATA CACHE ON CLUSTER parquet_clear_cache_cluster")
+
+    node1.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/1.parquet', format=Parquet) SETTINGS log_comment='cache_after_drop'")
+    node2.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/2.parquet', format=Parquet) SETTINGS log_comment='cache_after_drop'")
+    node3.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/3.parquet', format=Parquet) SETTINGS log_comment='cache_after_drop'")
+
+    node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
+
+    cache_after_drop_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    cache_after_drop_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    cache_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+
+    assert(cache_after_drop_result_n1 == cache_after_drop_result_n2 == cache_after_drop_result_n3)
+    assert(cache_after_drop_result_n1 == '0')
+
+    misses_after_drop_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    misses_after_drop_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    misses_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+
+    assert(misses_after_drop_result_n1 == misses_after_drop_result_n2 == misses_after_drop_result_n3)
+    assert(misses_after_drop_result_n1 == '1')
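The integration test above relies on the ON CLUSTER form fanning the drop out to every replica; the one-liner it runs (cluster name comes from the test's cluster.xml, shown only as a usage sketch):

    SYSTEM DROP PARQUET METADATA CACHE ON CLUSTER parquet_clear_cache_cluster;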
From 58f3cb10039cce719134ee9a278f1b57b6a38c54 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 7 Apr 2025 09:33:54 -0300
Subject: [PATCH 3/8] Update test.py

---
 tests/integration/test_parquet_drop_metadata_cache/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_parquet_drop_metadata_cache/test.py b/tests/integration/test_parquet_drop_metadata_cache/test.py
index b775f7d8389e..3f57288a5486 100644
--- a/tests/integration/test_parquet_drop_metadata_cache/test.py
+++ b/tests/integration/test_parquet_drop_metadata_cache/test.py
@@ -1,4 +1,4 @@
-
+import pytest
 from helpers.cluster import ClickHouseCluster
 
 cluster = ClickHouseCluster(__file__)

From 0baf53ddd602e1a2af2d58c0c71addf471a2c348 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 7 Apr 2025 09:57:35 -0300
Subject: [PATCH 4/8] Update test.py

---
 tests/integration/test_parquet_drop_metadata_cache/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_parquet_drop_metadata_cache/test.py b/tests/integration/test_parquet_drop_metadata_cache/test.py
index 3f57288a5486..b7f0962f97cf 100644
--- a/tests/integration/test_parquet_drop_metadata_cache/test.py
+++ b/tests/integration/test_parquet_drop_metadata_cache/test.py
@@ -8,7 +8,7 @@
 
 
 @pytest.fixture(scope="module")
-def start_cluster():
+def started_cluster():
     try:
         cluster.start()

From 7f60fc29ee3457715537e45a8100cf0ba34b0798 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 7 Apr 2025 10:08:10 -0300
Subject: [PATCH 5/8] move config dir

---
 .../{ => configs}/config.d/cluster.xml | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/integration/test_parquet_drop_metadata_cache/{ => configs}/config.d/cluster.xml (100%)

diff --git a/tests/integration/test_parquet_drop_metadata_cache/config.d/cluster.xml b/tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml
similarity index 100%
rename from tests/integration/test_parquet_drop_metadata_cache/config.d/cluster.xml
rename to tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml
From f612bd5508dcd921edee0b8174deb1551b532a2b Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 7 Apr 2025 11:16:54 -0300
Subject: [PATCH 6/8] fix minio endpoint and with_minio

---
 .../test_parquet_drop_metadata_cache/test.py | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/integration/test_parquet_drop_metadata_cache/test.py b/tests/integration/test_parquet_drop_metadata_cache/test.py
index b7f0962f97cf..89cd40b0ece8 100644
--- a/tests/integration/test_parquet_drop_metadata_cache/test.py
+++ b/tests/integration/test_parquet_drop_metadata_cache/test.py
@@ -2,7 +2,7 @@
 from helpers.cluster import ClickHouseCluster
 
 cluster = ClickHouseCluster(__file__)
-node1 = cluster.add_instance("node1", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
+node1 = cluster.add_instance("node1", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True, with_minio=True)
 node2 = cluster.add_instance("node2", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
 node3 = cluster.add_instance("node3", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
 
@@ -17,11 +17,11 @@ def started_cluster():
 
 
 def test_clear_cache_on_cluster(started_cluster):
-    node1.query("INSERT INTO TABLE FUNCTION s3(s3_conn, filename='test_clear_cache/{_partition_id}.parquet', format=Parquet) PARTITION BY number SELECT number FROM numbers(3)")
+    node1.query("INSERT INTO TABLE FUNCTION s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', 'Parquet') PARTITION BY number SELECT number FROM numbers(3)")
 
-    node1.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/1.parquet', format=Parquet) SETTINGS log_comment='cold_cache'")
-    node2.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/2.parquet', format=Parquet) SETTINGS log_comment='cold_cache'")
-    node3.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/3.parquet', format=Parquet) SETTINGS log_comment='cold_cache'")
+    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cold_cache'")
+    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cold_cache'")
+    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cold_cache'")
 
     node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
 
@@ -33,9 +33,9 @@ def test_clear_cache_on_cluster(started_cluster):
     assert(cold_cache_result_n1 == '0')
 
 
-    node1.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/1.parquet', format=Parquet) SETTINGS log_comment='hot_cache'")
-    node2.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/2.parquet', format=Parquet) SETTINGS log_comment='hot_cache'")
-    node3.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/3.parquet', format=Parquet) SETTINGS log_comment='hot_cache'")
+    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='hot_cache'")
+    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='hot_cache'")
+    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='hot_cache'")
 
     node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
 
@@ -48,9 +48,9 @@ def test_clear_cache_on_cluster(started_cluster):
     node1.query("SYSTEM DROP PARQUET METADATA CACHE ON CLUSTER parquet_clear_cache_cluster")
 
-    node1.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/1.parquet', format=Parquet) SETTINGS log_comment='cache_after_drop'")
-    node2.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/2.parquet', format=Parquet) SETTINGS log_comment='cache_after_drop'")
-    node3.query("SELECT * FROM s3(s3_conn, filename='test_clear_cache/3.parquet', format=Parquet) SETTINGS log_comment='cache_after_drop'")
+    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cache_after_drop'")
+    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cache_after_drop'")
+    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cache_after_drop'")
 
     node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
From 32f5d5842b6fd4820800214930fc07b7a70668f3 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 7 Apr 2025 11:36:33 -0300
Subject: [PATCH 7/8] fix integ tests

---
 .../test_parquet_drop_metadata_cache/test.py | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/integration/test_parquet_drop_metadata_cache/test.py b/tests/integration/test_parquet_drop_metadata_cache/test.py
index 89cd40b0ece8..03db9c05b081 100644
--- a/tests/integration/test_parquet_drop_metadata_cache/test.py
+++ b/tests/integration/test_parquet_drop_metadata_cache/test.py
@@ -1,5 +1,6 @@
 import pytest
 from helpers.cluster import ClickHouseCluster
+import time
 
 cluster = ClickHouseCluster(__file__)
 node1 = cluster.add_instance("node1", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True, with_minio=True)
@@ -17,11 +18,11 @@ def started_cluster():
 
 
 def test_clear_cache_on_cluster(started_cluster):
-    node1.query("INSERT INTO TABLE FUNCTION s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', 'Parquet') PARTITION BY number SELECT number FROM numbers(3)")
+    node1.query("INSERT INTO TABLE FUNCTION s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', 'Parquet') PARTITION BY number SELECT number FROM numbers(1, 3)")
 
-    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cold_cache'")
-    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cold_cache'")
-    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cold_cache'")
+    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='cold_cache'")
+    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='cold_cache'")
+    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='cold_cache'")
 
     node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
 
@@ -30,27 +31,26 @@ def test_clear_cache_on_cluster(started_cluster):
     cold_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
 
     assert(cold_cache_result_n1 == cold_cache_result_n2 == cold_cache_result_n3)
-    assert(cold_cache_result_n1 == '0')
+    assert(cold_cache_result_n1 == '0\n')
 
-
-    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='hot_cache'")
-    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='hot_cache'")
-    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='hot_cache'")
+    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='hot_cache'")
+    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='hot_cache'")
+    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='hot_cache'")
 
     node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
 
-    warm_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
-    warm_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
-    warm_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    warm_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    warm_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
+    warm_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
 
     assert(warm_cache_result_n1 == warm_cache_result_n2 == warm_cache_result_n3)
-    assert(warm_cache_result_n1 == '1')
+    assert(warm_cache_result_n1 == '1\n')
 
     node1.query("SYSTEM DROP PARQUET METADATA CACHE ON CLUSTER parquet_clear_cache_cluster")
 
-    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cache_after_drop'")
-    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cache_after_drop'")
-    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/{_partition_id}.parquet', 'minio', 'minio123', format=Parquet) SETTINGS log_comment='cache_after_drop'")
+    node1.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='cache_after_drop'")
+    node2.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='cache_after_drop'")
+    node3.query("SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', 'minio123', 'Parquet') SETTINGS log_comment='cache_after_drop'")
 
     node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
 
@@ -59,11 +59,11 @@ def test_clear_cache_on_cluster(started_cluster):
     cache_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
 
     assert(cache_after_drop_result_n1 == cache_after_drop_result_n2 == cache_after_drop_result_n3)
-    assert(cache_after_drop_result_n1 == '0')
+    assert(cache_after_drop_result_n1 == '0\n')
 
     misses_after_drop_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
     misses_after_drop_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
     misses_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
 
     assert(misses_after_drop_result_n1 == misses_after_drop_result_n2 == misses_after_drop_result_n3)
-    assert(misses_after_drop_result_n1 == '1')
+    assert(misses_after_drop_result_n1 == '1\n')
From 97ca59e1a2aca4016987ed933b5761b23dd3c9d2 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 7 Apr 2025 11:59:00 -0300
Subject: [PATCH 8/8] throw in case support is disabled and command is used

---
 src/Interpreters/InterpreterSystemQuery.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 910fae92c92c..955cf59d6e8b 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -424,8 +424,10 @@ BlockIO InterpreterSystemQuery::execute()
 #if USE_PARQUET
             getContext()->checkAccess(AccessType::SYSTEM_DROP_PARQUET_METADATA_CACHE);
             ParquetFileMetaDataCache::instance()->clear();
-#endif
             break;
+#else
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for Parquet");
+#endif
         }
         case Type::DROP_COMPILED_EXPRESSION_CACHE:
 #if USE_EMBEDDED_COMPILER
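With this last patch, issuing the command on a server built without Parquet support fails explicitly instead of silently doing nothing. A sketch of the expected behavior (error message taken verbatim from the patch; exact client output may differ):

    SYSTEM DROP PARQUET METADATA CACHE;
    -- Error: SUPPORT_IS_DISABLED: The server was compiled without the support for Parquet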