From 6e8903999e62d52ac7cbf9b72eef786c6c5072bc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 30 Sep 2025 20:12:30 +0000 Subject: [PATCH 001/112] Backport #87442 to 25.8: Fix bool decoding in parquet reader v3 --- src/Processors/Formats/Impl/Parquet/Decoding.cpp | 2 +- .../queries/0_stateless/03630_parquet_bool_bug.reference | 2 ++ tests/queries/0_stateless/03630_parquet_bool_bug.sql | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03630_parquet_bool_bug.reference create mode 100644 tests/queries/0_stateless/03630_parquet_bool_bug.sql diff --git a/src/Processors/Formats/Impl/Parquet/Decoding.cpp b/src/Processors/Formats/Impl/Parquet/Decoding.cpp index 40f4efcf68c9..a3cfb47ee8f9 100644 --- a/src/Processors/Formats/Impl/Parquet/Decoding.cpp +++ b/src/Processors/Formats/Impl/Parquet/Decoding.cpp @@ -249,7 +249,7 @@ struct PlainBooleanDecoder : public PageDecoder /// x = 00000000 000000hg 00000000 000000fe 00000000 000000dc 00000000 000000ba x = (x | (x << 7)) & 0x0101010101010101ul; /// x = 0000000h 0000000g 0000000f 0000000e 0000000d 0000000c 0000000b 0000000a - memcpy(to + i * 8, &x, 8); + memcpy(to + i, &x, 8); i += 8; } else diff --git a/tests/queries/0_stateless/03630_parquet_bool_bug.reference b/tests/queries/0_stateless/03630_parquet_bool_bug.reference new file mode 100644 index 000000000000..0404dc20010c --- /dev/null +++ b/tests/queries/0_stateless/03630_parquet_bool_bug.reference @@ -0,0 +1,2 @@ +8 +256 diff --git a/tests/queries/0_stateless/03630_parquet_bool_bug.sql b/tests/queries/0_stateless/03630_parquet_bool_bug.sql new file mode 100644 index 000000000000..e80f1dc30b5a --- /dev/null +++ b/tests/queries/0_stateless/03630_parquet_bool_bug.sql @@ -0,0 +1,8 @@ +-- Tags: no-parallel, no-fasttest + +insert into function file('03630_parquet_bool_bug.parquet', Parquet, 'tags Array(Bool)') settings engine_file_truncate_on_insert=1 values ([false,false,false,false,false,false,false,false]), ([true,true,true,true,true,true,true,true]); +select sum(tags) from file('03630_parquet_bool_bug.parquet') array join tags settings input_format_parquet_use_native_reader_v3=1; + +-- Try all 256 1-byte masks to verify the bit shifting nonsense in PlainBooleanDecoder. 
+insert into function file('03630_parquet_bool_bug.parquet') select number as n, arrayMap(i -> toBool(bitShiftRight(number, i) % 2 = 1), range(8)) as bits from numbers(256) settings engine_file_truncate_on_insert=1; +select sum(n = arraySum(arrayMap(i -> bitShiftLeft(bits[i+1], i), range(8)))) as ok from file('03630_parquet_bool_bug.parquet') settings input_format_parquet_use_native_reader_v3=1, schema_inference_make_columns_nullable=0; From 20570cc516be8c6eef1ecbfce4d299ff3a17f278 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 7 Oct 2025 11:11:32 +0000 Subject: [PATCH 002/112] Backport #88105 to 25.8: Fix uncaught exception due noexcept tryGetCreateTableQuery() --- src/Databases/DataLake/DatabaseDataLake.cpp | 7 ++++--- src/Databases/IDatabase.h | 11 +++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Databases/DataLake/DatabaseDataLake.cpp b/src/Databases/DataLake/DatabaseDataLake.cpp index fd41602a3a40..90260079ac7b 100644 --- a/src/Databases/DataLake/DatabaseDataLake.cpp +++ b/src/Databases/DataLake/DatabaseDataLake.cpp @@ -630,7 +630,7 @@ ASTPtr DatabaseDataLake::getCreateDatabaseQuery() const ASTPtr DatabaseDataLake::getCreateTableQueryImpl( const String & name, ContextPtr /* context_ */, - bool /* throw_on_error */) const + bool throw_on_error) const { auto catalog = getCatalog(); auto table_metadata = DataLake::TableMetadata().withLocation().withSchema(); @@ -639,8 +639,9 @@ ASTPtr DatabaseDataLake::getCreateTableQueryImpl( if (!catalog->tryGetTableMetadata(namespace_name, table_name, table_metadata)) { - throw Exception( - ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY, "Table `{}` doesn't exist", name); + if (throw_on_error) + throw Exception(ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY, "Table `{}` doesn't exist", name); + return {}; } auto create_table_query = std::make_shared(); diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 8a9f412c00a0..eb7d5f2c75ce 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -346,15 +346,18 @@ class IDatabase : public std::enable_shared_from_this return static_cast(0); } - /// Get the CREATE TABLE query for the table. It can also provide information for detached tables for which there is metadata. - ASTPtr tryGetCreateTableQuery(const String & name, ContextPtr context) const noexcept + /// Get the CREATE TABLE query for the table. + /// It can also provide information for detached tables for which there is metadata. + /// + /// Does not throw if the table does not exist, but can throw on other errors. + ASTPtr tryGetCreateTableQuery(const String & name, ContextPtr context) const { - return getCreateTableQueryImpl(name, context, false); + return getCreateTableQueryImpl(name, context, /*throw_on_error=*/ false); } ASTPtr getCreateTableQuery(const String & name, ContextPtr context) const { - return getCreateTableQueryImpl(name, context, true); + return getCreateTableQueryImpl(name, context, /*throw_on_error=*/ true); } /// Get the CREATE DATABASE query for current database. 
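
A note on the backport above (#88105): getCreateTableQueryImpl() in DatabaseDataLake could throw even when reached through tryGetCreateTableQuery(), which was declared noexcept; an exception escaping a noexcept function calls std::terminate() and aborts the whole server instead of surfacing a catchable error, which is why the patch drops noexcept and makes the implementation honor throw_on_error. The following is a minimal standalone sketch of that failure mode only; the names lookupOrThrow, tryLookupBroken and tryLookupFixed are illustrative placeholders, not ClickHouse APIs.

#include <cstdio>
#include <stdexcept>

// Mirrors getCreateTableQueryImpl(..., throw_on_error=true): throws when the
// table is missing.
static int lookupOrThrow(bool exists)
{
    if (!exists)
        throw std::runtime_error("Table doesn't exist");
    return 42;
}

// Old shape of tryGetCreateTableQuery(): marked noexcept, but the callee can
// still throw. The exception cannot propagate out of a noexcept function, so
// std::terminate() is called and the process aborts.
static int tryLookupBroken(bool exists) noexcept
{
    return lookupOrThrow(exists);
}

// Fixed shape: no noexcept, and the "missing" case is handled without an
// exception (throw_on_error=false in the real code), returning a sentinel.
static int tryLookupFixed(bool exists)
{
    return exists ? lookupOrThrow(true) : 0;
}

int main()
{
    std::printf("%d\n", tryLookupFixed(false)); // prints 0 instead of aborting
    // tryLookupBroken(false);                  // would call std::terminate()
    return 0;
}
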
From 40cacd97f842172f2293f7577a4973a9abc75735 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 7 Oct 2025 11:50:53 +0000 Subject: [PATCH 003/112] Update autogenerated version to 25.8.9.20 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index c55e3f3097ad..f4b0bffe7db2 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54509) +SET(VERSION_REVISION 54510) SET(VERSION_MAJOR 25) SET(VERSION_MINOR 8) -SET(VERSION_PATCH 9) -SET(VERSION_GITHASH 8a2475033080b4a8d57b7771f52140af663dd4e0) -SET(VERSION_DESCRIBE v25.8.9.1-lts) -SET(VERSION_STRING 25.8.9.1) +SET(VERSION_PATCH 10) +SET(VERSION_GITHASH a1f4cd9c23f649b8891e952f973937f40eb9d273) +SET(VERSION_DESCRIBE v25.8.10.1-lts) +SET(VERSION_STRING 25.8.10.1) # end of autochange From 0b375d79653ef924c81efcd59cc28b4fd248dca7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 8 Oct 2025 07:13:22 +0000 Subject: [PATCH 004/112] Backport #86560 to 25.8: Fix stack overflow in quantileDD merge --- src/AggregateFunctions/DDSketch.h | 64 ++++++------- .../DDSketch/DDSketchEncoding.h | 8 +- src/AggregateFunctions/DDSketch/Mapping.h | 59 ++++++------ src/AggregateFunctions/DDSketch/Store.h | 43 +++++---- .../tests/gtest_ddsketch.cpp | 89 +++++++++++++++++++ .../03568_ddsketch_merge.reference | 1 + .../0_stateless/03568_ddsketch_merge.sql | 8 ++ 7 files changed, 192 insertions(+), 80 deletions(-) create mode 100644 src/AggregateFunctions/tests/gtest_ddsketch.cpp create mode 100644 tests/queries/0_stateless/03568_ddsketch_merge.reference create mode 100644 tests/queries/0_stateless/03568_ddsketch_merge.sql diff --git a/src/AggregateFunctions/DDSketch.h b/src/AggregateFunctions/DDSketch.h index 16b6318181cc..7c2b042699a3 100644 --- a/src/AggregateFunctions/DDSketch.h +++ b/src/AggregateFunctions/DDSketch.h @@ -1,49 +1,49 @@ #pragma once -#include // for std::unique_ptr -#include -#include #include -#include +#include // for std::unique_ptr #include -#include -#include - +#include #include #include -#include +#include +#include +#include +#include +#include namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; - extern const int INCORRECT_DATA; +extern const int BAD_ARGUMENTS; +extern const int INCORRECT_DATA; } class DDSketchDenseLogarithmic { public: explicit DDSketchDenseLogarithmic(Float64 relative_accuracy = 0.01) - : mapping(std::make_unique(relative_accuracy)), - store(std::make_unique()), - negative_store(std::make_unique()), - zero_count(0.0), - count(0.0) + : mapping(std::make_unique(relative_accuracy)) + , store(std::make_unique()) + , negative_store(std::make_unique()) + , zero_count(0.0) + , count(0.0) { } - DDSketchDenseLogarithmic(std::unique_ptr mapping_, - std::unique_ptr store_, - std::unique_ptr negative_store_, - Float64 zero_count_) - : mapping(std::move(mapping_)), - store(std::move(store_)), - negative_store(std::move(negative_store_)), - zero_count(zero_count_), - count(store->count + negative_store->count + zero_count_) + DDSketchDenseLogarithmic( + std::unique_ptr mapping_, + std::unique_ptr store_, + std::unique_ptr negative_store_, + Float64 zero_count_) + : mapping(std::move(mapping_)) + , store(std::move(store_)) + , 
negative_store(std::move(negative_store_)) + , zero_count(zero_count_) + , count(store->count + negative_store->count + zero_count_) { } @@ -97,7 +97,11 @@ class DDSketchDenseLogarithmic return quantile_value; } - void copy(const DDSketchDenseLogarithmic& other) + Float64 getGamma() const { return mapping->getGamma(); } + + Float64 getCount() const { return count; } + + void copy(const DDSketchDenseLogarithmic & other) { Float64 rel_acc = (other.mapping->getGamma() - 1) / (other.mapping->getGamma() + 1); mapping = std::make_unique(rel_acc); @@ -109,9 +113,9 @@ class DDSketchDenseLogarithmic count = other.count; } - void merge(const DDSketchDenseLogarithmic& other) + void merge(const DDSketchDenseLogarithmic & other) { - if (mapping->getGamma() != other.mapping->getGamma()) + if (*mapping != *other.mapping) { // modify the one with higher precision to match the one with lower precision if (mapping->getGamma() > other.mapping->getGamma()) @@ -147,7 +151,7 @@ class DDSketchDenseLogarithmic /// NOLINTBEGIN(readability-static-accessed-through-instance) - void serialize(WriteBuffer& buf) const + void serialize(WriteBuffer & buf) const { // Write the mapping writeBinary(enc.FlagIndexMappingBaseLogarithmic.byte, buf); @@ -165,7 +169,7 @@ class DDSketchDenseLogarithmic writeBinary(zero_count, buf); } - void deserialize(ReadBuffer& buf) + void deserialize(ReadBuffer & buf) { // Read the mapping UInt8 flag = 0; @@ -219,7 +223,7 @@ class DDSketchDenseLogarithmic auto new_positive_store = std::make_unique(); auto new_negative_store = std::make_unique(); - auto remap_store = [this, &new_mapping](DDSketchDenseStore& old_store, std::unique_ptr& target_store) + auto remap_store = [this, &new_mapping](DDSketchDenseStore & old_store, std::unique_ptr & target_store) { for (int i = 0; i < old_store.length(); ++i) { diff --git a/src/AggregateFunctions/DDSketch/DDSketchEncoding.h b/src/AggregateFunctions/DDSketch/DDSketchEncoding.h index 477bc3f54495..64dc8c0e55ed 100644 --- a/src/AggregateFunctions/DDSketch/DDSketchEncoding.h +++ b/src/AggregateFunctions/DDSketch/DDSketchEncoding.h @@ -1,7 +1,6 @@ #pragma once -#include -#include +#include /** * An encoded DDSketch comprises multiple contiguous blocks (sequences of bytes). 
@@ -36,7 +35,10 @@ class DDSketchEncoding { public: UInt8 byte; - Flag(UInt8 t, UInt8 s) : byte(t | s) { } + Flag(UInt8 t, UInt8 s) + : byte(t | s) + { + } [[maybe_unused]] UInt8 Type() const { return byte & flagTypeMask; } [[maybe_unused]] UInt8 SubFlag() const { return byte & subFlagMask; } }; diff --git a/src/AggregateFunctions/DDSketch/Mapping.h b/src/AggregateFunctions/DDSketch/Mapping.h index 0d1ff785d59d..0f4d939f8f56 100644 --- a/src/AggregateFunctions/DDSketch/Mapping.h +++ b/src/AggregateFunctions/DDSketch/Mapping.h @@ -1,26 +1,29 @@ #pragma once -#include #include -#include #include +#include #include +#include #include +#include +#include namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class DDSketchLogarithmicMapping { public: explicit DDSketchLogarithmicMapping(Float64 relative_accuracy_, Float64 offset_ = 0.0) - : relative_accuracy(relative_accuracy_), offset(offset_) + : relative_accuracy(relative_accuracy_) + , offset(offset_) { if (relative_accuracy <= 0 || relative_accuracy >= 1) { @@ -44,48 +47,40 @@ class DDSketchLogarithmicMapping return static_cast(logGamma(value) + offset); } - Float64 value(int key) const - { - return lowerBound(key) * (1 + relative_accuracy); - } + Float64 value(int key) const { return lowerBound(key) * (1 + relative_accuracy); } - Float64 logGamma(Float64 value) const - { - return std::log(value) * multiplier; - } + Float64 logGamma(Float64 value) const { return std::log(value) * multiplier; } - Float64 powGamma(Float64 value) const - { - return std::exp(value / multiplier); - } + Float64 powGamma(Float64 value) const { return std::exp(value / multiplier); } - Float64 lowerBound(int index) const - { - return powGamma(static_cast(index) - offset); - } + Float64 lowerBound(int index) const { return powGamma(static_cast(index) - offset); } - Float64 getGamma() const - { - return gamma; - } + Float64 getGamma() const { return gamma; } - Float64 getMinPossible() const - { - return min_possible; - } + Float64 getMinPossible() const { return min_possible; } - [[maybe_unused]] Float64 getMaxPossible() const + [[maybe_unused]] Float64 getMaxPossible() const { return max_possible; } + + bool operator==(const DDSketchLogarithmicMapping & other) const { - return max_possible; + if (gamma == other.gamma) + { + return true; + } + + auto [min, max] = std::minmax(gamma, other.gamma); + const Float64 difference = max - min; + const Float64 acceptable = (std::nextafter(min, max) - min) * min; + return difference <= acceptable; } - void serialize(WriteBuffer& buf) const + void serialize(WriteBuffer & buf) const { writeBinary(gamma, buf); writeBinary(offset, buf); } - void deserialize(ReadBuffer& buf) + void deserialize(ReadBuffer & buf) { readBinary(gamma, buf); readBinary(offset, buf); diff --git a/src/AggregateFunctions/DDSketch/Store.h b/src/AggregateFunctions/DDSketch/Store.h index 0e499e445d2a..594746f73e10 100644 --- a/src/AggregateFunctions/DDSketch/Store.h +++ b/src/AggregateFunctions/DDSketch/Store.h @@ -1,13 +1,15 @@ #pragma once -#include -#include #include #include +#include +#include +#include #include +#include #include -#include +#include // We start with 128 bins and grow the number of bins by 128 @@ -18,6 +20,11 @@ constexpr UInt32 CHUNK_SIZE = 128; namespace DB { +namespace ErrorCodes +{ +extern const int INCORRECT_DATA; +} + class DDSketchDenseStore { public: @@ -27,9 +34,12 @@ class DDSketchDenseStore int offset = 0; std::vector bins; - explicit DDSketchDenseStore(UInt32 chunk_size_ = 
CHUNK_SIZE) : chunk_size(chunk_size_) {} + explicit DDSketchDenseStore(UInt32 chunk_size_ = CHUNK_SIZE) + : chunk_size(chunk_size_) + { + } - void copy(DDSketchDenseStore* other) + void copy(DDSketchDenseStore * other) { bins = other->bins; count = other->count; @@ -38,10 +48,7 @@ class DDSketchDenseStore offset = other->offset; } - int length() const - { - return static_cast(bins.size()); - } + int length() const { return static_cast(bins.size()); } void add(int key, Float64 weight) { @@ -64,9 +71,10 @@ class DDSketchDenseStore return max_key; } - void merge(DDSketchDenseStore* other) + void merge(DDSketchDenseStore * other) { - if (other->count == 0) return; + if (other->count == 0) + return; if (count == 0) { @@ -89,9 +97,8 @@ class DDSketchDenseStore /// NOLINTBEGIN(readability-static-accessed-through-instance) - void serialize(WriteBuffer& buf) const + void serialize(WriteBuffer & buf) const { - // Calculate the size of the dense and sparse encodings to choose the smallest one UInt64 num_bins = 0; UInt64 num_non_empty_bins = 0; @@ -144,8 +151,10 @@ class DDSketchDenseStore } } - void deserialize(ReadBuffer& buf) + void deserialize(ReadBuffer & buf) { + count = 0; + UInt8 encoding_mode; readBinary(encoding_mode, buf); if (encoding_mode == enc.BinEncodingContiguousCounts) @@ -165,7 +174,7 @@ class DDSketchDenseStore start_key += index_delta; } } - else + else if (encoding_mode == enc.BinEncodingIndexDeltasAndCounts) { UInt64 num_non_empty_bins; readVarUInt(num_non_empty_bins, buf); @@ -180,6 +189,10 @@ class DDSketchDenseStore add(previous_index, bin_count); } } + else + { + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for encoding mode"); + } } /// NOLINTEND(readability-static-accessed-through-instance) diff --git a/src/AggregateFunctions/tests/gtest_ddsketch.cpp b/src/AggregateFunctions/tests/gtest_ddsketch.cpp new file mode 100644 index 000000000000..69c01d81fcd1 --- /dev/null +++ b/src/AggregateFunctions/tests/gtest_ddsketch.cpp @@ -0,0 +1,89 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +TEST(DDSketch, MergeDifferentGammasWithoutSegfault) +{ + using namespace DB; + + DDSketchDenseLogarithmic lhs{}; + DDSketchDenseLogarithmic rhs{}; + + /* + { + "mapping": { + "gamma": 2.0, + "index_offset": 0.0, + "interpolation": 0 + }, + "positive_values": { + "bin_counts": {}, + "contiguous_bin_counts": [ + 1.0 + ], + "contiguous_bin_index_offset": -8 + }, + "negative_values": { + "bin_counts": {}, + "contiguous_bin_counts": [], + "contiguous_bin_index_offset": 0 + }, + "zero_count": 0.0 + } + */ + std::string lhs_data = base64Decode("AgAAAAAAAABAAAAAAAAAAAABDAEPAgAAAAAAAPA/AwwA/v///w8CBAAAAAAAAAAA"); + ReadBufferFromString lhs_buffer{lhs_data}; + lhs.deserialize(lhs_buffer); + + ASSERT_DOUBLE_EQ(lhs.getCount(), 1); + ASSERT_DOUBLE_EQ(lhs.getGamma(), 2.0); + + /* + { + "mapping": { + "gamma": 1.4142135623730951, + "index_offset": 0.0, + "interpolation": 0 + }, + "positive_values": { + "bin_counts": {}, + "contiguous_bin_counts": [ + 1.0 + ], + "contiguous_bin_index_offset": -18 + }, + "negative_values": { + "bin_counts": {}, + "contiguous_bin_counts": [], + "contiguous_bin_index_offset": 0 + }, + "zero_count": 0.0 + } + */ + std::string rhs_data = base64Decode("As07f2aeoPY/AAAAAAAAAAABDAEjAgAAAAAAAPA/AwwA/v///w8CBAAAAAAAAAAA"); + ReadBufferFromString rhs_buffer{rhs_data}; + rhs.deserialize(rhs_buffer); + + ASSERT_DOUBLE_EQ(rhs.getCount(), 1); + ASSERT_DOUBLE_EQ(rhs.getGamma(), 1.4142135623730951); + + lhs.merge(rhs); + std::vector 
merge_buffer; + + WriteBufferFromVector> writer{merge_buffer}; + lhs.serialize(writer); + + ReadBufferFromMemory reader{merge_buffer.data(), merge_buffer.size()}; + ASSERT_NO_THROW(rhs.deserialize(reader)); + + ASSERT_FLOAT_EQ(rhs.getCount(), 2); + ASSERT_DOUBLE_EQ(rhs.getGamma(), 2.0); +} diff --git a/tests/queries/0_stateless/03568_ddsketch_merge.reference b/tests/queries/0_stateless/03568_ddsketch_merge.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03568_ddsketch_merge.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03568_ddsketch_merge.sql b/tests/queries/0_stateless/03568_ddsketch_merge.sql new file mode 100644 index 000000000000..ccb81882809e --- /dev/null +++ b/tests/queries/0_stateless/03568_ddsketch_merge.sql @@ -0,0 +1,8 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on base64Decode + +SELECT quantileDDMerge(0.01)(state) != 0 FROM format( + RowBinary, + 'state AggregateFunction(quantileDD(0.01), Float64)', + base64Decode('AgAAAAAAAABAAAAAAAAAAAABDAEPAgAAAAAAAPA/AwwA/v///w8CBAAAAAAAAAAAAs07f2aeoPY/AAAAAAAAAAABDAEjAgAAAAAAAPA/AwwA/v///w8CBAAAAAAAAAAA') +) From b25dc060b206964612fbabc6a2a0f7ec9661ce0f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 8 Oct 2025 16:13:00 +0000 Subject: [PATCH 005/112] Backport #88153 to 25.8: Fix redundant host resolution in DDL Worker --- src/Interpreters/DDLWorker.cpp | 37 +++++++++++++++------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index adc8c01a0294..5590029fc66f 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -1139,6 +1139,7 @@ bool DDLWorker::initializeMainThread() auto zookeeper = getAndSetZooKeeper(); zookeeper->createAncestors(fs::path(queue_dir) / ""); initializeReplication(); + markReplicasActive(true); initialized = true; return true; } @@ -1211,14 +1212,6 @@ void DDLWorker::runMainThread() } cleanup_event->set(); - try - { - markReplicasActive(reinitialized); - } - catch (...) 
- { - tryLogCurrentException(log, "An error occurred when markReplicasActive: "); - } scheduleTasks(reinitialized); subsequent_errors_count = 0; @@ -1297,24 +1290,21 @@ void DDLWorker::createReplicaDirs(const ZooKeeperPtr & zookeeper, const NameSet zookeeper->createAncestors(fs::path(replicas_dir) / host_id / ""); } -void DDLWorker::markReplicasActive(bool reinitialized) +void DDLWorker::markReplicasActive(bool /*reinitialized*/) { auto zookeeper = getZooKeeper(); - if (reinitialized) + // Reset all active_node_holders + for (auto & it : active_node_holders) { - // Reset all active_node_holders - for (auto & it : active_node_holders) - { - auto & active_node_holder = it.second.second; - if (active_node_holder) - active_node_holder->setAlreadyRemoved(); - active_node_holder.reset(); - } - - active_node_holders.clear(); + auto & active_node_holder = it.second.second; + if (active_node_holder) + active_node_holder->setAlreadyRemoved(); + active_node_holder.reset(); } + active_node_holders.clear(); + for (auto it = active_node_holders.begin(); it != active_node_holders.end();) { auto & zk = it->second.first; @@ -1394,7 +1384,12 @@ void DDLWorker::markReplicasActive(bool reinitialized) { zookeeper->deleteEphemeralNodeIfContentMatches(active_path, active_id); } - zookeeper->create(active_path, active_id, zkutil::CreateMode::Ephemeral); + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCreateRequest(active_path, active_id, zkutil::CreateMode::Ephemeral)); + /// To bump node mtime + ops.emplace_back(zkutil::makeSetRequest(fs::path(replicas_dir) / host_id, "", -1)); + zookeeper->multi(ops); + auto active_node_holder_zookeeper = zookeeper; auto active_node_holder = zkutil::EphemeralNodeHolder::existing(active_path, *active_node_holder_zookeeper); active_node_holders[host_id] = {active_node_holder_zookeeper, active_node_holder}; From b7c8d8b779be943de60dfedf3d9df0fcf8108f0e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Oct 2025 11:11:09 +0000 Subject: [PATCH 006/112] Backport #88213 to 25.8: Fix propagation of `is_shared` flag in `ColumnLowCardinality` --- src/Columns/ColumnLowCardinality.cpp | 6 +++--- src/Columns/ColumnLowCardinality.h | 17 +++++++++-------- src/DataTypes/DataTypeLowCardinality.cpp | 2 +- .../SerializationLowCardinality.cpp | 2 +- src/Functions/IFunction.cpp | 4 ++-- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 2 +- .../Formats/Impl/DWARFBlockInputFormat.cpp | 13 ++++++++----- .../Formats/Impl/NativeORCBlockInputFormat.cpp | 2 +- 8 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 1db462151d32..02fe28c0eba6 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -350,7 +350,7 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const if (size == 0) unique_ptr = unique_ptr->cloneEmpty(); - return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size)); + return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size), /*is_shared=*/false); } MutableColumnPtr ColumnLowCardinality::cloneNullable() const @@ -584,7 +584,7 @@ std::vector ColumnLowCardinality::scatter(ColumnIndex num_colu for (auto & column : columns) { auto unique_ptr = dictionary.getColumnUniquePtr(); - column = ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), std::move(column)); + column = 
ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), std::move(column), /*is_shared=*/false); } return columns; @@ -603,7 +603,7 @@ ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t star { auto sub_positions = IColumn::mutate(idx.getPositions()->cut(start, length)); auto new_column_unique = Dictionary::compact(getDictionary(), sub_positions); - return ColumnLowCardinality::create(std::move(new_column_unique), std::move(sub_positions)); + return ColumnLowCardinality::create(std::move(new_column_unique), std::move(sub_positions), /*is_shared=*/false); } void ColumnLowCardinality::compactInplace() diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 54a1ee52a212..dcce5e27d9fc 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -28,7 +28,7 @@ class ColumnLowCardinality final : public COWHelper, ColumnLowCardinality>; - ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false); + ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared); ColumnLowCardinality(const ColumnLowCardinality & other) = default; public: @@ -36,12 +36,12 @@ class ColumnLowCardinality final : public COWHelper, ColumnLowCardinality>; - static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false) + static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared) { return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared); } - static MutablePtr create(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false) + static MutablePtr create(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared) { return Base::create(std::move(column_unique), std::move(indexes), is_shared); } @@ -75,7 +75,7 @@ class ColumnLowCardinality final : public COWHelper scatter(ColumnIndex num_columns, const Selector & selector) const override; diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index 41aefce682db..210750724c1a 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -133,7 +133,7 @@ MutableColumnPtr DataTypeLowCardinality::createColumn() const { MutableColumnPtr indexes = DataTypeUInt8().createColumn(); MutableColumnPtr dictionary = createColumnUnique(*dictionary_type); - return ColumnLowCardinality::create(std::move(dictionary), std::move(indexes)); + return ColumnLowCardinality::create(std::move(dictionary), std::move(indexes), /*is_shared=*/false); } Field DataTypeLowCardinality::getDefault() const diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index eec367faac1c..c7325aab43a8 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -622,7 +622,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( if (column_is_empty) low_cardinality_column.setSharedDictionary(global_dictionary); - auto local_column = ColumnLowCardinality::create(global_dictionary, std::move(indexes_column)); + auto local_column = ColumnLowCardinality::create(global_dictionary, std::move(indexes_column), /*is_shared=*/true); low_cardinality_column.insertRangeFrom(*local_column, 0, num_rows); } 
else diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 32f6ff57183a..dd4ad34a5366 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -406,9 +406,9 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns( ColumnUniquePtr res_dictionary = std::move(res_mut_dictionary); if (indexes && !res_is_constant) - result = ColumnLowCardinality::create(res_dictionary, res_indexes->index(*indexes, 0)); + result = ColumnLowCardinality::create(res_dictionary, res_indexes->index(*indexes, 0), /*is_shared=*/false); else - result = ColumnLowCardinality::create(res_dictionary, res_indexes); + result = ColumnLowCardinality::create(res_dictionary, res_indexes, /*is_shared=*/false); if (res_is_constant) result = ColumnConst::create(std::move(result), input_rows_count); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 3ed775e5d5c1..4dae97e0d58b 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1314,7 +1314,7 @@ static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( auto arrow_indexes_column = std::make_shared(indexes_array); auto indexes_column = readColumnWithIndexesData(arrow_indexes_column, dict_info.default_value_index, dict_info.dictionary_size, is_lc_nullable); - auto lc_column = ColumnLowCardinality::create(dict_info.values->column, indexes_column); + auto lc_column = ColumnLowCardinality::create(dict_info.values->column, indexes_column, /*is_shared=*/true); auto lc_type = std::make_shared(is_lc_nullable ? makeNullable(dict_info.values->type) : dict_info.values->type); return {std::move(lc_column), std::move(lc_type), column_name}; } diff --git a/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp b/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp index 2d599dd22dd8..c82984068b80 100644 --- a/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp @@ -396,7 +396,10 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) auto col_attr_name = ColumnVector::create(); auto col_attr_form = ColumnVector::create(); auto col_attr_int = ColumnVector::create(); - auto col_attr_str = ColumnLowCardinality::create(MutableColumnPtr(ColumnUnique::create(ColumnString::create()->cloneResized(1), /*is_nullable*/ false)), MutableColumnPtr(ColumnVector::create())); + auto col_attr_str = ColumnLowCardinality::create( + MutableColumnPtr(ColumnUnique::create(ColumnString::create()->cloneResized(1), /*is_nullable*/ false)), + MutableColumnPtr(ColumnVector::create()), + /*is_shared=*/false); auto col_attr_offsets = ColumnVector::create(); size_t num_rows = 0; auto err = llvm::Error::success(); @@ -749,8 +752,8 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) auto index = ColumnVector::create(); index->insert(1); auto indices = index->replicate({num_rows}); - cols.push_back(ColumnLowCardinality::create(ColumnUnique::create( - std::move(dict), /*is_nullable*/ false), indices)); + cols.push_back(ColumnLowCardinality::create( + ColumnUnique::create(std::move(dict), /*is_nullable*/ false), indices, /*is_shared*/ false)); break; } case COL_UNIT_OFFSET: @@ -761,8 +764,8 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) auto index = ColumnVector::create(); index->insert(1); auto indices = index->replicate({num_rows}); - cols.push_back(ColumnLowCardinality::create(ColumnUnique>::create( - std::move(dict), 
/*is_nullable*/ false), indices)); + cols.push_back(ColumnLowCardinality::create( + ColumnUnique>::create(std::move(dict), /*is_nullable*/ false), indices, /*is_shared*/ false)); break; } case COL_ANCESTOR_TAGS: diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 1ba10e3d44a0..17e4a9426170 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -1422,7 +1422,7 @@ static ColumnWithTypeAndName readColumnWithEncodedStringOrFixedStringData( } } - return ColumnLowCardinality::create(std::move(dictionary_column), std::move(new_index_column)); + return ColumnLowCardinality::create(std::move(dictionary_column), std::move(new_index_column), /*is_shared=*/false); }; MutableColumnPtr internal_column; From b22201a5b1a16ed05594b5227f55d55a014b7e3c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Oct 2025 11:50:52 +0000 Subject: [PATCH 007/112] Update autogenerated version to 25.8.10.7 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index f4b0bffe7db2..de536b119436 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54510) +SET(VERSION_REVISION 54511) SET(VERSION_MAJOR 25) SET(VERSION_MINOR 8) -SET(VERSION_PATCH 10) -SET(VERSION_GITHASH a1f4cd9c23f649b8891e952f973937f40eb9d273) -SET(VERSION_DESCRIBE v25.8.10.1-lts) -SET(VERSION_STRING 25.8.10.1) +SET(VERSION_PATCH 11) +SET(VERSION_GITHASH 02ec3a1ea1e08fb18d2d9638f00b2b557fa1cc1c) +SET(VERSION_DESCRIBE v25.8.11.1-lts) +SET(VERSION_STRING 25.8.11.1) # end of autochange From 0f2a8450374556a4c1db67454aa898ed5fb678ec Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Oct 2025 14:12:54 +0000 Subject: [PATCH 008/112] Backport #88278 to 25.8: Update Azure sdk with Content-Length fix --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index 9e62bd3c7645..0f7a2013f7d7 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 9e62bd3c7645fbf276d37bcf99d9b90230d8efc9 +Subproject commit 0f7a2013f7d79058047fc4bd35e94d20578c0d2b From f796fe7b2afa77717852613c45450f65af068ccc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Oct 2025 10:13:12 +0000 Subject: [PATCH 009/112] Backport #88290 to 25.8: Fix object storages with correlated subqueries --- .../QueryPlan/ReadFromObjectStorageStep.cpp | 5 +++++ .../QueryPlan/ReadFromObjectStorageStep.h | 1 + ...bject_storage_correlated_subqueries.reference | 9 +++++++++ ...3644_object_storage_correlated_subqueries.sql | 16 ++++++++++++++++ 4 files changed, 31 insertions(+) create mode 100644 tests/queries/0_stateless/03644_object_storage_correlated_subqueries.reference create mode 100644 tests/queries/0_stateless/03644_object_storage_correlated_subqueries.sql diff --git a/src/Processors/QueryPlan/ReadFromObjectStorageStep.cpp b/src/Processors/QueryPlan/ReadFromObjectStorageStep.cpp index 49270683dfd9..bb37a5fea25a 100644 --- a/src/Processors/QueryPlan/ReadFromObjectStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromObjectStorageStep.cpp @@ -53,6 +53,11 @@ 
ReadFromObjectStorageStep::ReadFromObjectStorageStep( { } +QueryPlanStepPtr ReadFromObjectStorageStep::clone() const +{ + return std::make_unique(*this); +} + void ReadFromObjectStorageStep::applyFilters(ActionDAGNodes added_filter_nodes) { SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); diff --git a/src/Processors/QueryPlan/ReadFromObjectStorageStep.h b/src/Processors/QueryPlan/ReadFromObjectStorageStep.h index c6adc4a961ef..b0e5397ad688 100644 --- a/src/Processors/QueryPlan/ReadFromObjectStorageStep.h +++ b/src/Processors/QueryPlan/ReadFromObjectStorageStep.h @@ -33,6 +33,7 @@ class ReadFromObjectStorageStep : public SourceStepWithFilter void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) override; void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + QueryPlanStepPtr clone() const override; private: ObjectStoragePtr object_storage; diff --git a/tests/queries/0_stateless/03644_object_storage_correlated_subqueries.reference b/tests/queries/0_stateless/03644_object_storage_correlated_subqueries.reference new file mode 100644 index 000000000000..07193989308c --- /dev/null +++ b/tests/queries/0_stateless/03644_object_storage_correlated_subqueries.reference @@ -0,0 +1,9 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/03644_object_storage_correlated_subqueries.sql b/tests/queries/0_stateless/03644_object_storage_correlated_subqueries.sql new file mode 100644 index 000000000000..6ac7e423d185 --- /dev/null +++ b/tests/queries/0_stateless/03644_object_storage_correlated_subqueries.sql @@ -0,0 +1,16 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: needs s3 + +-- Use correlated subqueries which are supported only by the new analyzer. +set enable_analyzer = 1; + +INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/test-data-03644_object_storage.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(10) SETTINGS s3_truncate_on_insert = 1; + +SELECT n1.c1 +FROM s3('http://localhost:11111/test/test-data-03644_object_storage.csv', 'test', 'testtest') AS n1 +WHERE n1.c1 > ( + SELECT AVG(n2.c1) + FROM s3('http://localhost:11111/test/test-data-03644_object_storage.csv', 'test', 'testtest') AS n2 + WHERE n2.c1 < n1.c1 +) +SETTINGS allow_experimental_correlated_subqueries = 1; From 3a7a768f653a2e893d1b39748a1234d30cccac9e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Oct 2025 16:13:30 +0000 Subject: [PATCH 010/112] Backport #88330 to 25.8: Add missing checks for canContainMergeTreeTables() into system tables --- src/Databases/DataLake/DatabaseDataLake.cpp | 3 +-- src/Storages/System/StorageSystemDataSkippingIndices.cpp | 2 ++ src/Storages/System/StorageSystemProjections.cpp | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Databases/DataLake/DatabaseDataLake.cpp b/src/Databases/DataLake/DatabaseDataLake.cpp index 90260079ac7b..15ca4b9dd3e7 100644 --- a/src/Databases/DataLake/DatabaseDataLake.cpp +++ b/src/Databases/DataLake/DatabaseDataLake.cpp @@ -500,7 +500,6 @@ DatabaseTablesIteratorPtr DatabaseDataLake::getTablesIterator( } catch (...) { - tryLogCurrentException(log, fmt::format("Ignoring table {}", table_name)); promise->set_exception(std::current_exception()); } }); @@ -587,7 +586,7 @@ DatabaseTablesIteratorPtr DatabaseDataLake::getLightweightTablesIterator( } catch (...) 
{ - tryLogCurrentException(log, fmt::format("ignoring table {}", table_name)); + tryLogCurrentException(log, fmt::format("Ignoring table {}", table_name)); } promise->set_value(storage); }); diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 32ca0781c32f..fa70a42cce0d 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -266,6 +266,8 @@ void ReadFromSystemDataSkippingIndices::initializePipeline(QueryPipelineBuilder { if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) continue; + if (!database->canContainMergeTreeTables()) + continue; /// Lazy database can contain only very primitive tables, /// it cannot contain tables with data skipping indices. diff --git a/src/Storages/System/StorageSystemProjections.cpp b/src/Storages/System/StorageSystemProjections.cpp index f3352b510748..e88763f2bf3f 100644 --- a/src/Storages/System/StorageSystemProjections.cpp +++ b/src/Storages/System/StorageSystemProjections.cpp @@ -253,6 +253,8 @@ void ReadFromSystemProjections::initializePipeline(QueryPipelineBuilder & pipeli { if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) continue; + if (!database->canContainMergeTreeTables()) + continue; /// Lazy database can contain only very primitive tables, it cannot contain tables with projections. /// Skip it to avoid unnecessary tables loading in the Lazy database. From 5fbf50b47d6e3e7c4e3d025cebdfeb434ceece9e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 12 Oct 2025 09:11:07 +0000 Subject: [PATCH 011/112] Backport #87826 to 25.8: Fix AzureBlobStorage copy --- src/Backups/BackupIO_AzureBlobStorage.cpp | 68 +---- .../AzureBlobStorageCommon.cpp | 257 +++++++++--------- .../AzureBlobStorage/AzureBlobStorageCommon.h | 4 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 32 ++- .../copyAzureBlobStorageFile.cpp | 96 ++++--- .../copyAzureBlobStorageFile.h | 2 +- .../test.py | 71 +++-- 7 files changed, 268 insertions(+), 262 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index cdc6b194f84a..7d95ba062740 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -22,70 +22,12 @@ namespace fs = std::filesystem; namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; } -/// This function compares the authorization methods used to access AzureBlobStorage -/// It takes 2 variables of variant type as input and checks if they are the same type and value -static bool compareAuthMethod (AzureBlobStorage::AuthMethod auth_method_a, AzureBlobStorage::AuthMethod auth_method_b) -{ - const auto * conn_string_a = std::get_if(&auth_method_a); - const auto * conn_string_b = std::get_if(&auth_method_b); - - if (conn_string_a && conn_string_b) - { - return *conn_string_a == *conn_string_b; - } - - const auto * shared_key_a = std::get_if>(&auth_method_a); - const auto * shared_key_b = std::get_if>(&auth_method_b); - - if (shared_key_a && shared_key_b) - { - return (shared_key_a->get()->AccountName == shared_key_b->get()->AccountName); - } - - try - { - const auto * workload_identity_a = std::get_if>(&auth_method_a); - const auto * workload_identity_b = std::get_if>(&auth_method_b); - - if (workload_identity_a && workload_identity_b) - { - Azure::Core::Credentials::TokenRequestContext tokenRequestContext; - return workload_identity_a->get()->GetToken(tokenRequestContext, {}).Token 
== workload_identity_b->get()->GetToken(tokenRequestContext, {}).Token; - } - - const auto * managed_identity_a = std::get_if>(&auth_method_a); - const auto * managed_identity_b = std::get_if>(&auth_method_b); - - if (managed_identity_a && managed_identity_b) - { - Azure::Core::Credentials::TokenRequestContext tokenRequestContext; - return managed_identity_a->get()->GetToken(tokenRequestContext, {}).Token == managed_identity_b->get()->GetToken(tokenRequestContext, {}).Token; - } - - const auto * static_credential_a = std::get_if>(&auth_method_a); - const auto * static_credential_b = std::get_if>(&auth_method_b); - - if (static_credential_a && static_credential_b) - { - Azure::Core::Credentials::TokenRequestContext tokenRequestContext; - auto az_context = Azure::Core::Context(); - return static_credential_a->get()->GetToken(tokenRequestContext, az_context).Token == static_credential_b->get()->GetToken(tokenRequestContext, az_context).Token; - } - } - catch (const Azure::Core::Credentials::AuthenticationException & e) - { - /// This is added to catch exception from GetToken. We want to log & fail silently i.e return false so that we can fallback to read & copy (i.e not native copy) - LOG_DEBUG(getLogger("compareAuthMethod"), "Exception caught while comparing credentials, error = {}", e.what()); - return false; - } - return false; -} - BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( const AzureBlobStorage::ConnectionParams & connection_params_, const String & blob_path_, @@ -166,7 +108,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, /* dest_path */ dst_blob_path[0], settings, read_settings, - compareAuthMethod(connection_params.auth_method, destination_disk->getObjectStorage()->getAzureBlobStorageAuthMethod()), + std::optional(), threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure")); return file_size; @@ -233,7 +175,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk( /// In this case we can't use the native copy. if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2) { - LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); + LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorage", src_path, src_disk->getName()); copyAzureBlobStorageFile( src_disk->getObjectStorage()->getAzureBlobStorageClient(), client, @@ -245,7 +187,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk( fs::path(blob_path) / path_in_backup, settings, read_settings, - compareAuthMethod(src_disk->getObjectStorage()->getAzureBlobStorageAuthMethod(), connection_params.auth_method), + std::optional(), threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); return; /// copied! 
} @@ -269,7 +211,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St /* dest_path */ destination, settings, read_settings, - true, + std::optional(), threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index 8be2e4d7e141..8d3b3696be12 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -93,8 +93,6 @@ namespace ErrorCodes namespace AzureBlobStorage { -#if USE_AZURE_BLOB_STORAGE - static void validateStorageAccountUrl(const String & storage_account_url) { const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; @@ -121,6 +119,8 @@ static void validateContainerName(const String & container_name) container_name_pattern_str, container_name); } +#if USE_AZURE_BLOB_STORAGE + static bool isConnectionString(const std::string & candidate) { return !candidate.starts_with("http"); @@ -223,113 +223,6 @@ std::unique_ptr ConnectionParams::createForContainer() const }, auth_method); } -Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - String storage_url; - String account_name; - String container_name; - String prefix; - - auto get_container_name = [&] - { - if (config.has(config_prefix + ".container_name")) - return config.getString(config_prefix + ".container_name"); - - if (config.has(config_prefix + ".container")) - return config.getString(config_prefix + ".container"); - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `container` or `container_name` parameter in config"); - }; - - if (config.has(config_prefix + ".endpoint")) - { - String endpoint = config.getString(config_prefix + ".endpoint"); - - /// For some authentication methods account name is not present in the endpoint - /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) - bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); - - size_t pos = endpoint.find("//"); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); - - if (endpoint_contains_account_name) - { - size_t acc_pos_begin = endpoint.find('/', pos + 2); - if (acc_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); - - storage_url = endpoint.substr(0, acc_pos_begin); - size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 1); - - if (acc_pos_end == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); - - size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); - prefix = endpoint.substr(cont_pos_end + 1); - } - else - { - container_name = endpoint.substr(acc_pos_end + 1); - } - } - else - { - size_t cont_pos_begin = endpoint.find('/', pos + 2); - - if (cont_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - storage_url = endpoint.substr(0, cont_pos_begin); - size_t cont_pos_end = 
endpoint.find('/', cont_pos_begin + 1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); - prefix = endpoint.substr(cont_pos_end + 1); - } - else - { - container_name = endpoint.substr(cont_pos_begin + 1); - } - } - - if (config.has(config_prefix + ".endpoint_subpath")) - { - String endpoint_subpath = config.getString(config_prefix + ".endpoint_subpath"); - prefix = fs::path(prefix) / endpoint_subpath; - } - } - else if (config.has(config_prefix + ".connection_string")) - { - storage_url = config.getString(config_prefix + ".connection_string"); - container_name = get_container_name(); - } - else if (config.has(config_prefix + ".storage_account_url")) - { - storage_url = config.getString(config_prefix + ".storage_account_url"); - validateStorageAccountUrl(storage_url); - container_name = get_container_name(); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); - - if (!container_name.empty()) - validateContainerName(container_name); - - std::optional container_already_exists {}; - if (config.has(config_prefix + ".container_already_exists")) - container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; - - return {storage_url, account_name, container_name, prefix, "", container_already_exists}; -} - void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method) { endpoint.container_name = container_name; @@ -505,6 +398,113 @@ BlobClientOptions getClientOptions( #endif +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + String storage_url; + String account_name; + String container_name; + String prefix; + + auto get_container_name = [&] + { + if (config.has(config_prefix + ".container_name")) + return config.getString(config_prefix + ".container_name"); + + if (config.has(config_prefix + ".container")) + return config.getString(config_prefix + ".container"); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expected either `container` or `container_name` parameter in config"); + }; + + if (config.has(config_prefix + ".endpoint")) + { + String endpoint = config.getString(config_prefix + ".endpoint"); + + /// For some authentication methods account name is not present in the endpoint + /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) + bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); + + size_t pos = endpoint.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); + + if (endpoint_contains_account_name) + { + size_t acc_pos_begin = endpoint.find('/', pos + 2); + if (acc_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); + + storage_url = endpoint.substr(0, acc_pos_begin); + size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 1); + + if (acc_pos_end == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); + + size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); + 
prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(acc_pos_end + 1); + } + } + else + { + size_t cont_pos_begin = endpoint.find('/', pos + 2); + + if (cont_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + storage_url = endpoint.substr(0, cont_pos_begin); + size_t cont_pos_end = endpoint.find('/', cont_pos_begin + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); + prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(cont_pos_begin + 1); + } + } + if (config.has(config_prefix + ".endpoint_subpath")) + { + String endpoint_subpath = config.getString(config_prefix + ".endpoint_subpath"); + prefix = fs::path(prefix) / endpoint_subpath; + } + } + else if (config.has(config_prefix + ".connection_string")) + { + storage_url = config.getString(config_prefix + ".connection_string"); + container_name = get_container_name(); + } + else if (config.has(config_prefix + ".storage_account_url")) + { + storage_url = config.getString(config_prefix + ".storage_account_url"); + validateStorageAccountUrl(storage_url); + container_name = get_container_name(); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); + + if (!container_name.empty()) + validateContainerName(container_name); + + std::optional container_already_exists {}; + if (config.has(config_prefix + ".container_already_exists")) + container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; + + return {storage_url, account_name, container_name, prefix, "", container_already_exists}; +} + std::unique_ptr getRequestSettings(const Settings & query_settings) { auto settings = std::make_unique(); @@ -608,31 +608,41 @@ void AzureSettingsByEndpoint::loadFromConfig( for (const String & key : config_keys) { - const auto key_path = config_prefix + "." + key; - String endpoint_path = key_path + ".connection_string"; - - if (!config.has(endpoint_path)) + if (config.has(config_prefix + "." + key + ".object_storage_type")) { - endpoint_path = key_path + ".storage_account_url"; + const auto &object_storage_type = config.getString(config_prefix + "." + key + ".object_storage_type"); + if (object_storage_type != "azure" && object_storage_type != "azure_blob_storage") + { + /// Then its not an azure config + continue; + } + + const auto key_path = config_prefix + "." 
+ key; + String endpoint_path = key_path + ".connection_string"; if (!config.has(endpoint_path)) { - endpoint_path = key_path + ".endpoint"; + endpoint_path = key_path + ".storage_account_url"; if (!config.has(endpoint_path)) { - /// Error, shouldn't hit this todo:: throw error - continue; + endpoint_path = key_path + ".endpoint"; + + if (!config.has(endpoint_path)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not provided for azure blob storage disk {}", + object_storage_type); + } } } - } - auto request_settings = AzureBlobStorage::getRequestSettings(config, key_path, settings); - - azure_settings.emplace( - config.getString(endpoint_path), - std::move(*request_settings)); + auto endpoint = AzureBlobStorage::processEndpoint(config, key_path); + auto request_settings = AzureBlobStorage::getRequestSettings(config, key_path, settings); + azure_settings.emplace( + endpoint.storage_account_url, + std::move(*request_settings)); + } } } @@ -654,4 +664,5 @@ std::optional AzureSettingsByEndpoint::getSet return {}; } + } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 5980255ac484..e0bd961b2f6f 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -149,8 +149,6 @@ struct ConnectionParams std::unique_ptr createForContainer() const; }; - -Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method); std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly); @@ -165,6 +163,8 @@ AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const #endif +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + std::unique_ptr getRequestSettings(const Settings & query_settings); std::unique_ptr getRequestSettingsForBackup(ContextPtr context, String endpoint, bool use_native_copy); std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings_ref); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 4fe9c5c43117..0f4d533c2a1c 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -327,26 +327,34 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c void AzureObjectStorage::copyObject( /// NOLINT const StoredObject & object_from, const StoredObject & object_to, - const ReadSettings &, + const ReadSettings & read_settings, const WriteSettings &, std::optional object_to_attributes) { + auto settings_ptr = settings.get(); auto client_ptr = client.get(); - auto dest_blob_client = client_ptr->GetBlobClient(object_to.remote_path); - auto source_blob_client = client_ptr->GetBlobClient(object_from.remote_path); - - Azure::Storage::Blobs::CopyBlobFromUriOptions copy_options; - if (object_to_attributes.has_value()) - { - for (const auto & [key, value] : *object_to_attributes) - copy_options.Metadata[key] = value; - } + auto object_metadata = getObjectMetadata(object_from.remote_path); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if 
(client_ptr->IsClientForDisk()) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); - - dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); + LOG_TRACE(log, "AzureObjectStorage::copyObject of size {}", object_metadata.size_bytes); + + auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "AzureObjCopy"); + + copyAzureBlobStorageFile( + client_ptr, + client_ptr, + connection_params.getContainer(), + object_from.remote_path, + 0, + object_metadata.size_bytes, + connection_params.getContainer(), + object_to.remote_path, + settings_ptr, + read_settings, + object_to_attributes, + scheduler); } void AzureObjectStorage::applyNewSettings( diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index b49e34dd2d3a..7e584ab59e14 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -353,60 +353,92 @@ void copyAzureBlobStorageFile( const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, - bool same_credentials, + const std::optional & object_to_attributes, ThreadPoolCallbackRunnerUnsafe schedule) { auto log = getLogger("copyAzureBlobStorageFile"); + bool is_native_copy_done = false; - if (settings->use_native_copy && same_credentials) + if (settings->use_native_copy) { + /// Do native copy LOG_TRACE(log, "Copying Blob: {} from Container: {} using native copy", src_blob, src_container_for_logging); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->IsClientForDisk()) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); - auto block_blob_client_src = src_client->GetBlockBlobClient(src_blob); - auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob); + try + { + auto block_blob_client_src = src_client->GetBlockBlobClient(src_blob); + auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob); - auto source_uri = block_blob_client_src.GetUrl(); + auto source_uri = block_blob_client_src.GetUrl(); - if (size < settings->max_single_part_copy_size) - { - LOG_TRACE(log, "Copy blob sync {} -> {}", src_blob, dest_blob); - block_blob_client_dest.CopyFromUri(source_uri); - } - else - { - Azure::Storage::Blobs::StartBlobCopyOperation operation = block_blob_client_dest.StartCopyFromUri(source_uri); + if (size < settings->max_single_part_copy_size) + { + Azure::Storage::Blobs::CopyBlobFromUriOptions copy_options; + if (object_to_attributes.has_value()) + { + for (const auto & [key, value] : *object_to_attributes) + copy_options.Metadata[key] = value; + } - auto copy_response = operation.PollUntilDone(std::chrono::milliseconds(100)); - auto properties_model = copy_response.Value; + LOG_TRACE(log, "Copy blob sync {} -> {}", src_blob, dest_blob); + block_blob_client_dest.CopyFromUri(source_uri, copy_options); + } + else + { + Azure::Storage::Blobs::StartBlobCopyFromUriOptions copy_options; + if (object_to_attributes.has_value()) + { + for (const auto & [key, value] : *object_to_attributes) + copy_options.Metadata[key] = value; + } - auto copy_status = properties_model.CopyStatus; - auto copy_status_description = properties_model.CopyStatusDescription; + Azure::Storage::Blobs::StartBlobCopyOperation operation = block_blob_client_dest.StartCopyFromUri(source_uri, copy_options); + auto copy_response = operation.PollUntilDone(std::chrono::milliseconds(100)); + auto properties_model = copy_response.Value; - if (copy_status.HasValue() && 
copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success) - { - LOG_TRACE(log, "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); + auto copy_status = properties_model.CopyStatus; + auto copy_status_description = properties_model.CopyStatusDescription; + + + if (copy_status.HasValue() && copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success) + { + LOG_TRACE(log, "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); + } + else + { + if (copy_status.HasValue()) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} failed with status {} description {} (operation is done {})", + src_blob, dest_blob, copy_status.Value().ToString(), copy_status_description.Value(), operation.IsDone()); + throw Exception( + ErrorCodes::AZURE_BLOB_STORAGE_ERROR, + "Copy from {} to {} didn't complete with success status (operation is done {})", + src_blob, + dest_blob, + operation.IsDone()); + } } - else + is_native_copy_done = true; + } + catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Unauthorized) { - if (copy_status.HasValue()) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} failed with status {} description {} (operation is done {})", - src_blob, dest_blob, copy_status.Value().ToString(), copy_status_description.Value(), operation.IsDone()); - throw Exception( - ErrorCodes::AZURE_BLOB_STORAGE_ERROR, - "Copy from {} to {} didn't complete with success status (operation is done {})", - src_blob, - dest_blob, - operation.IsDone()); + LOG_TRACE(log, "Copy operation has thrown unauthorized access error, which indicates that the storage account of the source & destination are not the same. " + "Will attempt to copy using read & write. source container = {} blob = {} and destination container = {} blob = {}", + src_container_for_logging, src_blob, dest_container_for_logging, dest_blob); } + else + throw; } } - else + if (!is_native_copy_done) { - LOG_TRACE(log, "Copying Blob: {} from Container: {} native copy is disabled {}", src_blob, src_container_for_logging, same_credentials ? 
"" : " because of different credentials"); + /// Copy through read and write + LOG_TRACE(log, "Reading and writing Blob: {} from Container: {}", src_blob, src_container_for_logging); auto create_read_buffer = [&] { return std::make_unique( diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 036fbf34b0da..2059e715a7a7 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -30,7 +30,7 @@ void copyAzureBlobStorageFile( const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, - bool same_credentials = true, + const std::optional & object_to_attributes, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py b/tests/integration/test_azure_blob_storage_native_copy/test.py index f5d24485b9e3..9e4c70da7c98 100644 --- a/tests/integration/test_azure_blob_storage_native_copy/test.py +++ b/tests/integration/test_azure_blob_storage_native_copy/test.py @@ -55,17 +55,16 @@ def generate_config(port): 1000000000 1 - + local object_storage azure_blob_storage - true - http://azurite1:{port}/devstoreaccount1/ - othercontainer + DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:{port}/devstoreaccount1; + cont false - devstoreaccount1 - Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== - + true + 4 + @@ -89,20 +88,12 @@ def generate_config(port): - - -
- disk_azure_different_auth -
-
-
disk_azure disk_azure_cache disk_azure_other_bucket - disk_azure_different_auth """ @@ -295,21 +286,43 @@ def test_backup_restore_native_copy_disabled_in_query(cluster): assert not node4.contains_in_log("using native copy") -def test_backup_restore_native_copy_disabled_due_to_different_auth(cluster): + +def test_clickhouse_disks_azure(cluster): node4 = cluster.instances["node4"] - azure_query( - node4, - f"CREATE TABLE test_simple_merge_tree_native_copy_disabled_due_to_different_auth(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_different_auth'", + disk = "disk_azure_small_native_copy" + node4.exec_in_container( + [ + "bash", + "-c", + f"echo 'meow' | /usr/bin/clickhouse disks --disk {disk} --query 'write im_a_file.txt'", + ] ) - azure_query( - node4, f"INSERT INTO test_simple_merge_tree_native_copy_disabled_due_to_different_auth VALUES (1, 'a')" + out = node4.exec_in_container( + [ + "/usr/bin/clickhouse", + "disks", + "--disk", + disk, + "--query", + "read im_a_file.txt", + ] ) - - backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_native_copy_disabled_due_to_different_auth_backup')" - print("BACKUP DEST", backup_destination) - azure_query( - node4, - f"BACKUP TABLE test_simple_merge_tree_native_copy_disabled_due_to_different_auth TO {backup_destination}", + assert out == "meow\n\n" + node4.exec_in_container( + [ + "bash", + "-c", + f"/usr/bin/clickhouse disks --disk {disk} --log-level trace --query 'copy im_a_file.txt another_file.txt'", + ] ) - - assert not node4.contains_in_log("using native copy") \ No newline at end of file + out = node4.exec_in_container( + [ + "/usr/bin/clickhouse", + "disks", + "--disk", + disk, + "--query", + "read another_file.txt", + ] + ) + assert out == "meow\n\n" From f9125f7bf3bafd6d1e04add18ce5aa7ea95521b3 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 12 Oct 2025 11:49:43 +0200 Subject: [PATCH 012/112] Fix includes in AzureObjectStorage --- src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 0f4d533c2a1c..83ca6950da69 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include From 3f5e3cb5428b784e9dc24f32287b6b6e8803d5ed Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Oct 2025 05:11:59 +0000 Subject: [PATCH 013/112] Backport #88089 to 25.8: Add a URI normalization for the `SOURCE` grants filter. 
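The SOURCE filter is now matched against a normalized form of the table function URI. Poco's `URI::normalize()` collapses dot segments, so different spellings of the same location are checked as one canonical string before the grant filter is applied. A minimal sketch with plain Poco (the URL below is only illustrative, not taken from the tests):

```cpp
#include <Poco/URI.h>
#include <iostream>

int main()
{
    // normalize() removes "." and ".." path segments (RFC 3986), so the
    // filter sees one canonical spelling of the target location.
    Poco::URI uri("https://example.com/data/./reports/../file.csv");
    uri.normalize();
    std::cout << uri.toString() << '\n'; // https://example.com/data/file.csv
    return 0;
}
```
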
--- docs/en/sql-reference/statements/grant.md | 17 ++++++++++++ src/TableFunctions/ITableFunction.cpp | 2 +- src/TableFunctions/ITableFunction.h | 16 ++++++++++++ src/TableFunctions/ITableFunctionXDBC.cpp | 2 -- src/TableFunctions/TableFunctionURL.cpp | 2 +- ...6_normalize_url_in_source_grants.reference | 0 .../03636_normalize_url_in_source_grants.sh | 26 +++++++++++++++++++ 7 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03636_normalize_url_in_source_grants.reference create mode 100755 tests/queries/0_stateless/03636_normalize_url_in_source_grants.sh diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 9b0ea66d21b6..11a8a655cfaa 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -680,6 +680,23 @@ GRANT READ ON S3('s3://foo/.*') TO john GRANT READ ON S3('s3://bar/.*') TO john ``` +:::warning +Source filter takes **regexp** as a parameter, so a grant +`GRANT READ ON URL('http://www.google.com') TO john;` + +will allow queries +```sql +SELECT * FROM url('https://www.google.com'); +SELECT * FROM url('https://www-google.com'); +``` + +because `.` is treated as an `Any Single Character` in the regexps. +This may lead to potential vulnerability. The correct grant should be +```sql +GRANT READ ON URL('https://www\.google\.com') TO john; +``` +::: + **Re-granting with GRANT OPTION:** If the original grant has `WITH GRANT OPTION`, it can be re-granted using `GRANT CURRENT GRANTS`: diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 3f59a8c48a11..8fc0211417bd 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -44,7 +44,7 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte if (is_insert_query) type_to_check = AccessType::WRITE; - context->getAccess()->checkAccessWithFilter(type_to_check, toStringSource(*access_object), getFunctionURI()); + context->getAccess()->checkAccessWithFilter(type_to_check, toStringSource(*access_object), getFunctionURINormalized()); } auto table_function_properties = TableFunctionFactory::instance().tryGetProperties(getName()); diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index b3701915a3b5..c4e7534c1e49 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -106,6 +106,8 @@ class ITableFunction : public std::enable_shared_from_this /// For example for s3Cluster the database storage name is S3Cluster, and we need to check /// privileges as if it was S3. virtual const char * getNonClusteredStorageEngineName() const; + +protected: /// The URI of function for permission checking. Can be empty string if not applicable. /// For example for url('https://foo.bar') URI would be 'https://foo.bar'. virtual const String & getFunctionURI() const @@ -113,6 +115,20 @@ class ITableFunction : public std::enable_shared_from_this static const String empty; return empty; } + + String getFunctionURINormalized() const + { + try + { + Poco::URI uri(getFunctionURI()); + uri.normalize(); + return uri.toString(); + } + catch (const Poco::Exception &) + { + return ""; + } + } }; /// Properties of table function that are independent of argument types and parameters. 
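The documentation change above warns that the SOURCE filter argument is a regular expression, so an unescaped `.` matches any single character and the filter accepts more hosts than intended. A small standalone illustration with std::regex (not the regexp engine ClickHouse uses internally; the host names are only examples):

```cpp
#include <iostream>
#include <regex>

int main()
{
    // An unescaped '.' matches any character, so the first pattern also
    // accepts host names that merely look similar.
    const std::regex unescaped("https://www.google.com");
    const std::regex escaped("https://www\\.google\\.com");

    std::cout << std::regex_match("https://www-google.com", unescaped) << '\n'; // 1: accepted
    std::cout << std::regex_match("https://www-google.com", escaped) << '\n';   // 0: rejected
    return 0;
}
```
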
diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index 6904f26b2878..810ba5c49a3b 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -62,8 +62,6 @@ class ITableFunctionXDBC : public ITableFunction void startBridgeIfNot(ContextPtr context) const; - const String & getFunctionURI() const override { return connection_string; } - String connection_string; String schema_name; String remote_table_name; diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 0cbacdf42a16..e5833e1414a4 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -143,7 +143,7 @@ ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, ColumnsDescription columns; if (const auto access_object = getSourceAccessObject()) - context->getAccess()->checkAccessWithFilter(AccessType::READ, toStringSource(*access_object), getFunctionURI()); + context->getAccess()->checkAccessWithFilter(AccessType::READ, toStringSource(*access_object), getFunctionURINormalized()); if (format == "auto") { columns = StorageURL::getTableStructureAndFormatFromData( diff --git a/tests/queries/0_stateless/03636_normalize_url_in_source_grants.reference b/tests/queries/0_stateless/03636_normalize_url_in_source_grants.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03636_normalize_url_in_source_grants.sh b/tests/queries/0_stateless/03636_normalize_url_in_source_grants.sh new file mode 100755 index 000000000000..f1a55f14f28e --- /dev/null +++ b/tests/queries/0_stateless/03636_normalize_url_in_source_grants.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +user="user03636_${CLICKHOUSE_DATABASE}_$RANDOM" + +${CLICKHOUSE_CLIENT} < Date: Mon, 13 Oct 2025 09:14:05 +0000 Subject: [PATCH 014/112] Backport #88401 to 25.8: Fix quadratic complexity in `countMatches` --- src/Functions/countMatches.cpp | 162 +++++++++++++++++- src/Functions/countMatches.h | 156 ----------------- .../03666_count_matches_complexity.reference | 2 + .../03666_count_matches_complexity.sql | 2 + 4 files changed, 162 insertions(+), 160 deletions(-) delete mode 100644 src/Functions/countMatches.h create mode 100644 tests/queries/0_stateless/03666_count_matches_complexity.reference create mode 100644 tests/queries/0_stateless/03666_count_matches_complexity.sql diff --git a/src/Functions/countMatches.cpp b/src/Functions/countMatches.cpp index 078823744dd0..79eaadeaa393 100644 --- a/src/Functions/countMatches.cpp +++ b/src/Functions/countMatches.cpp @@ -1,9 +1,166 @@ +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +namespace Setting +{ + extern const SettingsBool count_matches_stop_at_empty_match; +} namespace { +using Pos = const char *; + +template +class FunctionCountMatches : public IFunction +{ + const bool count_matches_stop_at_empty_match; + +public: + static constexpr auto name = CountMatchesBase::name; + static FunctionPtr create(ContextPtr context) { return std::make_shared>(context); } + + explicit FunctionCountMatches(ContextPtr context) + : count_matches_stop_at_empty_match(context->getSettingsRef()[Setting::count_matches_stop_at_empty_match]) + { + } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args + { + {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, + {"pattern", static_cast(&isString), isColumnConst, "constant String"} + }; + validateFunctionArguments(*this, arguments, args); + + return std::make_shared(); + } + + DataTypePtr getReturnTypeForDefaultImplementationForDynamic() const override + { + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const IColumn * col_pattern = arguments[1].column.get(); + const ColumnConst * col_pattern_const = checkAndGetColumnConst(col_pattern); + if (col_pattern_const == nullptr) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Pattern argument is not const"); + + const OptimizedRegularExpression re = Regexps::createRegexp(col_pattern_const->getValue()); + + const IColumn * col_haystack = arguments[0].column.get(); + OptimizedRegularExpression::MatchVec matches; + + if (const ColumnConst * col_haystack_const = checkAndGetColumnConstStringOrFixedString(col_haystack)) + { + std::string_view str = col_haystack_const->getDataColumn().getDataAt(0).toView(); + uint64_t matches_count = countMatches(str, re, matches); + return result_type->createColumnConst(input_rows_count, matches_count); + } + if (const ColumnString * col_haystack_string = checkAndGetColumn(col_haystack)) + { + auto col_res = ColumnUInt64::create(); + + const ColumnString::Chars & src_chars = 
col_haystack_string->getChars(); + const ColumnString::Offsets & src_offsets = col_haystack_string->getOffsets(); + + ColumnUInt64::Container & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); + + ColumnString::Offset current_src_offset = 0; + + for (size_t i = 0; i < input_rows_count; ++i) + { + Pos pos = reinterpret_cast(&src_chars[current_src_offset]); + current_src_offset = src_offsets[i]; + Pos end = reinterpret_cast(&src_chars[current_src_offset]); + + std::string_view str(pos, end - pos); + vec_res[i] = countMatches(str, re, matches); + } + + return col_res; + } + if (const ColumnFixedString * col_haystack_fixedstring = checkAndGetColumn(col_haystack)) + { + auto col_res = ColumnUInt64::create(); + + ColumnUInt64::Container & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) + { + std::string_view str = col_haystack_fixedstring->getDataAt(i).toView(); + vec_res[i] = countMatches(str, re, matches); + } + + return col_res; + } + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Could not cast haystack argument to String or FixedString"); + } + + uint64_t countMatches(std::string_view src, const OptimizedRegularExpression & re, OptimizedRegularExpression::MatchVec & matches) const + { + /// Only one match is required, no need to copy more. + static const unsigned matches_limit = 1; + + Pos pos = reinterpret_cast(src.data()); + Pos end = reinterpret_cast(src.data() + src.size()); + + uint64_t match_count = 0; + while (pos < end) + { + if (re.match(pos, end - pos, matches, matches_limit)) + { + if (matches[0].length > 0) + { + pos += matches[0].offset + matches[0].length; + ++match_count; + } + else + { + if (count_matches_stop_at_empty_match) + /// Progress should be made, but with empty match the progress will not be done. + break; + + /// Progress is made by a single character in case the pattern does not match or have zero-byte match. + /// The reason is simply because the pattern could match another part of input when forwarded. 
+ ++pos; + } + } + else + break; + } + + return match_count; + } +}; + struct FunctionCountMatchesCaseSensitive { static constexpr auto name = "countMatches"; @@ -17,9 +174,6 @@ struct FunctionCountMatchesCaseInsensitive } -namespace DB -{ - REGISTER_FUNCTION(CountMatches) { factory.registerFunction>(); diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h deleted file mode 100644 index 7a77ad4ec3ab..000000000000 --- a/src/Functions/countMatches.h +++ /dev/null @@ -1,156 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; -} - -namespace Setting -{ - extern const SettingsBool count_matches_stop_at_empty_match; -} - -using Pos = const char *; - -template -class FunctionCountMatches : public IFunction -{ - const bool count_matches_stop_at_empty_match; - -public: - static constexpr auto name = CountMatchesBase::name; - static FunctionPtr create(ContextPtr context) { return std::make_shared>(context); } - - explicit FunctionCountMatches(ContextPtr context) - : count_matches_stop_at_empty_match(context->getSettingsRef()[Setting::count_matches_stop_at_empty_match]) - { - } - - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - FunctionArgumentDescriptors args{ - {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, - {"pattern", static_cast(&isString), isColumnConst, "constant String"} - }; - validateFunctionArguments(*this, arguments, args); - - return std::make_shared(); - } - - DataTypePtr getReturnTypeForDefaultImplementationForDynamic() const override - { - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - const IColumn * col_pattern = arguments[1].column.get(); - const ColumnConst * col_pattern_const = checkAndGetColumnConst(col_pattern); - if (col_pattern_const == nullptr) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Pattern argument is not const"); - - const OptimizedRegularExpression re = Regexps::createRegexp(col_pattern_const->getValue()); - - const IColumn * col_haystack = arguments[0].column.get(); - OptimizedRegularExpression::MatchVec matches; - - if (const ColumnConst * col_haystack_const = checkAndGetColumnConstStringOrFixedString(col_haystack)) - { - std::string_view str = col_haystack_const->getDataColumn().getDataAt(0).toView(); - uint64_t matches_count = countMatches(str, re, matches); - return result_type->createColumnConst(input_rows_count, matches_count); - } - if (const ColumnString * col_haystack_string = checkAndGetColumn(col_haystack)) - { - auto col_res = ColumnUInt64::create(); - - const ColumnString::Chars & src_chars = col_haystack_string->getChars(); - const ColumnString::Offsets & src_offsets = col_haystack_string->getOffsets(); - - ColumnUInt64::Container & vec_res = col_res->getData(); - vec_res.resize(input_rows_count); - - ColumnString::Offset current_src_offset = 0; - - for (size_t i = 0; i < input_rows_count; ++i) - { - Pos pos = reinterpret_cast(&src_chars[current_src_offset]); - current_src_offset = src_offsets[i]; - Pos end = 
reinterpret_cast(&src_chars[current_src_offset]); - - std::string_view str(pos, end - pos); - vec_res[i] = countMatches(str, re, matches); - } - - return col_res; - } - if (const ColumnFixedString * col_haystack_fixedstring = checkAndGetColumn(col_haystack)) - { - auto col_res = ColumnUInt64::create(); - - ColumnUInt64::Container & vec_res = col_res->getData(); - vec_res.resize(input_rows_count); - - for (size_t i = 0; i < input_rows_count; ++i) - { - std::string_view str = col_haystack_fixedstring->getDataAt(i).toView(); - vec_res[i] = countMatches(str, re, matches); - } - - return col_res; - } - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Could not cast haystack argument to String or FixedString"); - } - - uint64_t countMatches(std::string_view src, const OptimizedRegularExpression & re, OptimizedRegularExpression::MatchVec & matches) const - { - /// Only one match is required, no need to copy more. - static const unsigned matches_limit = 1; - - Pos pos = reinterpret_cast(src.data()); - Pos end = reinterpret_cast(src.data() + src.size()); - - uint64_t match_count = 0; - while (pos < end) - { - if (re.match(pos, end - pos, matches, matches_limit) && matches[0].length > 0) - { - pos += matches[0].offset + matches[0].length; - ++match_count; - } - else - { - if (count_matches_stop_at_empty_match) - /// Progress should be made, but with empty match the progress will not be done. - break; - - /// Progress is made by a single character in case the pattern does not match or have zero-byte match. - /// The reason is simply because the pattern could match another part of input when forwarded. - ++pos; - } - } - - return match_count; - } -}; - -} diff --git a/tests/queries/0_stateless/03666_count_matches_complexity.reference b/tests/queries/0_stateless/03666_count_matches_complexity.reference new file mode 100644 index 000000000000..32aa8df235fb --- /dev/null +++ b/tests/queries/0_stateless/03666_count_matches_complexity.reference @@ -0,0 +1,2 @@ +0 +1000000 diff --git a/tests/queries/0_stateless/03666_count_matches_complexity.sql b/tests/queries/0_stateless/03666_count_matches_complexity.sql new file mode 100644 index 000000000000..d866543b5d8e --- /dev/null +++ b/tests/queries/0_stateless/03666_count_matches_complexity.sql @@ -0,0 +1,2 @@ +SELECT countMatches(repeat('\0\0\0\0\0\0\0\0\0\0', 1000000), 'a'); +SELECT countMatches(repeat('\0\0\0\0\0\0\0\0\0\0a', 1000000), 'a'); From e3979ed1ff153e3d5fc26a92dfe3fb1552c01c78 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Oct 2025 09:15:38 +0000 Subject: [PATCH 015/112] Backport #87987 to 25.8: Fix index analysis with session_timezone specified --- src/Storages/MergeTree/KeyCondition.cpp | 54 +++++---- ...3173_set_transformed_partition_pruning.sql | 4 +- ...6_index_analysis_with_session_tz.reference | 41 +++++++ .../03636_index_analysis_with_session_tz.sql | 103 ++++++++++++++++++ 4 files changed, 178 insertions(+), 24 deletions(-) create mode 100644 tests/queries/0_stateless/03636_index_analysis_with_session_tz.reference create mode 100644 tests/queries/0_stateless/03636_index_analysis_with_session_tz.sql diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 3258fbc4dcea..f85d173a94c0 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -53,6 +53,7 @@ namespace Setting { extern const SettingsBool analyze_index_with_space_filling_curves; extern const SettingsDateTimeOverflowBehavior date_time_overflow_behavior; + extern const SettingsTimezone 
session_timezone; } namespace ErrorCodes @@ -1093,7 +1094,7 @@ bool applyFunctionChainToColumn( } // And cast it to the argument type of the first function in the chain - auto in_argument_type = getArgumentTypeOfMonotonicFunction(*functions[0]); + auto in_argument_type = removeLowCardinality(getArgumentTypeOfMonotonicFunction(*functions[0])); if (canBeSafelyCast(result_type, in_argument_type)) { result_column = castColumnAccurate({result_column, result_type, ""}, in_argument_type); @@ -1122,13 +1123,13 @@ bool applyFunctionChainToColumn( if (func->getArgumentTypes().empty()) return false; - auto argument_type = getArgumentTypeOfMonotonicFunction(*func); + auto argument_type = removeLowCardinality(getArgumentTypeOfMonotonicFunction(*func)); if (!canBeSafelyCast(result_type, argument_type)) return false; result_column = castColumnAccurate({result_column, result_type, ""}, argument_type); - result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size(), /* dry_run = */ false); - result_type = func->getResultType(); + result_type = removeLowCardinality(func->getResultType()); + result_column = func->execute({{result_column, argument_type, ""}}, result_type, result_column->size(), /* dry_run = */ false); // Transforming nullable columns to the nested ones, in case no nulls found if (result_column->isNullable()) @@ -1141,7 +1142,7 @@ bool applyFunctionChainToColumn( return false; } result_column = result_column_nullable.getNestedColumnPtr(); - result_type = removeNullable(func->getResultType()); + result_type = removeNullable(result_type); } } out_column = result_column; @@ -1903,48 +1904,57 @@ bool KeyCondition::extractMonotonicFunctionsChainFromKey( auto func_name = func->function_base->getName(); auto func_base = func->function_base; - ColumnsWithTypeAndName arguments; ColumnWithTypeAndName const_arg; FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST; if (date_time_parsing_functions.contains(func_name)) { - const auto & arg_types = func_base->getArgumentTypes(); - if (!arg_types.empty() && isStringOrFixedString(arg_types[0])) - func_name = func_name + "OrNull"; - } + const auto & func_arg_types = func_base->getArgumentTypes(); - auto func_builder = FunctionFactory::instance().tryGet(func_name, context); + const bool has_string_argument = !func_arg_types.empty() && isStringOrFixedString(func_arg_types[0]); + const bool has_session_timezone = !context->getSettingsRef()[Setting::session_timezone].value.empty(); - if (func->children.size() == 1) - { - arguments.push_back({nullptr, removeLowCardinality(func->children[0]->result_type), ""}); + // Skipping analysis in case when is requires parsing datetime from string + // with `session_timezone` specified + if (has_string_argument && has_session_timezone) + return false; + + // Otherwise, in case when datetime parsing is required, rebuilding the function, + // to get its "-OrNull" version required for safe parsing, and not failing on + // values with incorrect format + if (has_string_argument) + { + ColumnsWithTypeAndName new_args; + for (const auto & type : func->function_base->getArgumentTypes()) + new_args.push_back({nullptr, type, ""}); + + const auto func_builder = FunctionFactory::instance().tryGet(func_name + "OrNull", context); + func_base = func_builder->build(new_args); + } } - else if (func->children.size() == 2) + + // For single argument functions, the input may be used as-is, for binary functions, + // we'll produce a partially applied version of 
`func` with the reduced arity + if (func->children.size() == 2) { const auto * left = func->children[0]; const auto * right = func->children[1]; if (left->column && isColumnConst(*left->column)) { const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""}; - arguments.push_back(const_arg); - arguments.push_back({nullptr, removeLowCardinality(right->result_type), ""}); kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST; } else { const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""}; - arguments.push_back({nullptr, removeLowCardinality(left->result_type), ""}); - arguments.push_back(const_arg); kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; } } - auto out_func = func_builder->build(arguments); if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST) - out_functions_chain.push_back(out_func); + out_functions_chain.push_back(func_base); else - out_functions_chain.push_back(std::make_shared(out_func, const_arg, kind)); + out_functions_chain.push_back(std::make_shared(func_base, const_arg, kind)); } out_key_column_num = it->second; diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql index c49aafab237c..4d7b12bf5651 100644 --- a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql @@ -1,4 +1,4 @@ --- Tags: no-msan +-- Tags: no-msan, long -- msan: too slow SELECT '-- Single partition by function'; @@ -234,7 +234,7 @@ SELECT toString(toDate('2000-01-01') + 10 * number) FROM numbers(50) UNION ALL SELECT toString(toDate('2100-01-01') + 10 * number) FROM numbers(50); -SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing'; +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing', session_timezone = ''; SYSTEM FLUSH LOGS query_log; SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_date_parsing'; SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('not a date'); diff --git a/tests/queries/0_stateless/03636_index_analysis_with_session_tz.reference b/tests/queries/0_stateless/03636_index_analysis_with_session_tz.reference new file mode 100644 index 000000000000..e3b96109a27d --- /dev/null +++ b/tests/queries/0_stateless/03636_index_analysis_with_session_tz.reference @@ -0,0 +1,41 @@ +-- PK UTC timezone +1 +Condition: (toStartOfDay(ts) in [1756857600, 1756857600]) +Parts: 1/1 +Granules: 1/1 + +-- PK EST timezone +1 +Condition: (toStartOfDay(ts) in [1756857600, 1756857600]) +Parts: 1/1 +Granules: 1/1 + +-- Partitions UTC timezone +1 +Condition: (ts in [1756882680, 1756882680]) +Parts: 1/1 +Granules: 1/1 +Condition: (toStartOfDay(ts) in [1756857600, 1756857600]) +Parts: 1/1 +Granules: 1/1 + +-- Partitions EST timezone +1 +Condition: (ts in [1756882680, 1756882680]) +Parts: 1/1 +Granules: 1/1 +Condition: (toStartOfDay(ts) in [1756857600, 1756857600]) +Parts: 1/1 +Granules: 1/1 + +-- Partitions UTC timezone +1 +Condition: true +Parts: 1/1 +Granules: 1/1 + +-- Partitions EST timezone +1 +Condition: true +Parts: 1/1 +Granules: 1/1 diff --git a/tests/queries/0_stateless/03636_index_analysis_with_session_tz.sql 
b/tests/queries/0_stateless/03636_index_analysis_with_session_tz.sql new file mode 100644 index 000000000000..c897ab523dae --- /dev/null +++ b/tests/queries/0_stateless/03636_index_analysis_with_session_tz.sql @@ -0,0 +1,103 @@ +SET session_timezone = 'UTC'; +-- For explain with indexes and key condition values verification +SET parallel_replicas_local_plan = 1; + +DROP TABLE IF EXISTS 03636_data_pk, 03636_data_partitions, 03636_data_parsed; + +CREATE TABLE 03636_data_pk (ts DateTime) ENGINE = MergeTree ORDER BY toStartOfDay(ts) +AS +SELECT 1756882680; + +SELECT '-- PK UTC timezone'; + +SELECT count() FROM 03636_data_pk WHERE ts = 1756882680; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT count() FROM 03636_data_pk WHERE ts = 1756882680 +) +WHERE trim(explain) ilike 'condition: %' + OR trim(explain) ilike 'parts: %' + OR trim(explain) ilike 'granules: %'; + +SELECT ''; +SELECT '-- PK EST timezone'; + +SELECT count() FROM 03636_data_pk WHERE ts = 1756882680 SETTINGS session_timezone = 'EST'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT count() FROM 03636_data_pk WHERE ts = 1756882680 +) +WHERE trim(explain) ilike 'condition: %' + OR trim(explain) ilike 'parts: %' + OR trim(explain) ilike 'granules: %' +SETTINGS session_timezone = 'EST'; + +DROP TABLE 03636_data_pk; + +CREATE TABLE 03636_data_partitions (ts DateTime) ENGINE = MergeTree ORDER BY tuple() PARTITION BY toStartOfDay(ts) +AS +SELECT 1756882680; + +SELECT ''; +SELECT '-- Partitions UTC timezone'; + +SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680 +) +WHERE trim(explain) ilike 'condition: %' + OR trim(explain) ilike 'parts: %' + OR trim(explain) ilike 'granules: %'; + +SELECT ''; +SELECT '-- Partitions EST timezone'; + +SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680 SETTINGS session_timezone = 'EST'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680 +) +WHERE trim(explain) ilike 'condition: %' + OR trim(explain) ilike 'parts: %' + OR trim(explain) ilike 'granules: %' +SETTINGS session_timezone = 'EST'; + +DROP TABLE 03636_data_partitions; + +CREATE TABLE 03636_data_parsed (ts String) ENGINE = MergeTree ORDER BY toStartOfDay(toDateTime(ts)) +AS +SELECT '2025-09-02 19:00:00'; + +SELECT ''; +SELECT '-- Partitions UTC timezone'; + +SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00' +) +WHERE trim(explain) ilike 'condition: %' + OR trim(explain) ilike 'parts: %' + OR trim(explain) ilike 'granules: %'; + +SELECT ''; +SELECT '-- Partitions EST timezone'; + +SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00' SETTINGS session_timezone = 'EST'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00' +) +WHERE trim(explain) ilike 'condition: %' + OR trim(explain) ilike 'parts: %' + OR trim(explain) ilike 'granules: %' +SETTINGS session_timezone = 'EST'; + +DROP TABLE 03636_data_parsed; From 82c2b31e7e5ac6a92745ea595a8c84d414afa29e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Oct 2025 11:11:24 +0000 Subject: [PATCH 016/112] Backport #88339 to 25.8: fix threads count for inserts --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 
.../03652_threads_count_insert.reference | 8 +++ .../0_stateless/03652_threads_count_insert.sh | 56 +++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03652_threads_count_insert.reference create mode 100755 tests/queries/0_stateless/03652_threads_count_insert.sh diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 5b52cdbb9920..a0e9c1b6cccb 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -726,7 +726,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query QueryPipeline pipeline = QueryPipeline(std::move(chain)); - pipeline.setNumThreads(max_insert_threads); + pipeline.setNumThreads(max_threads); pipeline.setConcurrencyControl(settings[Setting::use_concurrency_control]); if (query.hasInlinedData() && !async_insert) diff --git a/tests/queries/0_stateless/03652_threads_count_insert.reference b/tests/queries/0_stateless/03652_threads_count_insert.reference new file mode 100644 index 000000000000..2681d6258e98 --- /dev/null +++ b/tests/queries/0_stateless/03652_threads_count_insert.reference @@ -0,0 +1,8 @@ +max_threads: 1 max_insert_threads: 1 +1 +max_threads: 1 max_insert_threads: 5 +1 +max_threads: 10 max_insert_threads: 1 +10 +max_threads: 10 max_insert_threads: 5 +10 diff --git a/tests/queries/0_stateless/03652_threads_count_insert.sh b/tests/queries/0_stateless/03652_threads_count_insert.sh new file mode 100755 index 000000000000..7a95b4ea833a --- /dev/null +++ b/tests/queries/0_stateless/03652_threads_count_insert.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-object-storage, no-parallel, no-fasttest + +# no-object-storage: s3 has 20 more threads +# no-parallel: it checks the number of threads, which can be lowered in presence of other queries + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +cat <= 10, 10, peak_threads_usage), +from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query_id = '$QUERY_ID' +order by ALL; +EOF + + done +done From 02e8bae240d9da87bbe262aa1f7238e3acad1f0a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Oct 2025 14:13:24 +0000 Subject: [PATCH 017/112] Backport #88217 to 25.8: max_cpu_share alone should determine the hard cap in a workload setting, even if max_cpus is unset --- src/Common/Scheduler/IResourceManager.h | 3 ++ .../Scheduler/Nodes/CustomResourceManager.cpp | 6 ++++ .../Scheduler/Nodes/CustomResourceManager.h | 1 + .../Nodes/WorkloadResourceManager.cpp | 28 +++++++++++++++++-- .../Scheduler/Nodes/WorkloadResourceManager.h | 6 ++-- .../tests/gtest_workload_resource_manager.cpp | 18 ++++++++++++ src/Common/Scheduler/WorkloadSettings.cpp | 2 +- .../Scheduler/createResourceManager.cpp | 10 +++++++ 8 files changed, 68 insertions(+), 6 deletions(-) diff --git a/src/Common/Scheduler/IResourceManager.h b/src/Common/Scheduler/IResourceManager.h index 5cca911cdbb6..4b5537f73f00 100644 --- a/src/Common/Scheduler/IResourceManager.h +++ b/src/Common/Scheduler/IResourceManager.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -37,6 +38,8 @@ class IClassifier : private boost::noncopyable /// Returns ResourceLink that should be used to access resource. /// Returned link is valid until classifier destruction. 
virtual ResourceLink get(const String & resource_name) = 0; + /// Returns settings that should be used to limit workload on given resource. + virtual WorkloadSettings getWorkloadSettings(const String & resource_name) const = 0; }; using ClassifierPtr = std::shared_ptr; diff --git a/src/Common/Scheduler/Nodes/CustomResourceManager.cpp b/src/Common/Scheduler/Nodes/CustomResourceManager.cpp index 244a83374f8b..33f5d43de9e6 100644 --- a/src/Common/Scheduler/Nodes/CustomResourceManager.cpp +++ b/src/Common/Scheduler/Nodes/CustomResourceManager.cpp @@ -177,6 +177,12 @@ ResourceLink CustomResourceManager::Classifier::get(const String & resource_name return ResourceLink{}; // unlimited access } +WorkloadSettings CustomResourceManager::Classifier::getWorkloadSettings(const String & resource_name) const +{ + UNUSED(resource_name); + return {}; +} + CustomResourceManager::CustomResourceManager() : state(new State()) { diff --git a/src/Common/Scheduler/Nodes/CustomResourceManager.h b/src/Common/Scheduler/Nodes/CustomResourceManager.h index e7559aaa77f5..a232aa32c60d 100644 --- a/src/Common/Scheduler/Nodes/CustomResourceManager.h +++ b/src/Common/Scheduler/Nodes/CustomResourceManager.h @@ -84,6 +84,7 @@ class CustomResourceManager : public IResourceManager Classifier(const ClassifierSettings & settings_, const StatePtr & state_, const String & classifier_name); bool has(const String & resource_name) override; ResourceLink get(const String & resource_name) override; + WorkloadSettings getWorkloadSettings(const String & resource_name) const override; private: const ClassifierSettings settings; std::unordered_map resources; // accessible resources by names diff --git a/src/Common/Scheduler/Nodes/WorkloadResourceManager.cpp b/src/Common/Scheduler/Nodes/WorkloadResourceManager.cpp index 986125503ef8..43aa4bb8720f 100644 --- a/src/Common/Scheduler/Nodes/WorkloadResourceManager.cpp +++ b/src/Common/Scheduler/Nodes/WorkloadResourceManager.cpp @@ -423,11 +423,32 @@ ResourceLink WorkloadResourceManager::Classifier::get(const String & resource_na } } -void WorkloadResourceManager::Classifier::attach(const ResourcePtr & resource, const VersionPtr & version, ResourceLink link) +WorkloadSettings WorkloadResourceManager::Classifier::getWorkloadSettings(const String & resource_name) const +{ + std::unique_lock lock{mutex}; + auto iter = attachments.find(resource_name); + if (iter != attachments.end()) + { + // Extract settings from the attached resource + return iter->second.settings; + } + return {}; +} + +void WorkloadResourceManager::Classifier::attach(const ResourcePtr & resource, const VersionPtr & version, UnifiedSchedulerNode * node) { std::unique_lock lock{mutex}; chassert(!attachments.contains(resource->getName())); - attachments[resource->getName()] = Attachment{.resource = resource, .version = version, .link = link}; + ResourceLink link; + WorkloadSettings wl_settings{}; + if (node) + { + auto queue = node->getQueue(); + if (queue) + link = ResourceLink{.queue = queue.get()}; + wl_settings = node->getSettings(); + } + attachments[resource->getName()] = Attachment{.resource = resource, .version = version, .link = link, .settings = wl_settings}; } void WorkloadResourceManager::Resource::updateResource(const ASTPtr & new_resource_entity) @@ -447,11 +468,12 @@ std::future WorkloadResourceManager::Resource::attachClassifier(Classifier { if (auto iter = node_for_workload.find(workload_name); iter != node_for_workload.end()) { + auto nodePtr = iter->second; auto queue = iter->second->getQueue(); if (!queue) throw 
Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unable to use workload '{}' that have children for resource '{}'", workload_name, resource_name); - classifier.attach(shared_from_this(), current_version, ResourceLink{.queue = queue.get()}); + classifier.attach(shared_from_this(), current_version, nodePtr.get()); } else { diff --git a/src/Common/Scheduler/Nodes/WorkloadResourceManager.h b/src/Common/Scheduler/Nodes/WorkloadResourceManager.h index def320eb37af..aea9d435c896 100644 --- a/src/Common/Scheduler/Nodes/WorkloadResourceManager.h +++ b/src/Common/Scheduler/Nodes/WorkloadResourceManager.h @@ -241,21 +241,23 @@ class WorkloadResourceManager : public IResourceManager /// NOTE: It is called from query threads (possibly multiple) bool has(const String & resource_name) override; ResourceLink get(const String & resource_name) override; + WorkloadSettings getWorkloadSettings(const String & resource_name) const override; /// Attaches/detaches a specific resource /// NOTE: It is called from scheduler threads (possibly multiple) - void attach(const ResourcePtr & resource, const VersionPtr & version, ResourceLink link); + void attach(const ResourcePtr & resource, const VersionPtr & version, UnifiedSchedulerNode * node); void detach(const ResourcePtr & resource); private: const ClassifierSettings settings; WorkloadResourceManager * resource_manager; - std::mutex mutex; + mutable std::mutex mutex; struct Attachment { ResourcePtr resource; VersionPtr version; ResourceLink link; + WorkloadSettings settings; }; std::unordered_map attachments; // TSA_GUARDED_BY(mutex); }; diff --git a/src/Common/Scheduler/Nodes/tests/gtest_workload_resource_manager.cpp b/src/Common/Scheduler/Nodes/tests/gtest_workload_resource_manager.cpp index e4d6164d34d5..3b3969340fe6 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_workload_resource_manager.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_workload_resource_manager.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -1268,6 +1269,23 @@ TEST(SchedulerWorkloadResourceManager, CPUSchedulingIndependentPools) t.wait(); } +TEST(SchedulerWorkloadResourceManager, MaxCPUsDerivedFromShare) +{ + ResourceTest t; + + t.query("CREATE RESOURCE cpu (MASTER THREAD, WORKER THREAD)"); + // Only max_cpu_share is set, max_cpus is unset + t.query("CREATE WORKLOAD all SETTINGS max_cpu_share = 0.5"); + ClassifierPtr c = t.manager->acquire("all"); + + // The expected hard cap is max_cpu_share * getNumberOfCPUCoresToUse() + WorkloadSettings settings = c->getWorkloadSettings("cpu"); + double expected_cap = 0.5 * getNumberOfCPUCoresToUse(); + double actual_cap = settings.max_cpus; + + EXPECT_DOUBLE_EQ(actual_cap, expected_cap); +} + auto getAcquired() { return CurrentMetrics::get(CurrentMetrics::ConcurrencyControlAcquired); diff --git a/src/Common/Scheduler/WorkloadSettings.cpp b/src/Common/Scheduler/WorkloadSettings.cpp index f5b9d98786d6..bba7975f078c 100644 --- a/src/Common/Scheduler/WorkloadSettings.cpp +++ b/src/Common/Scheduler/WorkloadSettings.cpp @@ -232,7 +232,7 @@ void WorkloadSettings::initFromChanges(CostUnit unit_, const ASTCreateWorkloadQu if (share_limit > 0) { Float64 value = share_limit * getNumberOfCPUCoresToUse(); - if (value > 0 && value < limit) + if (value > 0 && (limit == 0 || value < limit)) limit = value; } max_cpus = limit; diff --git a/src/Common/Scheduler/createResourceManager.cpp b/src/Common/Scheduler/createResourceManager.cpp index c5a430024b6d..500aed901f80 100644 --- a/src/Common/Scheduler/createResourceManager.cpp +++ 
b/src/Common/Scheduler/createResourceManager.cpp @@ -55,6 +55,16 @@ class ResourceManagerDispatcher : public IResourceManager return ResourceLink{}; } + WorkloadSettings getWorkloadSettings(const String & resource_name) const override + { + for (const auto & classifier : classifiers) + { + if (classifier->has(resource_name)) + return classifier->getWorkloadSettings(resource_name); + } + return {}; + } + private: const ClassifierSettings settings; std::vector classifiers; // should be constant after initialization to avoid races From 876b9615234cedc0a2245cf4329c151a7f7b7c2b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 14 Oct 2025 06:15:04 +0000 Subject: [PATCH 018/112] Backport #87660 to 25.8: backups: use cloned storage client with overridden retry policy settings for native copy --- src/Backups/BackupIO_S3.cpp | 64 +++++++++++++++- src/Backups/BackupIO_S3.h | 25 +++++++ src/Core/Settings.cpp | 7 +- src/Databases/DataLake/GlueCatalog.cpp | 3 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 7 +- src/IO/S3/Client.cpp | 56 ++++++++------ src/IO/S3/Client.h | 3 + src/IO/S3/copyS3File.cpp | 2 +- src/IO/S3/copyS3File.h | 2 +- .../test_backup_restore_s3/test.py | 75 +++++++++++++++++-- 10 files changed, 199 insertions(+), 45 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 1738ca4607f4..51cf22a12eee 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -37,7 +37,7 @@ namespace Setting extern const SettingsUInt64 s3_max_connections; extern const SettingsUInt64 s3_max_redirects; extern const SettingsBool s3_slow_all_threads_after_network_error; - extern const SettingsBool s3_slow_all_threads_after_retryable_error; + extern const SettingsBool backup_slow_all_threads_after_retryable_s3_error; } namespace S3AuthSetting @@ -76,6 +76,36 @@ namespace ErrorCodes namespace { +class S3BackupClientCreator +{ +public: + explicit S3BackupClientCreator(const ContextPtr & context) + { + const Settings & local_settings = context->getSettingsRef(); + retry_strategy = S3::PocoHTTPClientConfiguration::RetryStrategy{ + .max_retries = static_cast(local_settings[Setting::backup_restore_s3_retry_attempts]), + .initial_delay_ms = static_cast(local_settings[Setting::backup_restore_s3_retry_initial_backoff_ms]), + .max_delay_ms = static_cast(local_settings[Setting::backup_restore_s3_retry_max_backoff_ms]), + .jitter_factor = local_settings[Setting::backup_restore_s3_retry_jitter_factor]}; + slow_all_threads_after_retryable_error = local_settings[Setting::backup_slow_all_threads_after_retryable_s3_error]; + } + + S3BackupDiskClientFactory::Entry operator()(DiskPtr disk) const + { + auto disk_client = disk->getS3StorageClient(); + + auto config = disk_client->getClientConfiguration(); + config.retry_strategy = retry_strategy; + config.s3_slow_all_threads_after_retryable_error = slow_all_threads_after_retryable_error; + + return {disk_client->cloneWithConfigurationOverride(config), disk_client}; + } + +private: + S3::PocoHTTPClientConfiguration::RetryStrategy retry_strategy; + bool slow_all_threads_after_retryable_error = false; +}; + std::shared_ptr makeS3Client( const S3::URI & s3_uri, const String & access_key_id, @@ -114,7 +144,7 @@ namespace .jitter_factor = local_settings[Setting::backup_restore_s3_retry_jitter_factor]}, local_settings[Setting::s3_slow_all_threads_after_network_error], - local_settings[Setting::s3_slow_all_threads_after_retryable_error], + local_settings[Setting::backup_slow_all_threads_after_retryable_s3_error], 
local_settings[Setting::enable_s3_requests_logging], /* for_disk_s3 = */ false, /* opt_disk_name = */ {}, @@ -180,6 +210,31 @@ namespace } +S3BackupDiskClientFactory::S3BackupDiskClientFactory(const S3BackupDiskClientFactory::CreateFn & create_fn_) + : create_fn(create_fn_) +{ +} + +std::shared_ptr S3BackupDiskClientFactory::getOrCreate(DiskPtr disk) +{ + std::lock_guard lock(clients_mutex); + + auto [it, inserted] = clients.try_emplace(disk->getName(), Entry{}); + auto log = getLogger("S3BackupDiskClientFactory"); + auto & entry = it->second; + if (inserted) + LOG_TRACE(log, "Creating S3 client for copy from disk '{}' to backup bucket", disk->getName()); + else if (const_pointer_cast(entry.disk_reported_client.lock()) != disk->getS3StorageClient()) + LOG_INFO( + log, "Updating S3 client for copy from disk '{}' to the backup bucket because the disk client was updated", disk->getName()); + + while (const_pointer_cast(entry.disk_reported_client.lock()) != disk->getS3StorageClient()) + entry = create_fn(disk); + + chassert(entry.backup_client); + return entry.backup_client; +} + BackupReaderS3::BackupReaderS3( const S3::URI & s3_uri_, const String & access_key_id_, @@ -278,7 +333,6 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode); } - BackupWriterS3::BackupWriterS3( const S3::URI & s3_uri_, const String & access_key_id_, @@ -295,6 +349,7 @@ BackupWriterS3::BackupWriterS3( , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false, ""} , s3_capabilities(getCapabilitiesFromConfig(context_->getConfigRef(), "s3")) + , disk_client_factory(S3BackupClientCreator(context_)) { s3_settings.loadFromConfig(context_->getConfigRef(), "s3", context_->getSettingsRef()); @@ -331,8 +386,9 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) { LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); + /// Use storage client with overridden retry strategy settings. copyS3File( - src_disk->getS3StorageClient(), + /* src_s3_client */ disk_client_factory.getOrCreate(src_disk), /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], start_pos, diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 6cedf04a67af..c8a3575243e7 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -6,15 +6,39 @@ #include #include #include +#include #include #include #include #include #include +#include + + namespace DB { +class S3BackupDiskClientFactory +{ +public: + struct Entry + { + std::shared_ptr backup_client; + std::weak_ptr disk_reported_client; + }; + using CreateFn = std::function; + explicit S3BackupDiskClientFactory(const CreateFn & create_fn_); + std::shared_ptr getOrCreate(DiskPtr disk); + +private: + const CreateFn create_fn; + + mutable std::mutex clients_mutex; + /// Disk name to client entry; + std::unordered_map clients TSA_GUARDED_BY(clients_mutex); +}; + /// Represents a backup stored to AWS S3. 
class BackupReaderS3 : public BackupReaderDefault { @@ -87,6 +111,7 @@ class BackupWriterS3 : public BackupWriterDefault S3Settings s3_settings; std::shared_ptr client; S3Capabilities s3_capabilities; + S3BackupDiskClientFactory disk_client_factory; BlobStorageLogWriterPtr blob_storage_log; }; diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 497760a1325d..903d984dee49 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -467,11 +467,11 @@ When set to `true`, all threads executing S3 requests to the same backup endpoin after any single s3 request encounters a retryable network error, such as socket timeout. When set to `false`, each thread handles S3 request backoff independently of the others. )", 0) \ - DECLARE_WITH_ALIAS(Bool, s3_slow_all_threads_after_retryable_error, false, R"( -When set to `true`, all threads executing S3 requests to the same endpoint are slowed down + DECLARE(Bool, backup_slow_all_threads_after_retryable_s3_error, false, R"( +When set to `true`, all threads executing S3 requests to the same backup endpoint are slowed down after any single S3 request encounters a retryable S3 error, such as 'Slow Down'. When set to `false`, each thread handles s3 request backoff independently of the others. -)", 0, backup_slow_all_threads_after_retryable_s3_error) \ +)", 0) \ DECLARE(UInt64, azure_list_object_keys_size, 1000, R"( Maximum number of files that could be returned in batch by ListObject request )", 0) \ @@ -7091,6 +7091,7 @@ Sets the evaluation time to be used with promql dialect. 'auto' means the curren MAKE_OBSOLETE(M, Bool, enable_variant_type, true) \ MAKE_OBSOLETE(M, Bool, enable_dynamic_type, true) \ MAKE_OBSOLETE(M, Bool, enable_json_type, true) \ + MAKE_OBSOLETE(M, Bool, s3_slow_all_threads_after_retryable_error, false) \ \ /* moved to config.xml: see also src/Core/ServerSettings.h */ \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_buffer_flush_schedule_pool_size, 16) \ diff --git a/src/Databases/DataLake/GlueCatalog.cpp b/src/Databases/DataLake/GlueCatalog.cpp index 0c95669f3d19..c0233f362ec7 100644 --- a/src/Databases/DataLake/GlueCatalog.cpp +++ b/src/Databases/DataLake/GlueCatalog.cpp @@ -61,7 +61,6 @@ namespace DB::Setting extern const SettingsUInt64 s3_max_redirects; extern const SettingsUInt64 s3_retry_attempts; extern const SettingsBool s3_slow_all_threads_after_network_error; - extern const SettingsBool s3_slow_all_threads_after_retryable_error; extern const SettingsBool enable_s3_requests_logging; extern const SettingsUInt64 s3_connect_timeout_ms; extern const SettingsUInt64 s3_request_timeout_ms; @@ -111,7 +110,7 @@ GlueCatalog::GlueCatalog( int s3_max_redirects = static_cast(global_settings[DB::Setting::s3_max_redirects]); int s3_retry_attempts = static_cast(global_settings[DB::Setting::s3_retry_attempts]); bool s3_slow_all_threads_after_network_error = global_settings[DB::Setting::s3_slow_all_threads_after_network_error]; - bool s3_slow_all_threads_after_retryable_error = global_settings[DB::Setting::s3_slow_all_threads_after_retryable_error]; + bool s3_slow_all_threads_after_retryable_error = false; bool enable_s3_requests_logging = global_settings[DB::Setting::enable_s3_requests_logging]; DB::S3::PocoHTTPClientConfiguration poco_config = DB::S3::ClientFactory::instance().createClientConfiguration( diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index cf653ffe561d..e9c4fe2df80a 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ 
b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -32,7 +32,6 @@ namespace Setting extern const SettingsUInt64 s3_max_redirects; extern const SettingsUInt64 s3_retry_attempts; extern const SettingsBool s3_slow_all_threads_after_network_error; - extern const SettingsBool s3_slow_all_threads_after_retryable_error; } namespace S3AuthSetting @@ -111,10 +110,6 @@ getClient(const S3::URI & url, const S3Settings & settings, ContextPtr context, if (!for_disk_s3 && local_settings.isChanged("s3_slow_all_threads_after_network_error")) s3_slow_all_threads_after_network_error = static_cast(local_settings[Setting::s3_slow_all_threads_after_network_error]); - bool s3_slow_all_threads_after_retryable_error = static_cast(global_settings[Setting::s3_slow_all_threads_after_retryable_error]); - if (!for_disk_s3 && local_settings.isChanged("s3_slow_all_threads_after_retryable_error")) - s3_slow_all_threads_after_retryable_error = static_cast(local_settings[Setting::s3_slow_all_threads_after_retryable_error]); - bool enable_s3_requests_logging = global_settings[Setting::enable_s3_requests_logging]; if (!for_disk_s3 && local_settings.isChanged("enable_s3_requests_logging")) enable_s3_requests_logging = local_settings[Setting::enable_s3_requests_logging]; @@ -125,7 +120,7 @@ getClient(const S3::URI & url, const S3Settings & settings, ContextPtr context, s3_max_redirects, S3::PocoHTTPClientConfiguration::RetryStrategy{.max_retries = static_cast(s3_retry_attempts)}, s3_slow_all_threads_after_network_error, - s3_slow_all_threads_after_retryable_error, + /* s3_slow_all_threads_after_retryable_error = */ false, enable_s3_requests_logging, for_disk_s3, opt_disk_name, diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 28bc4d52f45f..59a8d337f16f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -225,7 +225,12 @@ std::unique_ptr Client::create( std::unique_ptr Client::clone() const { - return std::unique_ptr(new Client(*this, client_configuration)); + return cloneWithConfigurationOverride(this->client_configuration); +} + +std::unique_ptr Client::cloneWithConfigurationOverride(const PocoHTTPClientConfiguration & client_configuration_override) const +{ + return std::unique_ptr(new Client(*this, client_configuration_override)); } namespace @@ -286,27 +291,7 @@ Client::Client( LOG_TRACE(log, "API mode of the S3 client: {}", api_mode); - if (client_configuration.for_disk_s3) - { - LOG_TRACE( - log, - "S3 client for disk '{}' initialized with s3_retry_attempts: {}", - client_configuration.opt_disk_name.value_or(""), - client_configuration.retry_strategy.max_retries); - LOG_TRACE( - log, - "S3 client for disk '{}': slowing down threads on retryable errors is {}", - client_configuration.opt_disk_name.value_or(""), - client_configuration.s3_slow_all_threads_after_retryable_error ? "enabled" : "disabled"); - } - else - { - LOG_TRACE(log, "S3 client initialized with s3_retry_attempts: {}", client_configuration.retry_strategy.max_retries); - LOG_TRACE( - log, - "S3 client: slowing down threads on retryable errors is {}", - client_configuration.s3_slow_all_threads_after_retryable_error ? 
"enabled" : "disabled"); - } + logConfiguration(); detect_region = provider_type == ProviderType::AWS && explicit_region == Aws::Region::AWS_GLOBAL; @@ -335,6 +320,8 @@ Client::Client( cache = std::make_shared(*other.cache); ClientCacheRegistry::instance().registerClient(cache); + logConfiguration(); + ProfileEvents::increment(ProfileEvents::TinyS3Clients); } @@ -891,6 +878,31 @@ void Client::slowDownAfterRetryableError() const } } +void Client::logConfiguration() const +{ + if (client_configuration.for_disk_s3) + { + LOG_TRACE( + log, + "S3 client for disk '{}' initialized with s3_retry_attempts: {}", + client_configuration.opt_disk_name.value_or(""), + client_configuration.retry_strategy.max_retries); + LOG_TRACE( + log, + "S3 client for disk '{}': slowing down threads on retryable errors is {}", + client_configuration.opt_disk_name.value_or(""), + client_configuration.s3_slow_all_threads_after_retryable_error ? "enabled" : "disabled"); + } + else + { + LOG_TRACE(log, "S3 client initialized with s3_retry_attempts: {}", client_configuration.retry_strategy.max_retries); + LOG_TRACE( + log, + "S3 client: slowing down threads on retryable errors is {}", + client_configuration.s3_slow_all_threads_after_retryable_error ? "enabled" : "disabled"); + } +} + bool Client::supportsMultiPartCopy() const { return provider_type != ProviderType::GCS; diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index 48c26124e306..14bf90d854c2 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -132,6 +132,8 @@ class Client : private Aws::S3::S3Client std::unique_ptr clone() const; + std::unique_ptr cloneWithConfigurationOverride(const PocoHTTPClientConfiguration & client_configuration_override) const; + Client & operator=(const Client &) = delete; Client(Client && other) = delete; @@ -301,6 +303,7 @@ class Client : private Aws::S3::S3Client void updateNextTimeToRetryAfterRetryableError(Aws::Client::AWSError error, Int64 attempt_no) const; void slowDownAfterRetryableError() const; + void logConfiguration() const; String initial_endpoint; std::shared_ptr credentials_provider; PocoHTTPClientConfiguration client_configuration; diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index f4c41e179142..25c1c3126e41 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -896,7 +896,7 @@ void copyDataToS3File( void copyS3File( - const std::shared_ptr & src_s3_client, + std::shared_ptr src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 760498b6fa7a..994f8a7b1d84 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -31,7 +31,7 @@ using CreateReadBuffer = std::function()>; /// /// read_settings - is used for throttling in case of native copy is not possible void copyS3File( - const std::shared_ptr & src_s3_client, + std::shared_ptr src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 374d8acd868c..6486fa9ae61d 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -4,6 +4,7 @@ import pytest +from ast import literal_eval from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV from helpers.config_cluster import minio_secret_key @@ -384,8 +385,16 @@ def test_backup_to_s3_multipart(): size=1000000, ) node = cluster.instances["node"] - assert 
node.contains_in_log( - f"copyDataToS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" + + node.query("SYSTEM FLUSH LOGS") + pattern = f"Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" + assert ( + int( + node.query( + f"SELECT count() FROM system.text_log WHERE logger_name='copyDataToS3File' AND message like '{pattern}%'", + ) + ) + > 0 ) backup_query_id = backup_events["query_id"] @@ -465,9 +474,56 @@ def test_backup_to_s3_native_copy(storage_policy): # single part upload assert backup_events["S3CopyObject"] > 0 assert restore_events["S3CopyObject"] > 0 + + node.query("SYSTEM FLUSH LOGS") + pattern = f"Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" + assert ( + int( + node.query( + f"SELECT count() FROM system.text_log WHERE logger_name='copyS3File' AND message like '{pattern}%'", + ) + ) + > 0 + ) + + +@pytest.mark.parametrize( + "storage_policy", + [ + "policy_s3", + "policy_s3_other_bucket", + "policy_s3_plain_rewritable", + ], +) +def test_backup_to_s3_native_copy_slow_down_all_threads(storage_policy): + backup_name = new_backup_name() + backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', '{minio_secret_key}')" + (backup_events, restore_events) = check_backup_and_restore( + cluster, + storage_policy, + backup_destination, + backup_settings={"backup_slow_all_threads_after_retryable_s3_error": True}, + ) + # single part upload + assert backup_events["S3CopyObject"] > 0 + assert restore_events["S3CopyObject"] > 0 node = cluster.instances["node"] - assert node.contains_in_log( - f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" + + disks = literal_eval( + node.query( + f"SELECT disks FROM system.storage_policies WHERE policy_name='{storage_policy}'" + ) + ) + assert len(disks) == 1 + node.query("SYSTEM FLUSH LOGS") + pattern = f"S3 client for disk \\'{disks[0]}\\': slowing down threads on retryable errors is enabled" + assert ( + int( + node.query( + f"SELECT count() FROM system.text_log WHERE logger_name='S3Client' AND message LIKE '%{pattern}%'", + ) + ) + > 0 ) @@ -481,9 +537,16 @@ def test_backup_to_s3_native_copy_multipart(): # multi part upload assert backup_events["S3CreateMultipartUpload"] > 0 assert restore_events["S3CreateMultipartUpload"] > 0 + node = cluster.instances["node"] - assert node.contains_in_log( - f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" + pattern = f"Multipart upload has completed. 
Bucket: root, Key: data/backups/multipart/{backup_name}/" + assert ( + int( + node.query( + f"SELECT count() FROM system.text_log WHERE logger_name='copyS3File' AND message like '{pattern}%'", + ) + ) + > 0 ) From 9364aef4a82fef503730bbeaa0ee53c760ee2531 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 14 Oct 2025 11:11:48 +0000 Subject: [PATCH 019/112] Backport #87798 to 25.8: Fix reading null map subcolumn from Variants that cannot be inside Nullable --- src/DataTypes/DataTypeDynamic.cpp | 2 + src/DataTypes/DataTypeVariant.cpp | 2 +- .../Serializations/SerializationVariant.cpp | 104 +-- .../Serializations/SerializationVariant.h | 12 +- ...03201_variant_null_map_subcolumn.reference | 642 +++++++++--------- .../03201_variant_null_map_subcolumn.sh | 12 +- .../03202_dynamic_null_map_subcolumn.sql.j2 | 10 +- ...variant_array_null_map_subcolumn.reference | 1 + ...03640_variant_array_null_map_subcolumn.sql | 6 + 9 files changed, 405 insertions(+), 386 deletions(-) create mode 100644 tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.reference create mode 100644 tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.sql diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index ea4a362f8d6a..186a806d5171 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -215,6 +215,8 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa bool is_null_map_subcolumn = subcolumn_nested_name == "null"; if (is_null_map_subcolumn) { + if (!subcolumn_type->canBeInsideNullable()) + return nullptr; res->type = std::make_shared(); } else if (!subcolumn_nested_name.empty()) diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index fe58b5e0f9f1..8c176fb5929b 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -182,7 +182,7 @@ SerializationPtr DataTypeVariant::doGetDefaultSerialization() const variant_names.push_back(variant->getName()); } - return std::make_shared(std::move(serializations), std::move(variant_names), SerializationVariant::getVariantsDeserializeTextOrder(variants), getName()); + return std::make_shared(variants, getName()); } void DataTypeVariant::forEachChild(const DB::IDataType::ChildCallback & callback) const diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 143b7a14fa9d..7471cdf10ca8 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -57,6 +57,19 @@ struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBin } }; +SerializationVariant::SerializationVariant(const DataTypes & variant_types_, const String & variant_name_) : variant_types(variant_types_), deserialize_text_order(getVariantsDeserializeTextOrder(variant_types_)), variant_name(variant_name_) +{ + variant_serializations.reserve(variant_serializations.size()); + variant_names.reserve(variant_serializations.size()); + + for (const auto & variant : variant_types) + { + variant_serializations.push_back(variant->getDefaultSerialization()); + variant_names.push_back(variant->getName()); + } +} + + void SerializationVariant::enumerateStreams( EnumerateStreamsSettings & settings, const StreamCallback & callback, @@ -89,7 +102,7 @@ void SerializationVariant::enumerateStreams( settings.path.push_back(Substream::VariantElements); settings.path.back().data = data; - for (size_t i = 0; i < variants.size(); ++i) 
+ for (size_t i = 0; i < variant_serializations.size(); ++i) { DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr; settings.path.back().creator = std::make_shared( @@ -99,7 +112,7 @@ void SerializationVariant::enumerateStreams( column_variant ? column_variant->localDiscriminatorByGlobal(i) : i, !type || type->canBeInsideNullable() || type->lowCardinality()); - auto variant_data = SubstreamData(variants[i]) + auto variant_data = SubstreamData(variant_serializations[i]) .withType(type) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) @@ -107,7 +120,7 @@ void SerializationVariant::enumerateStreams( addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; - variants[i]->enumerateStreams(settings, callback, variant_data); + variant_serializations[i]->enumerateStreams(settings, callback, variant_data); settings.path.pop_back(); } @@ -119,8 +132,11 @@ void SerializationVariant::enumerateStreams( .withType(type_variant ? std::make_shared() : nullptr) .withColumn(column_variant ? ColumnUInt8::create() : nullptr); - for (size_t i = 0; i < variants.size(); ++i) + for (size_t i = 0; i < variant_serializations.size(); ++i) { + if (!variant_types[i]->canBeInsideNullable()) + continue; + settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); settings.path.push_back(Substream::VariantElementNullMap); settings.path.back().variant_element_name = variant_names[i]; @@ -149,14 +165,14 @@ void SerializationVariant::serializeBinaryBulkStatePrefix( const ColumnVariant & col = assert_cast(column); auto variant_state = std::make_shared(mode); - variant_state->variant_states.resize(variants.size()); + variant_state->variant_states.resize(variant_serializations.size()); settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i < variants.size(); ++i) + for (size_t i = 0; i < variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]); + variant_serializations[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]); settings.path.pop_back(); } @@ -172,10 +188,10 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( auto * variant_state = checkAndGetState(state); settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i < variants.size(); ++i) + for (size_t i = 0; i < variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]); + variant_serializations[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -193,13 +209,13 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( auto variant_state = std::make_shared(); variant_state->discriminators_state = discriminators_state; - variant_state->variant_states.resize(variants.size()); + variant_state->variant_states.resize(variant_serializations.size()); settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i < variants.size(); ++i) + for (size_t i = 0; i < variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - 
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache); + variant_serializations[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } @@ -260,10 +276,10 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian if (limit == 0) { settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i != variants.size(); ++i) + for (size_t i = 0; i != variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), col.getVariantByGlobalDiscriminator(i).size(), 0, settings, variant_state->variant_states[i]); + variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), col.getVariantByGlobalDiscriminator(i).size(), 0, settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -294,14 +310,14 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian } settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i != variants.size(); ++i) + for (size_t i = 0; i != variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); /// We can use the same offset/limit as for whole Variant column if (i == non_empty_global_discr) - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), offset, limit, settings, variant_state->variant_states[i]); + variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), offset, limit, settings, variant_state->variant_states[i]); else - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), col.getVariantByGlobalDiscriminator(i).size(), 0, settings, variant_state->variant_states[i]); + variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), col.getVariantByGlobalDiscriminator(i).size(), 0, settings, variant_state->variant_states[i]); settings.path.pop_back(); } variants_statistics[variant_names[non_empty_global_discr]] += limit; @@ -326,10 +342,10 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian } settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i != variants.size(); ++i) + for (size_t i = 0; i != variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), col.getVariantByGlobalDiscriminator(i).size(), 0, settings, variant_state->variant_states[i]); + variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), col.getVariantByGlobalDiscriminator(i).size(), 0, settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -360,10 +376,10 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian /// Second, serialize variants in global order. 
settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i != variants.size(); ++i) + for (size_t i = 0; i != variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); + variant_serializations[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); size_t variant_size = col.getVariantByGlobalDiscriminator(i).size(); variants_statistics[variant_names[i]] += variant_size; total_size_of_variants += variant_size; @@ -376,7 +392,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian /// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant. const auto & local_discriminators = col.getLocalDiscriminators(); const auto & offsets = col.getOffsets(); - std::vector> variant_offsets_and_limits(variants.size(), {0, 0}); + std::vector> variant_offsets_and_limits(variant_serializations.size(), {0, 0}); size_t end = offset + limit; std::bitset non_empty_variants_in_range; ColumnVariant::Discriminator last_non_empty_variant_discr = 0; @@ -424,10 +440,10 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian /// Serialize variants in global order. settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i != variants.size(); ++i) + for (size_t i = 0; i != variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams( + variant_serializations[i]->serializeBinaryBulkWithMultipleStreams( col.getVariantByGlobalDiscriminator(i), variant_offsets_and_limits[i].second ? variant_offsets_and_limits[i].first : col.getVariantByGlobalDiscriminator(i).size(), variant_offsets_and_limits[i].second, @@ -545,7 +561,7 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( /// if we didn't do it during discriminators deserialization. if (variant_rows_offsets.empty()) { - variant_rows_offsets.resize(variants.size(), 0); + variant_rows_offsets.resize(variant_serializations.size(), 0); if (rows_offset) { @@ -571,7 +587,7 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( if (variant_limits.empty()) { - variant_limits.resize(variants.size(), 0); + variant_limits.resize(variant_serializations.size(), 0); auto & discriminators_data = col.getLocalDiscriminators(); for (size_t i = discriminators_offset ; i != discriminators_data.size(); ++i) @@ -584,10 +600,10 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( /// Now we can deserialize variants according to their limits. 
settings.path.push_back(Substream::VariantElements); - for (size_t i = 0; i != variants.size(); ++i) + for (size_t i = 0; i != variant_serializations.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkWithMultipleStreams( + variant_serializations[i]->deserializeBinaryBulkWithMultipleStreams( col.getVariantPtrByLocalDiscriminator(i), variant_rows_offsets[i], variant_limits[i], settings, variant_state->variant_states[i], cache); settings.path.pop_back(); @@ -608,10 +624,10 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( if (settings.insert_only_rows_in_current_range_from_substreams_cache || !insertDataFromSubstreamsCacheIfAny(cache, settings, col.getOffsetsPtr())) { std::vector variant_offsets; - variant_offsets.reserve(variants.size()); + variant_offsets.reserve(variant_serializations.size()); size_t num_non_empty_variants = 0; ColumnVariant::Discriminator last_non_empty_discr = 0; - for (size_t i = 0; i != variants.size(); ++i) + for (size_t i = 0; i != variant_serializations.size(); ++i) { if (variant_limits[i]) { @@ -674,8 +690,8 @@ std::pair, std::vector> SerializationVariant::deseri state.remaining_rows_in_granule = 0; /// Calculate limits for variants during discriminators deserialization. - std::vector variant_rows_offsets(variants.size(), 0); - std::vector variant_limits(variants.size(), 0); + std::vector variant_rows_offsets(variant_serializations.size(), 0); + std::vector variant_limits(variant_serializations.size(), 0); limit += rows_offset; while (limit) @@ -776,7 +792,7 @@ void SerializationVariant::serializeBinary(const IColumn & column, size_t row_nu auto global_discr = col.globalDiscriminatorAt(row_num); writeBinaryLittleEndian(global_discr, ostr); if (global_discr != ColumnVariant::NULL_DISCRIMINATOR) - variants[global_discr]->serializeBinary(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + variant_serializations[global_discr]->serializeBinary(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } void SerializationVariant::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -788,10 +804,10 @@ void SerializationVariant::deserializeBinary(IColumn & column, ReadBuffer & istr { col.insertDefault(); } - else if (global_discr < variants.size()) + else if (global_discr < variant_serializations.size()) { auto & variant_column = col.getVariantByGlobalDiscriminator(global_discr); - variants[global_discr]->deserializeBinary(variant_column, istr, settings); + variant_serializations[global_discr]->deserializeBinary(variant_column, istr, settings); col.getLocalDiscriminators().push_back(col.localDiscriminatorByGlobal(global_discr)); col.getOffsets().push_back(variant_column.size() - 1); } @@ -992,7 +1008,7 @@ bool SerializationVariant::tryDeserializeImpl( ReadBufferFromString variant_buf(field); auto & variant_column = column_variant.getVariantByGlobalDiscriminator(global_discr); size_t prev_size = variant_column.size(); - if (try_deserialize_nested(variant_column, variants[global_discr], variant_buf, modified_settings) && variant_buf.eof()) + if (try_deserialize_nested(variant_column, variant_serializations[global_discr], variant_buf, modified_settings) && variant_buf.eof()) { column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(global_discr)); column_variant.getOffsets().push_back(prev_size); @@ -1014,7 +1030,7 @@ void 
SerializationVariant::serializeTextEscaped(const IColumn & column, size_t r if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullEscaped(ostr, settings); else - variants[global_discr]->serializeTextEscaped(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + variant_serializations[global_discr]->serializeTextEscaped(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } bool SerializationVariant::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -1053,7 +1069,7 @@ void SerializationVariant::serializeTextRaw(const IColumn & column, size_t row_n if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullRaw(ostr, settings); else - variants[global_discr]->serializeTextRaw(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + variant_serializations[global_discr]->serializeTextRaw(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } bool SerializationVariant::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -1092,7 +1108,7 @@ void SerializationVariant::serializeTextQuoted(const IColumn & column, size_t ro if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullQuoted(ostr); else - variants[global_discr]->serializeTextQuoted(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + variant_serializations[global_discr]->serializeTextQuoted(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } bool SerializationVariant::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -1132,7 +1148,7 @@ void SerializationVariant::serializeTextCSV(const IColumn & column, size_t row_n if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullCSV(ostr, settings); else - variants[global_discr]->serializeTextCSV(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + variant_serializations[global_discr]->serializeTextCSV(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } bool SerializationVariant::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -1171,7 +1187,7 @@ void SerializationVariant::serializeText(const IColumn & column, size_t row_num, if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullText(ostr, settings); else - variants[global_discr]->serializeText(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + variant_serializations[global_discr]->serializeText(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } bool SerializationVariant::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -1210,7 +1226,7 @@ void SerializationVariant::serializeTextJSON(const IColumn & column, size_t row_ if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullJSON(ostr); else - variants[global_discr]->serializeTextJSON(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + 
variant_serializations[global_discr]->serializeTextJSON(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } void SerializationVariant::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const @@ -1220,7 +1236,7 @@ void SerializationVariant::serializeTextJSONPretty(const IColumn & column, size_ if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullJSON(ostr); else - variants[global_discr]->serializeTextJSONPretty(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings, indent); + variant_serializations[global_discr]->serializeTextJSONPretty(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings, indent); } bool SerializationVariant::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -1260,7 +1276,7 @@ void SerializationVariant::serializeTextXML(const IColumn & column, size_t row_n if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) SerializationNullable::serializeNullXML(ostr); else - variants[global_discr]->serializeTextXML(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); + variant_serializations[global_discr]->serializeTextXML(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } } diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index b4eaf70f4201..450b442978ba 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -72,14 +72,7 @@ class SerializationVariant : public ISerialization using VariantSerializations = std::vector; - explicit SerializationVariant( - const VariantSerializations & variants_, - const std::vector & variant_names_, - const std::vector & deserialize_text_order_, - const String & variant_name_) - : variants(variants_), variant_names(variant_names_), deserialize_text_order(deserialize_text_order_), variant_name(variant_name_) - { - } + explicit SerializationVariant(const DataTypes & variant_types_, const String & variant_name_); void enumerateStreams( EnumerateStreamsSettings & settings, @@ -222,8 +215,9 @@ class SerializationVariant : public ISerialization std::function try_deserialize_nested, const FormatSettings & settings) const; - VariantSerializations variants; + VariantSerializations variant_serializations; std::vector variant_names; + DataTypes variant_types; std::vector deserialize_text_order; /// Name of Variant data type for better exception messages. 
String variant_name; diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference index 8565fe3d0fa1..629957c24411 100644 --- a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference @@ -1,113 +1,113 @@ Memory test -[] 1 0 0 [] -1 0 1 0 [] -\N 1 1 0 [] -['str_3','str_3','str_3'] 1 0 3 [1,1,1] -4 0 1 0 [] -\N 1 1 0 [] -[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] -7 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] -10 0 1 0 [] -\N 1 1 0 [] -['str_12','str_12'] 1 0 2 [1,1] -13 0 1 0 [] -\N 1 1 0 [] -[15,15,15,15,15] 1 0 5 [0,0,0,0,0] -16 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] -19 0 1 0 [] -\N 1 1 0 [] -['str_21'] 1 0 1 [1] -22 0 1 0 [] -\N 1 1 0 [] -[24,24,24,24] 1 0 4 [0,0,0,0] -25 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] -28 0 1 0 [] -\N 1 1 0 [] -[] 1 0 0 [] -31 0 1 0 [] -\N 1 1 0 [] -[33,33,33] 1 0 3 [0,0,0] -34 0 1 0 [] -\N 1 1 0 [] +[] 1 0 [] 1 0 0 [] -0 1 0 [] -1 1 0 [] -1 0 3 [1,1,1] -0 1 0 [] -1 1 0 [] -1 0 6 [0,0,0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 9 [1,1,1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] -1 0 2 [1,1] -0 1 0 [] -1 1 0 [] -1 0 5 [0,0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 8 [1,1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] -1 0 1 [1] -0 1 0 [] -1 1 0 [] -1 0 4 [0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 7 [1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] -1 0 0 [] -0 1 0 [] -1 1 0 [] -1 0 3 [0,0,0] -0 1 0 [] -1 1 0 [] -0 0 [] [] -1 0 [] [] -1 0 [] [] -0 3 [1,1,1] [0,0,0] -1 0 [] [] -1 0 [] [] -0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 2 [1,1] [0,0] -1 0 [] [] -1 0 [] [] -0 5 [0,0,0,0,0] [1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 1 [1] [0] -1 0 [] [] -1 0 [] [] -0 4 [0,0,0,0] [1,1,1,1] -1 0 [] [] -1 0 [] [] -0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 0 [] [] -1 0 [] [] -1 0 [] [] -0 3 [0,0,0] [1,1,1] -1 0 [] [] -1 0 [] [] +\N 1 0 [] +['str_3','str_3','str_3'] 1 3 [1,1,1] +4 0 0 [] +\N 1 0 [] +[6,6,6,6,6,6] 1 6 [0,0,0,0,0,0] +7 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 9 [1,1,1,1,1,1,1,1,1] +10 0 0 [] +\N 1 0 [] +['str_12','str_12'] 1 2 [1,1] +13 0 0 [] +\N 1 0 [] +[15,15,15,15,15] 1 5 [0,0,0,0,0] +16 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 8 [1,1,1,1,1,1,1,1] +19 0 0 [] +\N 1 0 [] +['str_21'] 1 1 [1] +22 0 0 [] +\N 1 0 [] +[24,24,24,24] 1 4 [0,0,0,0] +25 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 7 [1,1,1,1,1,1,1] +28 0 0 [] +\N 1 0 [] +[] 1 0 [] +31 0 0 [] +\N 1 0 [] +[33,33,33] 1 3 [0,0,0] +34 0 0 [] +\N 1 0 [] +1 0 [] +0 0 [] +1 0 [] +1 3 [1,1,1] +0 0 [] +1 0 [] +1 6 [0,0,0,0,0,0] +0 0 [] +1 0 [] +1 9 [1,1,1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 2 [1,1] +0 0 [] +1 0 [] +1 5 [0,0,0,0,0] +0 0 [] +1 0 [] +1 8 [1,1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 1 [1] +0 0 [] +1 0 [] +1 4 [0,0,0,0] +0 0 [] +1 0 [] +1 7 [1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 0 [] +0 0 [] +1 0 [] +1 3 [0,0,0] +0 0 [] +1 0 [] +0 [] [] +0 [] [] +0 [] [] +3 [1,1,1] [0,0,0] +0 [] [] +0 [] [] +6 [0,0,0,0,0,0] [1,1,1,1,1,1] +0 [] [] +0 [] [] +9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +0 [] [] +0 [] [] +2 [1,1] [0,0] +0 [] [] +0 [] [] +5 [0,0,0,0,0] [1,1,1,1,1] +0 [] [] +0 [] [] +8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +0 [] [] +0 [] [] +1 [1] [0] +0 [] [] +0 
[] [] +4 [0,0,0,0] [1,1,1,1] +0 [] [] +0 [] [] +7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +0 [] [] +0 [] [] +0 [] [] +0 [] [] +0 [] [] +3 [0,0,0] [1,1,1] +0 [] [] +0 [] [] 0 2 3 @@ -134,114 +134,114 @@ test 35 MergeTree compact test -[] 1 0 0 [] -1 0 1 0 [] -\N 1 1 0 [] -['str_3','str_3','str_3'] 1 0 3 [1,1,1] -4 0 1 0 [] -\N 1 1 0 [] -[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] -7 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] -10 0 1 0 [] -\N 1 1 0 [] -['str_12','str_12'] 1 0 2 [1,1] -13 0 1 0 [] -\N 1 1 0 [] -[15,15,15,15,15] 1 0 5 [0,0,0,0,0] -16 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] -19 0 1 0 [] -\N 1 1 0 [] -['str_21'] 1 0 1 [1] -22 0 1 0 [] -\N 1 1 0 [] -[24,24,24,24] 1 0 4 [0,0,0,0] -25 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] -28 0 1 0 [] -\N 1 1 0 [] -[] 1 0 0 [] -31 0 1 0 [] -\N 1 1 0 [] -[33,33,33] 1 0 3 [0,0,0] -34 0 1 0 [] -\N 1 1 0 [] -1 0 0 [] -0 1 0 [] -1 1 0 [] -1 0 3 [1,1,1] -0 1 0 [] -1 1 0 [] -1 0 6 [0,0,0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 9 [1,1,1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] -1 0 2 [1,1] -0 1 0 [] -1 1 0 [] -1 0 5 [0,0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 8 [1,1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] -1 0 1 [1] -0 1 0 [] -1 1 0 [] -1 0 4 [0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 7 [1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] +[] 1 0 [] 1 0 0 [] -0 1 0 [] -1 1 0 [] -1 0 3 [0,0,0] -0 1 0 [] -1 1 0 [] -0 0 [] [] -1 0 [] [] -1 0 [] [] -0 3 [1,1,1] [0,0,0] -1 0 [] [] -1 0 [] [] -0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 2 [1,1] [0,0] -1 0 [] [] -1 0 [] [] -0 5 [0,0,0,0,0] [1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 1 [1] [0] -1 0 [] [] -1 0 [] [] -0 4 [0,0,0,0] [1,1,1,1] -1 0 [] [] -1 0 [] [] -0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 0 [] [] -1 0 [] [] -1 0 [] [] -0 3 [0,0,0] [1,1,1] -1 0 [] [] -1 0 [] [] +\N 1 0 [] +['str_3','str_3','str_3'] 1 3 [1,1,1] +4 0 0 [] +\N 1 0 [] +[6,6,6,6,6,6] 1 6 [0,0,0,0,0,0] +7 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 9 [1,1,1,1,1,1,1,1,1] +10 0 0 [] +\N 1 0 [] +['str_12','str_12'] 1 2 [1,1] +13 0 0 [] +\N 1 0 [] +[15,15,15,15,15] 1 5 [0,0,0,0,0] +16 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 8 [1,1,1,1,1,1,1,1] +19 0 0 [] +\N 1 0 [] +['str_21'] 1 1 [1] +22 0 0 [] +\N 1 0 [] +[24,24,24,24] 1 4 [0,0,0,0] +25 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 7 [1,1,1,1,1,1,1] +28 0 0 [] +\N 1 0 [] +[] 1 0 [] +31 0 0 [] +\N 1 0 [] +[33,33,33] 1 3 [0,0,0] +34 0 0 [] +\N 1 0 [] +1 0 [] +0 0 [] +1 0 [] +1 3 [1,1,1] +0 0 [] +1 0 [] +1 6 [0,0,0,0,0,0] +0 0 [] +1 0 [] +1 9 [1,1,1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 2 [1,1] +0 0 [] +1 0 [] +1 5 [0,0,0,0,0] +0 0 [] +1 0 [] +1 8 [1,1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 1 [1] +0 0 [] +1 0 [] +1 4 [0,0,0,0] +0 0 [] +1 0 [] +1 7 [1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 0 [] +0 0 [] +1 0 [] +1 3 [0,0,0] +0 0 [] +1 0 [] +0 [] [] +0 [] [] +0 [] [] +3 [1,1,1] [0,0,0] +0 [] [] +0 [] [] +6 [0,0,0,0,0,0] [1,1,1,1,1,1] +0 [] [] +0 [] [] +9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +0 [] [] +0 [] [] +2 [1,1] [0,0] +0 [] [] +0 [] [] +5 [0,0,0,0,0] [1,1,1,1,1] +0 [] [] +0 [] [] +8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +0 [] [] +0 [] [] +1 [1] [0] +0 [] [] +0 [] [] +4 [0,0,0,0] [1,1,1,1] +0 [] [] +0 [] [] +7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +0 [] [] +0 [] [] +0 [] [] +0 [] [] +0 [] [] +3 [0,0,0] [1,1,1] +0 [] [] +0 [] [] 0 2 3 @@ 
-268,114 +268,114 @@ test 35 MergeTree wide test -[] 1 0 0 [] -1 0 1 0 [] -\N 1 1 0 [] -['str_3','str_3','str_3'] 1 0 3 [1,1,1] -4 0 1 0 [] -\N 1 1 0 [] -[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] -7 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] -10 0 1 0 [] -\N 1 1 0 [] -['str_12','str_12'] 1 0 2 [1,1] -13 0 1 0 [] -\N 1 1 0 [] -[15,15,15,15,15] 1 0 5 [0,0,0,0,0] -16 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] -19 0 1 0 [] -\N 1 1 0 [] -['str_21'] 1 0 1 [1] -22 0 1 0 [] -\N 1 1 0 [] -[24,24,24,24] 1 0 4 [0,0,0,0] -25 0 1 0 [] -\N 1 1 0 [] -[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] -28 0 1 0 [] -\N 1 1 0 [] -[] 1 0 0 [] -31 0 1 0 [] -\N 1 1 0 [] -[33,33,33] 1 0 3 [0,0,0] -34 0 1 0 [] -\N 1 1 0 [] -1 0 0 [] -0 1 0 [] -1 1 0 [] -1 0 3 [1,1,1] -0 1 0 [] -1 1 0 [] -1 0 6 [0,0,0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 9 [1,1,1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] -1 0 2 [1,1] -0 1 0 [] -1 1 0 [] -1 0 5 [0,0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 8 [1,1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] -1 0 1 [1] -0 1 0 [] -1 1 0 [] -1 0 4 [0,0,0,0] -0 1 0 [] -1 1 0 [] -1 0 7 [1,1,1,1,1,1,1] -0 1 0 [] -1 1 0 [] +[] 1 0 [] 1 0 0 [] -0 1 0 [] -1 1 0 [] -1 0 3 [0,0,0] -0 1 0 [] -1 1 0 [] -0 0 [] [] -1 0 [] [] -1 0 [] [] -0 3 [1,1,1] [0,0,0] -1 0 [] [] -1 0 [] [] -0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 2 [1,1] [0,0] -1 0 [] [] -1 0 [] [] -0 5 [0,0,0,0,0] [1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 1 [1] [0] -1 0 [] [] -1 0 [] [] -0 4 [0,0,0,0] [1,1,1,1] -1 0 [] [] -1 0 [] [] -0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] -1 0 [] [] -1 0 [] [] -0 0 [] [] -1 0 [] [] -1 0 [] [] -0 3 [0,0,0] [1,1,1] -1 0 [] [] -1 0 [] [] +\N 1 0 [] +['str_3','str_3','str_3'] 1 3 [1,1,1] +4 0 0 [] +\N 1 0 [] +[6,6,6,6,6,6] 1 6 [0,0,0,0,0,0] +7 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 9 [1,1,1,1,1,1,1,1,1] +10 0 0 [] +\N 1 0 [] +['str_12','str_12'] 1 2 [1,1] +13 0 0 [] +\N 1 0 [] +[15,15,15,15,15] 1 5 [0,0,0,0,0] +16 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 8 [1,1,1,1,1,1,1,1] +19 0 0 [] +\N 1 0 [] +['str_21'] 1 1 [1] +22 0 0 [] +\N 1 0 [] +[24,24,24,24] 1 4 [0,0,0,0] +25 0 0 [] +\N 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 7 [1,1,1,1,1,1,1] +28 0 0 [] +\N 1 0 [] +[] 1 0 [] +31 0 0 [] +\N 1 0 [] +[33,33,33] 1 3 [0,0,0] +34 0 0 [] +\N 1 0 [] +1 0 [] +0 0 [] +1 0 [] +1 3 [1,1,1] +0 0 [] +1 0 [] +1 6 [0,0,0,0,0,0] +0 0 [] +1 0 [] +1 9 [1,1,1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 2 [1,1] +0 0 [] +1 0 [] +1 5 [0,0,0,0,0] +0 0 [] +1 0 [] +1 8 [1,1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 1 [1] +0 0 [] +1 0 [] +1 4 [0,0,0,0] +0 0 [] +1 0 [] +1 7 [1,1,1,1,1,1,1] +0 0 [] +1 0 [] +1 0 [] +0 0 [] +1 0 [] +1 3 [0,0,0] +0 0 [] +1 0 [] +0 [] [] +0 [] [] +0 [] [] +3 [1,1,1] [0,0,0] +0 [] [] +0 [] [] +6 [0,0,0,0,0,0] [1,1,1,1,1,1] +0 [] [] +0 [] [] +9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +0 [] [] +0 [] [] +2 [1,1] [0,0] +0 [] [] +0 [] [] +5 [0,0,0,0,0] [1,1,1,1,1] +0 [] [] +0 [] [] +8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +0 [] [] +0 [] [] +1 [1] [0] +0 [] [] +0 [] [] +4 [0,0,0,0] [1,1,1,1] +0 [] [] +0 [] [] +7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +0 [] [] +0 [] [] +0 [] [] +0 [] [] +0 [] [] +3 [0,0,0] [1,1,1] +0 [] [] +0 [] [] 0 2 3 diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh index c9dca46f41ed..3795d4423204 100755 
--- a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh @@ -11,14 +11,14 @@ function test() { echo "test" $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(36)" - $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" - $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" - $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" $CH_CLIENT -q "select id from test where v.UInt64 is null order by id" $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(250000) settings min_insert_block_size_rows=100000, min_insert_block_size_bytes=0" - $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" - $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" - $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" $CH_CLIENT -q "select id from test where v.UInt64 is null order by id format Null" } diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 index 21bf738dccb2..323de80588bf 100644 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 +++ 
b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 @@ -35,11 +35,11 @@ select count() from test where not empty(d.`Array(Array(Dynamic))`); select count() from test where d is NULL; select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); -select d, d.UInt64.null, d.String.null, d.`Array(Variant(String, UInt64))`.null from test format Null; -select d.UInt64.null, d.String.null, d.`Array(Variant(String, UInt64))`.null from test format Null; -select d.Int8.null, d.Date.null, d.`Array(String)`.null from test format Null; -select d, d.UInt64.null, d.Date.null, d.`Array(Variant(String, UInt64))`.null, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null from test format Null; -select d.UInt64.null, d.Date.null, d.`Array(Variant(String, UInt64))`.null, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null, d.`Array(Variant(String, UInt64))`.String.null from test format Null; +select d, d.UInt64.null, d.String.null from test format Null; +select d.UInt64.null, d.String.null from test format Null; +select d.Int8.null, d.Date.null from test format Null; +select d, d.UInt64.null, d.Date.null, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null from test format Null; +select d.UInt64.null, d.Date.null, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null, d.`Array(Variant(String, UInt64))`.String.null from test format Null; select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64.null, d.`Array(Variant(String, UInt64))`.UInt64.null from test format Null; select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64.null, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null from test format Null; select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64.null, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; diff --git a/tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.reference b/tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.reference new file mode 100644 index 000000000000..c386f99e34af --- /dev/null +++ b/tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.reference @@ -0,0 +1 @@ +[0,1,0] diff --git a/tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.sql b/tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.sql new file mode 100644 index 000000000000..6afb40336005 --- /dev/null +++ b/tests/queries/0_stateless/03640_variant_array_null_map_subcolumn.sql @@ -0,0 +1,6 @@ +drop table if exists test; +create table test (v Variant(Array(Nullable(String)))) engine=MergeTree order by tuple(); +insert into test select ['hello', null, 'world']; +select v.`Array(Nullable(String))`.null from test; +drop table test; + From 9e77704aef45867e5d74fefe641b407b8a757677 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 14 Oct 2025 16:13:43 +0000 Subject: [PATCH 020/112] Backport #87903 to 25.8: Add more information for Iceberg SELECT queries profiling --- src/Common/ProfileEvents.cpp | 4 + src/Interpreters/IcebergMetadataLog.cpp | 16 +- src/Interpreters/IcebergMetadataLog.h | 5 +- .../DataLakes/Iceberg/IcebergIterator.cpp | 19 +- .../DataLakes/Iceberg/IcebergMetadata.cpp | 8 +- .../DataLakes/Iceberg/ManifestFile.cpp | 7 +- .../DataLakes/Iceberg/ManifestFile.h | 1 + .../DataLakes/Iceberg/ManifestFilesPruning.h | 8 +- 
.../Iceberg/StatelessMetadataFileGetter.cpp | 5 +-
 .../integration/test_storage_iceberg/test.py | 250 +++++++++++++-----
 10 files changed, 240 insertions(+), 83 deletions(-)
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index e6dee19233e0..fb451601fc2d 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -87,6 +87,10 @@ M(IcebergMetadataFilesCacheHits, "Number of times iceberg metadata files have been found in the cache.", ValueType::Number) \
     M(IcebergMetadataFilesCacheMisses, "Number of times iceberg metadata files have not been found in the iceberg metadata cache and had to be read from (remote) disk.", ValueType::Number) \
     M(IcebergMetadataFilesCacheWeightLost, "Approximate number of bytes evicted from the iceberg metadata cache.", ValueType::Number) \
+    M(IcebergMetadataReadWaitTimeMicroseconds, "Total time data readers spend waiting for iceberg metadata files to be read and parsed, summed across all reader threads.", ValueType::Microseconds) \
+    M(IcebergIteratorInitializationMicroseconds, "Total time spent on synchronous initialization of iceberg data iterators.", ValueType::Microseconds) \
+    M(IcebergMetadataUpdateMicroseconds, "Total time spent on synchronous updates of iceberg table metadata.", ValueType::Microseconds) \
+    M(IcebergMetadataReturnedObjectInfos, "Total number of returned object infos from iceberg iterator.", ValueType::Number) \
     M(VectorSimilarityIndexCacheHits, "Number of times an index granule has been found in the vector index cache.", ValueType::Number) \
     M(VectorSimilarityIndexCacheMisses, "Number of times an index granule has not been found in the vector index cache and had to be read from disk.", ValueType::Number) \
     M(VectorSimilarityIndexCacheWeightLost, "Approximate number of bytes evicted from the vector index cache.", ValueType::Number) \
diff --git a/src/Interpreters/IcebergMetadataLog.cpp b/src/Interpreters/IcebergMetadataLog.cpp
index ab8266dfdc30..7388e55a4fcb 100644
--- a/src/Interpreters/IcebergMetadataLog.cpp
+++ b/src/Interpreters/IcebergMetadataLog.cpp
@@ -44,6 +44,12 @@ namespace
 const DataTypePtr rowType = makeNullable(std::make_shared());
+auto iceberg_pruning_status_datatype = std::make_shared(DataTypeEnum8::Values{
+    {"NotPruned", static_cast(Iceberg::PruningReturnStatus::NOT_PRUNED)},
+    {"PartitionPruned", static_cast(Iceberg::PruningReturnStatus::PARTITION_PRUNED)},
+    {"MinMaxIndexPruned", static_cast(Iceberg::PruningReturnStatus::MIN_MAX_INDEX_PRUNED)}});
+
+const DataTypePtr iceberg_pruning_status_datatype_nullable = makeNullable(iceberg_pruning_status_datatype);
 }
 ColumnsDescription IcebergMetadataLogElement::getColumnsDescription()
@@ -64,7 +70,8 @@ ColumnsDescription IcebergMetadataLogElement::getColumnsDescription()
     {"table_path", std::make_shared(), "Table path."},
     {"file_path", std::make_shared(), "File path."},
     {"content", std::make_shared(), "Content in a JSON format (json file content, avro metadata or avro entry)."},
-    {"row_in_file", rowType, "Row in file."}};
+    {"row_in_file", rowType, "Row in file."},
+    {"pruning_status", iceberg_pruning_status_datatype_nullable, "Status of partition pruning or min-max index pruning for the file."}};
 }
 void IcebergMetadataLogElement::appendToBlock(MutableColumns & columns) const
@@ -79,6 +86,7 @@ void IcebergMetadataLogElement::appendToBlock(MutableColumns & columns) const
     columns[column_index++]->insert(file_path);
     columns[column_index++]->insert(metadata_content);
     columns[column_index++]->insert(row_in_file ?
*row_in_file : rowType->getDefault()); + columns[column_index++]->insert(pruning_status ? *pruning_status : iceberg_pruning_status_datatype_nullable->getDefault()); } void insertRowToLogTable( @@ -87,7 +95,8 @@ void insertRowToLogTable( IcebergMetadataLogLevel row_log_level, const String & table_path, const String & file_path, - std::optional row_in_file) + std::optional row_in_file, + std::optional pruning_status) { IcebergMetadataLogLevel set_log_level = local_context->getSettingsRef()[Setting::iceberg_metadata_log_level].value; if (set_log_level < row_log_level) @@ -104,6 +113,7 @@ void insertRowToLogTable( .table_path = table_path, .file_path = file_path, .metadata_content = row, - .row_in_file = row_in_file}); + .row_in_file = row_in_file, + .pruning_status = pruning_status}); } } diff --git a/src/Interpreters/IcebergMetadataLog.h b/src/Interpreters/IcebergMetadataLog.h index d4c65c4d6346..b43e2cfa47b2 100644 --- a/src/Interpreters/IcebergMetadataLog.h +++ b/src/Interpreters/IcebergMetadataLog.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -15,6 +16,7 @@ struct IcebergMetadataLogElement String file_path; String metadata_content; std::optional row_in_file; + std::optional pruning_status; static std::string name() { return "IcebergMetadataLog"; } @@ -29,7 +31,8 @@ void insertRowToLogTable( IcebergMetadataLogLevel row_log_level, const String & table_path, const String & file_path, - std::optional row_in_file); + std::optional row_in_file, + std::optional pruning_status); class IcebergMetadataLog : public SystemLog { diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergIterator.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergIterator.cpp index c45f082d68a4..fc3afcf3c883 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergIterator.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergIterator.cpp @@ -1,4 +1,3 @@ - #include "config.h" #if USE_AVRO @@ -45,13 +44,21 @@ #include +#include #include #include +#include +#include +#include + + namespace ProfileEvents { extern const Event IcebergPartitionPrunedFiles; extern const Event IcebergMinMaxIndexPrunedFiles; +extern const Event IcebergMetadataReadWaitTimeMicroseconds; +extern const Event IcebergMetadataReturnedObjectInfos; }; @@ -158,6 +165,14 @@ std::optional SingleThreadIcebergKeysIterator::next() local_context); } auto pruning_status = current_pruner ? 
current_pruner->canBePruned(manifest_file_entry) : PruningReturnStatus::NOT_PRUNED; + insertRowToLogTable( + local_context, + "", + DB::IcebergMetadataLogLevel::ManifestFileEntry, + configuration.lock()->getRawPath().path, + current_manifest_file_content->getPathToManifestFile(), + manifest_file_entry.row_number, + pruning_status); switch (pruning_status) { case PruningReturnStatus::NOT_PRUNED: @@ -321,6 +336,7 @@ IcebergIterator::IcebergIterator( ObjectInfoPtr IcebergIterator::next(size_t) { + ProfileEventTimeIncrement watch(ProfileEvents::IcebergMetadataReadWaitTimeMicroseconds); Iceberg::ManifestFileEntry manifest_file_entry; if (blocking_queue.pop(manifest_file_entry)) { @@ -333,6 +349,7 @@ ObjectInfoPtr IcebergIterator::next(size_t) { object_info->addEqualityDeleteObject(equality_delete); } + ProfileEvents::increment(ProfileEvents::IcebergMetadataReturnedObjectInfos); return object_info; } { diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp index a92630e1bb05..8cfc7ed86039 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp @@ -68,7 +68,9 @@ namespace ProfileEvents { - extern const Event IcebergTrivialCountOptimizationApplied; +extern const Event IcebergIteratorInitializationMicroseconds; +extern const Event IcebergMetadataUpdateMicroseconds; +extern const Event IcebergTrivialCountOptimizationApplied; } namespace DB @@ -249,6 +251,7 @@ bool IcebergMetadata::update(const ContextPtr & local_context) DB::IcebergMetadataLogLevel::Metadata, configuration_ptr->getRawPath().path, metadata_file_path, + std::nullopt, std::nullopt); if (previous_snapshot_id != relevant_snapshot_id) @@ -587,6 +590,7 @@ DataLakeMetadataPtr IcebergMetadata::create( DB::IcebergMetadataLogLevel::Metadata, configuration_ptr->getRawPath().path, metadata_file_path, + std::nullopt, std::nullopt); return std::make_unique(object_storage, configuration_ptr, local_context, metadata_version, format_version, object, cache_ptr, compression_method); } @@ -782,6 +786,8 @@ ObjectIterator IcebergMetadata::iterate( { SharedLockGuard lock(mutex); + ProfileEventTimeIncrement watch(ProfileEvents::IcebergIteratorInitializationMicroseconds); + auto table_snapshot = std::make_shared(last_metadata_version, relevant_snapshot_schema_id, relevant_snapshot_id); return std::make_shared( diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp index 00c9e34d010c..98ffcc794fa1 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp @@ -160,6 +160,7 @@ ManifestFileContent::ManifestFileContent( DB::IcebergMetadataLogLevel::ManifestFileMetadata, common_path, path_to_manifest_file, + std::nullopt, std::nullopt); for (const auto & column_name : {f_status, f_data_file}) @@ -235,7 +236,8 @@ ManifestFileContent::ManifestFileContent( DB::IcebergMetadataLogLevel::ManifestFileEntry, common_path, path_to_manifest_file, - i); + i, + std::nullopt); FileContentType content_type = FileContentType::DATA; if (format_version_ > 1) content_type = FileContentType(manifest_file_deserializer.getValueFromRowByName(i, c_data_file_content, TypeIndex::Int32).safeGet()); @@ -418,6 +420,7 @@ ManifestFileContent::ManifestFileContent( this->data_files_without_deleted.emplace_back( file_path_key, file_path, + i, status, 
added_sequence_number, snapshot_id, @@ -444,6 +447,7 @@ ManifestFileContent::ManifestFileContent( this->position_deletes_files_without_deleted.emplace_back( file_path_key, file_path, + i, status, added_sequence_number, snapshot_id, @@ -472,6 +476,7 @@ ManifestFileContent::ManifestFileContent( this->equality_deletes_files.emplace_back( file_path_key, file_path, + i, status, added_sequence_number, snapshot_id, diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h index ac12be343439..a045e976f3c6 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h @@ -61,6 +61,7 @@ struct ManifestFileEntry String file_path_key; // It's a processed file path to be used by Object Storage String file_path; + Int64 row_number; ManifestEntryStatus status; Int64 added_sequence_number; diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFilesPruning.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFilesPruning.h index bbe291f56150..f17a1ee97326 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFilesPruning.h +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFilesPruning.h @@ -1,7 +1,6 @@ #pragma once #include "config.h" -#if USE_AVRO #include #include @@ -21,6 +20,13 @@ enum class PruningReturnStatus MIN_MAX_INDEX_PRUNED }; +} + +#if USE_AVRO + +namespace DB::Iceberg +{ + struct ManifestFileEntry; class ManifestFileContent; diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp index a0f3c628d774..2f76d0a0bb03 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp @@ -153,6 +153,7 @@ ManifestFileCacheKeys getManifestList( DB::IcebergMetadataLogLevel::ManifestListMetadata, configuration_ptr->getRawPath().path, filename, + std::nullopt, std::nullopt); for (size_t i = 0; i < manifest_list_deserializer.rows(); ++i) @@ -187,7 +188,8 @@ ManifestFileCacheKeys getManifestList( DB::IcebergMetadataLogLevel::ManifestListEntry, configuration_ptr->getRawPath().path, filename, - i); + i, + std::nullopt); } /// We only return the list of {file name, seq number} for cache. /// Because ManifestList holds a list of ManifestFilePtr which consume much memory space. 
@@ -248,7 +250,6 @@ std::pair parseTableSchemaV1Method(const Poco::J auto current_schema_id = schema->getValue(f_schema_id); return {schema, current_schema_id}; } - } } diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index b8fa49f8b1aa..4ec8b39dc75d 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -2984,6 +2984,147 @@ def test_writes_mutate_delete(started_cluster, storage_type, partition_type): df = spark.read.format("iceberg").load(f"/iceberg_data/default/{TABLE_NAME}").collect() assert len(df) == 1 +class PrunedInfo: + def __init__(self, not_pruned, partition_pruned, min_max_index_pruned): + self.not_pruned = not_pruned + self.partition_pruned = partition_pruned + self.min_max_index_pruned = min_max_index_pruned + + def __repr__(self): + return "PrunedInfo(not_pruned={}, partition_pruned={}, min_max_index_pruned={})".format(self.not_pruned, self.partition_pruned, self.min_max_index_pruned) + + def __eq__(self, other): + return (self.not_pruned == other.not_pruned and + self.partition_pruned == other.partition_pruned and + self.min_max_index_pruned == other.min_max_index_pruned) + +def get_date_and_time_columns(instance, query_id: str): + result = dict() + for name in ['event_date', 'event_time']: + query_result = instance.query(f"SELECT {name} FROM system.iceberg_metadata_log WHERE query_id = '{query_id}'") + result[name] = query_result.split('\n') + result[name] = list(filter(lambda x: len(x) > 0, result[name])) + return result + +def get_iceberg_metadata_to_dict(instance, query_id: str): + result = dict() + for name in ['content', 'content_type', 'table_path', 'file_path', 'row_in_file', 'pruning_status']: + # We are ok with duplicates in the table itself but for test purposes we want to remove duplicates here + select_distinct_expression = f"SELECT DISTINCT(*) FROM (SELECT content, content_type, table_path, file_path, row_in_file, pruning_status FROM system.iceberg_metadata_log WHERE query_id = '{query_id}') ORDER BY ALL" + query_result = instance.query(f"SELECT {name} FROM ({select_distinct_expression})") + print("Query result for {}: {}".format(name, query_result)) + result[name] = query_result.split('\n')[:-1] + result['row_in_file'] = list(map(lambda x : int(x) if x.isdigit() else None, result['row_in_file'])) + result['pruning_status'] = list(map(lambda x : x if x != '\\N' else None, result['pruning_status'])) + print("Result dictionary: {}".format(result)) + return result + +def verify_result_dictionary(diction : dict, allowed_content_types : set): + prunned_info = PrunedInfo(0, 0, 0) + # Expected content_type and only it is present + if set(diction['content_type']) != allowed_content_types: + raise ValueError("Content type mismatch. Expected: {}, got: {}".format(allowed_content_types, set(diction['content_type']))) + # For all entries we have the same table_path + if not (len(set(diction['table_path'])) == 1 or (len(allowed_content_types) == 0 and len(diction['table_path']) == 0)): + raise ValueError("Unexpected number of table paths are found for one query. Set: {}".format(set(diction['table_path']))) + extensions = list(map(lambda x: x.split('.')[-1], diction['file_path'])) + for i in range(len(diction['content_type'])): + if diction['content_type'][i] == 'Metadata': + # File with content_type 'Metadata' has json extension + if extensions[i] != 'json': + raise ValueError("Unexpected file extension for Metadata. 
Expected: json, got: {}".format(extensions[i])) + else: + # File with content_types except 'Metadata' has avro extension + if extensions[i] != 'avro': + raise ValueError("Unexpected file extension for {}. Expected: avro, got: {}".format(diction['content_type'][i], extensions[i])) + + # All content is json-serializable + for content in diction['content']: + if content == '': + continue + try: + json.loads(content) + except: + raise ValueError("Content is not valid JSON. Content: {}".format(content)) + for file_path in set(diction['file_path']): + row_values = set() + number_of_missing_row_values = 0 + number_of_rows = 0 + partitioned_rows = set() + not_deleted_files = set() + for i in range(len(diction['file_path'])): + if file_path == diction['file_path'][i]: + if diction['row_in_file'][i] is not None: + row_values.add(diction['row_in_file'][i]) + # If row is present the type is entry + if diction['content_type'][i] not in ['ManifestFileEntry', 'ManifestListEntry']: + raise ValueError("Row should not be specified for an entry {}, file_path: {}".format(diction['content_type'][i], file_path)) + if diction['content'][i] != '': + number_of_rows += 1 + + if diction['content_type'][i] == 'ManifestFileEntry': + if diction['content'][i] == '': + if diction['pruning_status'][i] is None: + raise ValueError("Pruning status should be specified for this manifest file entry, file_path: {}".format(file_path)) + partitioned_rows.add(diction['row_in_file'][i]) + if diction['pruning_status'][i] == 'NotPruned': + prunned_info.not_pruned += 1 + elif diction['pruning_status'][i] == 'PartitionPruned': + prunned_info.partition_pruned += 1 + elif diction['pruning_status'][i] == 'MinMaxIndexPruned': + prunned_info.min_max_index_pruned += 1 + else: + raise ValueError("Unexpected pruning status: {}, file_path: {}".format(diction['pruning_status'][i], file_path)) + else: + data_object = json.loads(diction['content'][i]) + print("Data object: {}".format(data_object)) + if data_object['status'] < 2: + not_deleted_files.add(diction['row_in_file'][i]) + else: + # If row is not present that the type is metadata + if diction['content_type'][i] not in ['Metadata', 'ManifestFileMetadata', 'ManifestListMetadata']: + raise ValueError("Row should be specified for an entry {}, file_path: {}".format(diction['content_type'][i], file_path)) + + number_of_missing_row_values += 1 + if partitioned_rows != not_deleted_files: + raise ValueError("Partitioned rows are not consistent with not deleted files for file path: {}, partitioned rows: {}, not deleted files: {}".format(file_path, partitioned_rows, not_deleted_files)) + + # We have exactly one metadata file + if number_of_missing_row_values != 1: + raise ValueError("Not a one row value (corresponding to metadata file) is missing for file path: {}".format(file_path)) + + # Rows in avro files are consistent + if len(row_values) != number_of_rows: + raise ValueError("Unexpected number of row values for file path: {}".format(file_path)) + for i in range(number_of_rows): + if not i in row_values: + raise ValueError("Missing row value for file path: {}, missing row index: {}".format(file_path, i)) + return prunned_info + +def get_prunned_info_from_profile_events(instance, query_id: str): + instance.query("SYSTEM FLUSH LOGS") + + not_pruned = int( + instance.query( + f"SELECT ProfileEvents['IcebergMetadataReturnedObjectInfos'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'" + ) + ) + + partition_pruned = int( + instance.query( + f"SELECT 
ProfileEvents['IcebergPartitionPrunedFiles'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'" + ) + ) + + min_max_index_pruned = int( + instance.query( + f"SELECT ProfileEvents['IcebergMinMaxIndexPrunedFiles'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'" + ) + ) + + return PrunedInfo(not_pruned, partition_pruned, min_max_index_pruned) + + @pytest.mark.parametrize("format_version", ["1", "2"]) @pytest.mark.parametrize("storage_type", ["s3", "local", "azure"]) def test_system_iceberg_metadata(started_cluster, format_version, storage_type): @@ -2998,7 +3139,34 @@ def test_system_iceberg_metadata(started_cluster, format_version, storage_type): + get_uuid_str() ) - write_iceberg_from_df(spark, generate_data(spark, 0, 100), TABLE_NAME) + def execute_spark_query(query: str): + return execute_spark_query_general( + spark, + started_cluster, + storage_type, + TABLE_NAME, + query, + ) + + execute_spark_query( + f""" + CREATE TABLE {TABLE_NAME} ( + a INT, + b STRING + ) + USING iceberg + PARTITIONED BY (identity(a)) + OPTIONS('format-version'='2') + """ + ) + + for i in range(5): + spark.sql( + f""" + INSERT INTO {TABLE_NAME} VALUES + ({i}, '{i}'); + """ + ) default_upload_directory( started_cluster, @@ -3007,91 +3175,27 @@ def test_system_iceberg_metadata(started_cluster, format_version, storage_type): f"/iceberg_data/default/{TABLE_NAME}/", ) - def get_iceberg_metadata_to_dict(query_id: str): - instance = started_cluster.instances["node1"] - result = dict() - for name in ['content', 'content_type', 'table_path', 'file_path', 'row_in_file']: - # We are ok with duplicates in the table itself but for test purposes we want to remove duplicates here - select_distinct_expression = f"SELECT DISTINCT(*) FROM (SELECT content, content_type, table_path, file_path, row_in_file FROM system.iceberg_metadata_log WHERE query_id = '{query_id}') ORDER BY ALL" - query_result = instance.query(f"SELECT {name} FROM ({select_distinct_expression})") - result[name] = query_result.split('\n') - result[name] = list(filter(lambda x: len(x) > 0, result[name])) - result['row_in_file'] = list(map(lambda x : int(x) if x.isdigit() else None, result['row_in_file'])) - return result - - def verify_result_dictionary(diction : dict, allowed_content_types : set): - # Expected content_type and only it is present - if set(diction['content_type']) != allowed_content_types: - raise ValueError("Content type mismatch. Expected: {}, got: {}".format(allowed_content_types, set(diction['content_type']))) - # For all entries we have the same table_path - if not (len(set(diction['table_path'])) == 1 or (len(allowed_content_types) == 0 and len(diction['table_path']) == 0)): - raise ValueError("Unexpected number of table paths are found for one query. Set: {}".format(set(diction['table_path']))) - extensions = list(map(lambda x: x.split('.')[-1], diction['file_path'])) - for i in range(len(diction['content_type'])): - if diction['content_type'][i] == 'Metadata': - # File with content_type 'Metadata' has json extension - if extensions[i] != 'json': - raise ValueError("Unexpected file extension for Metadata. Expected: json, got: {}".format(extensions[i])) - else: - # File with content_types except 'Metadata' has avro extension - if extensions[i] != 'avro': - raise ValueError("Unexpected file extension for {}. 
Expected: avro, got: {}".format(diction['content_type'][i], extensions[i])) - - # All content is json-serializable - for content in diction['content']: - try: - json.loads(content) - except: - raise ValueError("Content is not valid JSON. Content: {}".format(content)) - for file_path in set(diction['file_path']): - row_values = set() - number_of_missing_row_values = 0 - number_of_rows = 0 - for i in range(len(diction['file_path'])): - if file_path == diction['file_path'][i]: - if diction['row_in_file'][i] is not None: - row_values.add(diction['row_in_file'][i]) - # If row is present the type is entry - if diction['content_type'][i] not in ['ManifestFileEntry', 'ManifestListEntry']: - raise ValueError("Row should not be specified for an entry {}, file_path: {}".format(diction['content_type'][i], file_path)) - number_of_rows += 1 - else: - # If row is not present that the type is metadata - if diction['content_type'][i] not in ['Metadata', 'ManifestFileMetadata', 'ManifestListMetadata']: - raise ValueError("Row should be specified for an entry {}, file_path: {}".format(diction['content_type'][i], file_path)) - - number_of_missing_row_values += 1 - - # We have exactly one metadata file - if number_of_missing_row_values != 1: - raise ValueError("Not a one row value (corresponding to metadata file) is missing for file path: {}".format(file_path)) - - # Rows in avro files are consistent - if len(row_values) != number_of_rows: - raise ValueError("Unexpected number of row values for file path: {}".format(file_path)) - for i in range(number_of_rows): - if not i in row_values: - raise ValueError("Missing row value for file path: {}, missing row index: {}".format(file_path, i)) - - create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster) content_types = ["Metadata", "ManifestListMetadata", "ManifestListEntry", "ManifestFileMetadata", "ManifestFileEntry"] settings = ["none", "metadata", "manifest_list_metadata", "manifest_list_entry", "manifest_file_metadata", "manifest_file_entry"] + for i in range(len(settings)): allowed_content_types = set(content_types[:i]) - query_id = TABLE_NAME + "_" + str(i) + "_" + uuid.uuid4().hex + query_id = TABLE_NAME + "_" + str(i) + "_" + get_uuid_str() - assert instance.query(f"SELECT * FROM {TABLE_NAME}", query_id = query_id, settings={"iceberg_metadata_log_level":settings[i]}) + instance.query(f"SELECT * FROM {TABLE_NAME} WHERE a >= 2", query_id = query_id, settings={"iceberg_metadata_log_level":settings[i], "use_iceberg_partition_pruning": 1}) - instance.query("SYSTEM FLUSH LOGS iceberg_metadata_log") + expected_prunned_info = get_prunned_info_from_profile_events(instance, query_id) - diction = get_iceberg_metadata_to_dict(query_id) + diction = get_iceberg_metadata_to_dict(instance, query_id) try: - verify_result_dictionary(diction, allowed_content_types) + if settings[i] == 'manifest_file_entry': + table_prunned_info = verify_result_dictionary(diction, allowed_content_types) + assert table_prunned_info == expected_prunned_info, "Not prunned files count mismatch. 
Table: {}, ProfileEvents: {}".format(table_prunned_info, expected_prunned_info) except: print("Dictionary: {}, Allowed Content Types: {}".format(diction, allowed_content_types)) raise From 7d3738c58436d7193091c5a09d8707a687c5613f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 15 Oct 2025 13:22:09 +0000 Subject: [PATCH 021/112] Backport #88544 to 25.8: Fix `ColumnBLOB should be converted to a regular column before usage` from `CREATE AS SELECT` --- src/Planner/Planner.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index bd33315d138d..65f27253906f 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1977,15 +1977,18 @@ void Planner::buildPlanForQueryNode() addAdditionalFilterStepIfNeeded(query_plan, query_node, select_query_options, planner_context); } + const auto & client_info = query_context->getClientInfo(); + // Not all cases are supported here yet. E.g. for this query: // select * from remote('127.0.0.{1,2}', numbers_mt(1e6)) group by number // we will have `BlocksMarshallingStep` added to the query plan, but not for // select * from remote('127.0.0.{1,2}', numbers_mt(1e6)) // because `to_stage` for it will be `QueryProcessingStage::Complete`. if (query_context->getSettingsRef()[Setting::enable_parallel_blocks_marshalling] - && query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && select_query_options.to_stage != QueryProcessingStage::Complete // Don't do it for INSERT SELECT, for example - && query_context->getClientInfo().distributed_depth <= 1 // Makes sense for higher depths too, just not supported + && client_info.distributed_depth <= 1 // Makes sense for higher depths too, just not supported + && !client_info.is_replicated_database_internal ) query_plan.addStep(std::make_unique(query_plan.getCurrentHeader())); From 438a73f564128592e7cf735722d59a9fa65890a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 15 Oct 2025 16:14:09 +0000 Subject: [PATCH 022/112] Backport #87029 to 25.8: Fix auto cluster functions schema handling --- src/TableFunctions/TableFunctionURL.cpp | 8 ++++---- ...cluster_functions_with_parallel_replicas.reference | 1 + ..._auto_cluster_functions_with_parallel_replicas.sql | 11 ++++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 0cbacdf42a16..2698128f5723 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -111,11 +111,11 @@ StoragePtr TableFunctionURL::getStorage( return std::make_shared( global_context, parallel_replicas_cluster_name, - filename, - format, - compression_method, + source, + format_, + compression_method_, StorageID(getDatabaseName(), table_name), - columns, + getActualTableStructure(global_context, true), ConstraintsDescription{}, configuration); } diff --git a/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.reference b/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.reference index e60ede2aeaab..3fcc0142b38a 100644 --- a/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.reference +++ b/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.reference @@ -33,6 +33,7 @@ Expression ((Project names + Projection)) ReadFromObjectStorage 4 4 +4 Expression ((Project 
names + (Projection + Change column names to column identifiers))) ReadFromURL Expression ((Project names + (Projection + Change column names to column identifiers))) diff --git a/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.sql b/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.sql index 0814052a3811..92e0e112625c 100644 --- a/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.sql +++ b/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.sql @@ -20,10 +20,15 @@ EXPLAIN SELECT number FROM system.numbers n JOIN (SELECT * FROM s3('http://local SELECT count() FROM s3('http://localhost:11111/test/a.tsv', 'TSV'); DROP TABLE IF EXISTS dupe_test_with_auto_functions; -CREATE TABLE dupe_test_with_auto_functions (c1 String, c2 String, c3 String) ENGINE = MergeTree ORDER BY c1; +CREATE TABLE dupe_test_with_auto_functions (n1 String, n2 String, n3 String) ENGINE = MergeTree ORDER BY n1; INSERT INTO dupe_test_with_auto_functions SELECT * FROM s3('http://localhost:11111/test/a.tsv', 'TSV'); SELECT count() FROM dupe_test_with_auto_functions; +DROP TABLE IF EXISTS insert_with_url_function; +CREATE TABLE insert_with_url_function (n1 String, n2 String, n3 String) ENGINE = MergeTree ORDER BY n1; +INSERT INTO insert_with_url_function SELECT * FROM url('http://localhost:11111/test/a.tsv', 'TSV'); +SELECT count() FROM insert_with_url_function; + SET parallel_replicas_for_cluster_engines=false; @@ -33,11 +38,11 @@ EXPLAIN SELECT * FROM s3('http://localhost:11111/test/a.tsv', 'TSV'); SELECT count() FROM s3('http://localhost:11111/test/a.tsv', 'TSV'); DROP TABLE IF EXISTS dupe_test_without_cluster_functions; -CREATE TABLE dupe_test_without_cluster_functions (c1 String, c2 String, c3 String) ENGINE = MergeTree ORDER BY c1; +CREATE TABLE dupe_test_without_cluster_functions (n1 String, n2 String, n3 String) ENGINE = MergeTree ORDER BY n1; INSERT INTO dupe_test_without_cluster_functions SELECT * FROM s3('http://localhost:11111/test/a.tsv', 'TSV'); SELECT count() FROM dupe_test_without_cluster_functions; DROP TABLE IF EXISTS dupe_test_with_cluster_function; -CREATE TABLE dupe_test_with_cluster_function (c1 String, c2 String, c3 String) ENGINE = MergeTree ORDER BY c1; +CREATE TABLE dupe_test_with_cluster_function (n1 String, n2 String, n3 String) ENGINE = MergeTree ORDER BY n1; INSERT INTO dupe_test_with_cluster_function SELECT * FROM s3Cluster('test_cluster_two_shards', 'http://localhost:11111/test/a.tsv', 'TSV'); SELECT count() FROM dupe_test_with_cluster_function; From 4e6df7f240f930c023a914a166783a11ad38f277 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 16 Oct 2025 12:17:45 +0000 Subject: [PATCH 023/112] Backport #88484 to 25.8: Dynamic `backups.max_attempts_after_bad_version` to work on big clusters --- src/Backups/BackupCoordinationStageSync.cpp | 3 +- .../concurrency_helper.py | 59 +++++++++++++ .../test_concurrency.py | 61 +++++--------- .../test_disallow_concurrency.py | 83 ++++--------------- .../test_huge_concurrent_restore.py | 78 +++++++++++++++++ 5 files changed, 173 insertions(+), 111 deletions(-) create mode 100644 tests/integration/test_backup_restore_on_cluster/concurrency_helper.py create mode 100644 tests/integration/test_backup_restore_on_cluster/test_huge_concurrent_restore.py diff --git a/src/Backups/BackupCoordinationStageSync.cpp b/src/Backups/BackupCoordinationStageSync.cpp index 60b06be31226..b86c4309e104 100644 --- a/src/Backups/BackupCoordinationStageSync.cpp +++ 
b/src/Backups/BackupCoordinationStageSync.cpp @@ -115,7 +115,8 @@ BackupCoordinationStageSync::BackupCoordinationStageSync( , failure_after_host_disconnected_for_seconds(with_retries.getKeeperSettings().failure_after_host_disconnected_for_seconds) , finish_timeout_after_error(with_retries.getKeeperSettings().finish_timeout_after_error) , sync_period_ms(with_retries.getKeeperSettings().sync_period_ms) - , max_attempts_after_bad_version(with_retries.getKeeperSettings().max_attempts_after_bad_version) + // all_hosts.size() is added to max_attempts_after_bad_version since each host change the num_hosts node once, and it's a valid case + , max_attempts_after_bad_version(with_retries.getKeeperSettings().max_attempts_after_bad_version + all_hosts.size()) , zookeeper_path(zookeeper_path_) , root_zookeeper_path(zookeeper_path.parent_path().parent_path()) , operation_zookeeper_path(zookeeper_path.parent_path()) diff --git a/tests/integration/test_backup_restore_on_cluster/concurrency_helper.py b/tests/integration/test_backup_restore_on_cluster/concurrency_helper.py new file mode 100644 index 000000000000..0442357c5e62 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/concurrency_helper.py @@ -0,0 +1,59 @@ +from pathlib import Path +from typing import Callable, List + +from helpers.cluster import ClickHouseCluster, ClickHouseInstance + + +def generate_cluster_def(file: str, num_nodes: int) -> str: + path = ( + Path(__file__).parent / f"_gen/cluster_{Path(file).stem}_{num_nodes}_nodes.xml" + ) + path.parent.mkdir(parents=True, exist_ok=True) + replicas = "\n".join( + f""" + node{i} + 9000 + """ + for i in range(num_nodes) + ) + path.write_text( + encoding="utf-8", + data=f""" + + + +{replicas} + + + +""", + ) + return str(path.absolute()) + + +def add_nodes_to_cluster( + cluster: ClickHouseCluster, + num_nodes: int, + main_configs: List[str], + user_configs: List[str], +) -> List[ClickHouseInstance]: + nodes = [ + cluster.add_instance( + f"node{i}", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": f"node{i}", "shard": "shard1"}, + with_zookeeper=True, + ) + for i in range(num_nodes) + ] + return nodes + + +def create_test_table(node: ClickHouseInstance) -> None: + node.query( + """CREATE TABLE tbl ON CLUSTER 'cluster' ( x UInt64 ) +ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}') +ORDER BY tuple()""" + ) diff --git a/tests/integration/test_backup_restore_on_cluster/test_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_concurrency.py index 304ae4af752d..e9354a27fdd2 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_concurrency.py @@ -1,50 +1,32 @@ import concurrent -import os.path import time from random import randint, random +from typing import List import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance from helpers.test_tools import TSV, assert_eq_with_retry +from .concurrency_helper import ( + add_nodes_to_cluster, + create_test_table, + generate_cluster_def, +) + cluster = ClickHouseCluster(__file__) num_nodes = 10 -def generate_cluster_def(): - path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "./_gen/cluster_for_concurrency_test.xml", - ) - os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, "w") as f: - f.write("\n\t\n\t\t\n\t\t\t\n") - for i in range(num_nodes): - f.write( - 
f"\t\t\t\t\n\t\t\t\t\tnode{i}\n\t\t\t\t\t9000\n\t\t\t\t\n" - ) - f.write("\t\t\t\n\t\t\n\t\n") - return path - - -main_configs = ["configs/backups_disk.xml", generate_cluster_def()] +main_configs = [ + "configs/backups_disk.xml", + generate_cluster_def(__file__, num_nodes), +] # No [Zoo]Keeper retries for tests with concurrency user_configs = ["configs/allow_database_types.xml"] -nodes = [] -for i in range(num_nodes): - nodes.append( - cluster.add_instance( - f"node{i}", - main_configs=main_configs, - user_configs=user_configs, - external_dirs=["/backups/"], - macros={"replica": f"node{i}", "shard": "shard1"}, - with_zookeeper=True, - ) - ) +nodes = add_nodes_to_cluster(cluster, num_nodes, main_configs, user_configs) node0 = nodes[0] @@ -70,23 +52,18 @@ def drop_after_test(): backup_id_counter = 0 +def create_and_fill_table() -> None: + create_test_table(node0) + for i, node in enumerate(nodes): + node.query(f"INSERT INTO tbl VALUES ({i})") + + def new_backup_name(): global backup_id_counter backup_id_counter += 1 return f"Disk('backups', '{backup_id_counter}')" -def create_and_fill_table(): - node0.query( - "CREATE TABLE tbl ON CLUSTER 'cluster' (" - "x Int32" - ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}')" - "ORDER BY tuple()" - ) - for i in range(num_nodes): - nodes[i].query(f"INSERT INTO tbl VALUES ({i})") - - expected_sum = num_nodes * (num_nodes - 1) // 2 diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 3dea986e3d97..6913a359d8da 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -1,72 +1,24 @@ import concurrent -import os.path -import re -import time -from random import randint import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV, assert_eq_with_retry +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.test_tools import assert_eq_with_retry + +from .concurrency_helper import ( + add_nodes_to_cluster, + create_test_table, + generate_cluster_def, +) cluster = ClickHouseCluster(__file__) num_nodes = 2 - -def generate_cluster_def(): - path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "./_gen/cluster_for_test_disallow_concurrency.xml", - ) - os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, "w") as f: - f.write( - """ - - - - - """ - ) - for i in range(num_nodes): - f.write( - """ - - node""" - + str(i) - + """ - 9000 - - """ - ) - f.write( - """ - - - - - """ - ) - return path - - -main_configs = ["configs/disallow_concurrency.xml", generate_cluster_def()] # No [Zoo]Keeper retries for tests with concurrency user_configs = ["configs/allow_database_types.xml"] -nodes = [] -for i in range(num_nodes): - nodes.append( - cluster.add_instance( - f"node{i}", - main_configs=main_configs, - user_configs=user_configs, - external_dirs=["/backups/"], - macros={"replica": f"node{i}", "shard": "shard1"}, - with_zookeeper=True, - ) - ) +nodes = add_nodes_to_cluster(cluster, num_nodes, main_configs, user_configs) node0 = nodes[0] @@ -96,23 +48,18 @@ def drop_after_test(): backup_id_counter = 0 +def create_and_fill_table() -> None: + create_test_table(node0) + for node in nodes: + node.query("INSERT INTO tbl SELECT number FROM numbers(40000000)") + + def new_backup_name(): global backup_id_counter backup_id_counter += 1 
return f"Disk('backups', '{backup_id_counter}')" -def create_and_fill_table(): - node0.query( - "CREATE TABLE tbl ON CLUSTER 'cluster' (" - "x UInt64" - ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}')" - "ORDER BY x" - ) - for i in range(num_nodes): - nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)") - - def get_status_and_error(node, backup_or_restore_id): return ( node.query( diff --git a/tests/integration/test_backup_restore_on_cluster/test_huge_concurrent_restore.py b/tests/integration/test_backup_restore_on_cluster/test_huge_concurrent_restore.py new file mode 100644 index 000000000000..a6474c7a93d7 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/test_huge_concurrent_restore.py @@ -0,0 +1,78 @@ +from typing import List + +import pytest + +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.test_tools import TSV + +from .concurrency_helper import ( + add_nodes_to_cluster, + create_test_table, + generate_cluster_def, +) + +cluster = ClickHouseCluster(__file__) + +# Testing backups.max_attempts_after_bad_version is dynamic, and depends on num_nodes +num_nodes = 20 + +main_configs = [ + "configs/backups_disk.xml", + generate_cluster_def(__file__, num_nodes), +] +# No [Zoo]Keeper retries for tests with concurrency +user_configs = ["configs/allow_database_types.xml"] + +nodes = add_nodes_to_cluster(cluster, num_nodes, main_configs, user_configs) + +node0 = nodes[0] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' SYNC") + node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' SYNC") + + +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"Disk('backups', '{backup_id_counter}')" + + +def create_and_fill_table() -> None: + create_test_table(node0) + for i, node in enumerate(nodes): + node.query(f"INSERT INTO tbl VALUES ({i})") + + +expected_sum = num_nodes * (num_nodes - 1) // 2 + + +def test_backup_restore_huge_cluster(): + create_and_fill_table() + + backup_name = new_backup_name() + node0.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") + + node0.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") + node0.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") + node0.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") + + for i in range(num_nodes): + assert nodes[i].query("SELECT sum(x) FROM tbl") == TSV([expected_sum]) From a39fb565e6f3fe53cc71ce20727088f6a7ad8893 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 16 Oct 2025 12:19:37 +0000 Subject: [PATCH 024/112] Backport #88154 to 25.8: Cleanup stale replicas from DDL Worker replicas set --- src/Interpreters/DDLWorker.cpp | 27 +++++++++++++++++++++++++++ src/Interpreters/DDLWorker.h | 1 + 2 files changed, 28 insertions(+) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 5590029fc66f..b1922a694919 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -1396,6 +1396,32 @@ void DDLWorker::markReplicasActive(bool /*reinitialized*/) } } +void DDLWorker::cleanupStaleReplicas(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper) +{ + auto replicas = zookeeper->getChildren(replicas_dir); + static constexpr Int64 REPLICA_MAX_INACTIVE_SECONDS = 
86400; + for (const auto & replica : replicas) + { + auto replica_path = fs::path(replicas_dir) / replica; + auto responses = zookeeper->tryGet({replica_path, fs::path(replica_path) / "active"}); + /// Replica not active + if (responses[1].error == Coordination::Error::ZNONODE) + { + auto stat = responses[0].stat; + /// Replica was not active for too long, let's cleanup to avoid polluting Keeper with + /// removed replicas + if (stat.mtime / 1000 + REPLICA_MAX_INACTIVE_SECONDS < current_time_seconds) + { + LOG_INFO(log, "Replica {} is stale, removing it", replica); + auto code = zookeeper->tryRemove(replica_path, -1); + if (code != Coordination::Error::ZOK) + LOG_WARNING(log, "Cannot remove stale replica {}, code {}", replica, Coordination::errorMessage(code)); + } + } + } + +} + void DDLWorker::runCleanupThread() { setThreadName("DDLWorkerClnr"); @@ -1423,6 +1449,7 @@ void DDLWorker::runCleanupThread() continue; cleanupQueue(current_time_seconds, zookeeper); + cleanupStaleReplicas(current_time_seconds, zookeeper); last_cleanup_time_seconds = current_time_seconds; } catch (...) diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index 6717040fa033..abf0a9f84098 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -157,6 +157,7 @@ class DDLWorker /// Checks and cleanups queue's nodes void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper); + void cleanupStaleReplicas(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper); virtual bool canRemoveQueueEntry(const String & entry_name, const Coordination::Stat & stat); /// Init task node From 5200b9cbac5826a7b0d44dfb8466fc18cf7a9c58 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 16 Oct 2025 18:15:01 +0000 Subject: [PATCH 025/112] Backport #87958 to 25.8: Don't remove injective functions from GROUP BY if arguments types are not allowed in GROUP BY --- .../OptimizeGroupByInjectiveFunctionsPass.cpp | 27 ++++++++++++++++++- src/Interpreters/TreeOptimizer.cpp | 7 +++++ ...injective_functoon_bad_arguments.reference | 23 ++++++++++++++++ ...up_by_injective_functoon_bad_arguments.sql | 8 ++++++ 4 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.reference create mode 100644 tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.sql diff --git a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp index 9e1c21375fae..5293ba0bf860 100644 --- a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp @@ -13,6 +13,7 @@ namespace Setting { extern const SettingsBool group_by_use_nulls; extern const SettingsBool optimize_injective_functions_in_group_by; + extern const SettingsBool allow_suspicious_types_in_group_by; } namespace @@ -88,7 +89,8 @@ class OptimizeGroupByInjectiveFunctionsVisitor : public InDepthQueryTreeVisitorW // Aggregate functions are not allowed in GROUP BY clause auto function = function_node->getFunctionOrThrow(); - bool can_be_eliminated = function->isInjective(function_node->getArgumentColumns()); + auto arguments = function_node->getArgumentColumns(); + bool can_be_eliminated = function->isInjective(arguments) && isValidGroupByKeyTypes(arguments); if (can_be_eliminated) { @@ -106,6 +108,29 @@ class OptimizeGroupByInjectiveFunctionsVisitor : public InDepthQueryTreeVisitorW grouping_set = 
std::move(new_group_by_keys); } + + bool isValidGroupByKeyTypes(const ColumnsWithTypeAndName & columns) const + { + if (getContext()->getSettingsRef()[Setting::allow_suspicious_types_in_group_by]) + return true; + + bool is_valid = true; + auto check = [&](const IDataType & type) + { + /// Dynamic and Variant types are not allowed in GROUP BY by default. + is_valid &= !isDynamic(type) && !isVariant(type); + }; + + for (const auto & column : columns) + { + check(*column.type); + column.type->forEachChild(check); + if (!is_valid) + break; + } + + return is_valid; + } }; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 7f794c3e90e8..c56b65ba7c69 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -60,6 +60,7 @@ namespace Setting extern const SettingsBool optimize_redundant_functions_in_order_by; extern const SettingsBool optimize_rewrite_array_exists_to_has; extern const SettingsBool optimize_or_like_chain; + extern const SettingsBool optimize_injective_functions_in_group_by; } namespace ErrorCodes @@ -132,6 +133,12 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) { if (const auto * function = group_exprs[i]->as()) { + if (!settings[Setting::optimize_injective_functions_in_group_by]) + { + ++i; + continue; + } + /// assert function is injective if (possibly_injective_function_names.contains(function->name)) { diff --git a/tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.reference b/tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.reference new file mode 100644 index 000000000000..95b069a1550e --- /dev/null +++ b/tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.reference @@ -0,0 +1,23 @@ +1 str +1 str +1 str +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + toString(json.a) String + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + FUNCTION id: 3, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: json.a, result_type: Dynamic, source_id: 6 + JOIN TREE + TABLE id: 6, alias: __table1, table_name: default.test + GROUP BY + LIST id: 7, nodes: 1 + FUNCTION id: 8, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: json.a, result_type: Dynamic, source_id: 6 + SETTINGS enable_analyzer=1 optimize_injective_functions_in_group_by=1 diff --git a/tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.sql b/tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.sql new file mode 100644 index 000000000000..0cacbda282bd --- /dev/null +++ b/tests/queries/0_stateless/03641_group_by_injective_functoon_bad_arguments.sql @@ -0,0 +1,8 @@ +create table test (json JSON) engine=MergeTree order by tuple(); +insert into test select '{"a" : "str"}'; +select count(), toString(json.a) from test group by toString(json.a) settings enable_analyzer=0, optimize_injective_functions_in_group_by=0; +select count(), toString(json.a) from test group by toString(json.a) settings enable_analyzer=1, optimize_injective_functions_in_group_by=0; +select count(), toString(json.a) from test group by toString(json.a) settings enable_analyzer=1, optimize_injective_functions_in_group_by=1; +explain query tree select count(), toString(json.a) from test group by toString(json.a) settings 
enable_analyzer=1, optimize_injective_functions_in_group_by=1; +drop table test; + From dcdda50c7e316baa250049db9cc8d7fb5d490413 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 17 Oct 2025 08:00:24 +0200 Subject: [PATCH 026/112] Fix wrong conflict resolution --- .../test_disallow_concurrency.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 6913a359d8da..fb985b61ac88 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -15,6 +15,11 @@ num_nodes = 2 + +main_configs = [ + "configs/disallow_concurrency.xml", + generate_cluster_def(__file__, num_nodes), +] # No [Zoo]Keeper retries for tests with concurrency user_configs = ["configs/allow_database_types.xml"] From 976a55951cf6669e1c51944f513407aa5681ada8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 17 Oct 2025 00:00:50 +0200 Subject: [PATCH 027/112] Reuse cluster_conf in backup_restore_on_cluster --- .../concurrency_helper.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/concurrency_helper.py b/tests/integration/test_backup_restore_on_cluster/concurrency_helper.py index 0442357c5e62..522e75b53b81 100644 --- a/tests/integration/test_backup_restore_on_cluster/concurrency_helper.py +++ b/tests/integration/test_backup_restore_on_cluster/concurrency_helper.py @@ -1,14 +1,15 @@ from pathlib import Path -from typing import Callable, List +from typing import List from helpers.cluster import ClickHouseCluster, ClickHouseInstance def generate_cluster_def(file: str, num_nodes: int) -> str: + # For multiple workers, it has race and sometimes errors out, + # so we generate it once and reuse path = ( Path(__file__).parent / f"_gen/cluster_{Path(file).stem}_{num_nodes}_nodes.xml" ) - path.parent.mkdir(parents=True, exist_ok=True) replicas = "\n".join( f""" node{i} @@ -16,9 +17,7 @@ def generate_cluster_def(file: str, num_nodes: int) -> str: """ for i in range(num_nodes) ) - path.write_text( - encoding="utf-8", - data=f""" + config = f""" @@ -26,7 +25,15 @@ def generate_cluster_def(file: str, num_nodes: int) -> str: -""", +""" + if path.is_file(): + existing = path.read_text(encoding="utf-8") + if existing == config: + return str(path.absolute()) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + encoding="utf-8", + data=config, ) return str(path.absolute()) From 5426bfcb1eb5635382b3d36259a0b4a99979cc27 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Oct 2025 08:16:09 +0000 Subject: [PATCH 028/112] Backport #88605 to 25.8: Fix potential crash caused by concurrent mutation of underlying const PREWHERE columns --- src/Storages/MergeTree/MergeTreeReadTask.cpp | 7 ++++++- .../03680_mergetree_shrink_const_from_prewhere.reference | 3 +++ .../03680_mergetree_shrink_const_from_prewhere.sql | 9 +++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.reference create mode 100644 tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.sql diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index 2da717539d77..0e8b05ba7957 100644 --- 
a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -255,7 +255,12 @@ MergeTreeReadTask::BlockAndProgress MergeTreeReadTask::read() if (read_result.num_rows != 0) { for (const auto & column : read_result.columns) - column->assumeMutableRef().shrinkToFit(); + { + /// We may have columns that has other references, usually it is a constant column that has been created during analysis + /// (that will not be const here anymore, i.e. after materialize()), and we do not need to shrink it anyway. + if (column->use_count() == 1) + column->assumeMutableRef().shrinkToFit(); + } block = sample_block.cloneWithColumns(read_result.columns); } diff --git a/tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.reference b/tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.reference new file mode 100644 index 000000000000..01e79c32a8c9 --- /dev/null +++ b/tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.reference @@ -0,0 +1,3 @@ +1 +2 +3 diff --git a/tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.sql b/tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.sql new file mode 100644 index 000000000000..eaf1ee51216a --- /dev/null +++ b/tests/queries/0_stateless/03680_mergetree_shrink_const_from_prewhere.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS const_node; +CREATE TABLE const_node (`v` Nullable(UInt8)) ENGINE = MergeTree ORDER BY tuple(); +SYSTEM STOP MERGES const_node; +INSERT INTO const_node VALUES (1); +INSERT INTO const_node VALUES (2); +INSERT INTO const_node VALUES (3); +-- Here we have condition with a constant "materialize(255)", for which convertToFullColumnIfConst() will return underlying column w/o copying, +-- and later shrinkToFit() will be called from multiple threads on this column, and leads to UB +SELECT v FROM const_node PREWHERE and(materialize(255), *) ORDER BY v; From bfa674c2cac73108a9427a447257aad5c459a193 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Oct 2025 11:11:25 +0000 Subject: [PATCH 029/112] Backport #88668 to 25.8: wrap `~PooledConnection` in try-catch --- .../Net/include/Poco/Net/HTTPChunkedStream.h | 2 +- base/poco/Net/src/HTTPChunkedStream.cpp | 4 +- src/Common/HTTPConnectionPool.cpp | 59 +++++++++++-------- .../tests/gtest_http_chunked_stream.cpp | 16 +++++ ...ed_losing_files_after_exception.reference} | 0 ...buted_losing_files_after_exception.sql.j2} | 0 6 files changed, 52 insertions(+), 29 deletions(-) create mode 100644 src/Common/tests/gtest_http_chunked_stream.cpp rename tests/queries/0_stateless/{02537_distributed_loosing_files_after_exception.reference => 02537_distributed_losing_files_after_exception.reference} (100%) rename tests/queries/0_stateless/{02537_distributed_loosing_files_after_exception.sql.j2 => 02537_distributed_losing_files_after_exception.sql.j2} (100%) diff --git a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h index d5263319ed38..604e66da5786 100644 --- a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h @@ -45,7 +45,7 @@ namespace Net ~HTTPChunkedStreamBuf(); void close(); - bool isComplete(bool read_from_device_to_check_eof = false); + bool isComplete(bool read_from_device_to_check_eof = false) noexcept; protected: int readFromDevice(char * buffer, std::streamsize length); diff --git a/base/poco/Net/src/HTTPChunkedStream.cpp b/base/poco/Net/src/HTTPChunkedStream.cpp index 
043c38ce9e71..ca0b73128ff4 100644 --- a/base/poco/Net/src/HTTPChunkedStream.cpp +++ b/base/poco/Net/src/HTTPChunkedStream.cpp @@ -140,7 +140,7 @@ int HTTPChunkedStreamBuf::readFromDevice(char* buffer, std::streamsize length) } -bool HTTPChunkedStreamBuf::isComplete(bool read_from_device_to_check_eof) +bool HTTPChunkedStreamBuf::isComplete(bool read_from_device_to_check_eof) noexcept { if (read_from_device_to_check_eof) { @@ -150,7 +150,7 @@ bool HTTPChunkedStreamBuf::isComplete(bool read_from_device_to_check_eof) /// "Unexpected EOF" exception would be thrown readFromDevice(nullptr, 0); } - catch (Poco::Net::MessageException &) + catch (...) { return false; } diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 33a85b3a7c09..a6db50c05455 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -438,38 +438,45 @@ class EndpointConnectionPool : public std::enable_shared_from_this(response_stream)) + if (bool(response_stream)) { - response_stream_completed = fixed_steam->isComplete(); + if (auto * fixed_steam = dynamic_cast(response_stream)) + { + response_stream_completed = fixed_steam->isComplete(); + } + else if (auto * chunked_steam = dynamic_cast(response_stream)) + { + response_stream_completed = chunked_steam->isComplete(); + } + else if (auto * http_stream = dynamic_cast(response_stream)) + { + response_stream_completed = http_stream->isComplete(); + } + else + { + response_stream_completed = false; + } } - else if (auto * chunked_steam = dynamic_cast(response_stream)) - { - response_stream_completed = chunked_steam->isComplete(); - } - else if (auto * http_stream = dynamic_cast(response_stream)) - { - response_stream_completed = http_stream->isComplete(); - } - else - { - response_stream_completed = false; - } - } - response_stream = nullptr; - Session::setSendDataHooks(); - Session::setReceiveDataHooks(); - Session::setSendThrottler(); - Session::setReceiveThrottler(); + response_stream = nullptr; + Session::setSendDataHooks(); + Session::setReceiveDataHooks(); + Session::setSendThrottler(); + Session::setReceiveThrottler(); - group->atConnectionDestroy(); + group->atConnectionDestroy(); - if (!isExpired) - if (auto lock = pool.lock()) - lock->atConnectionDestroy(*this); + if (!isExpired) + if (auto lock = pool.lock()) + lock->atConnectionDestroy(*this); - CurrentMetrics::sub(metrics.active_count); + CurrentMetrics::sub(metrics.active_count); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } private: diff --git a/src/Common/tests/gtest_http_chunked_stream.cpp b/src/Common/tests/gtest_http_chunked_stream.cpp new file mode 100644 index 000000000000..c737f4794105 --- /dev/null +++ b/src/Common/tests/gtest_http_chunked_stream.cpp @@ -0,0 +1,16 @@ +#include + +#include +#include + + +TEST(HTTPChunkedStreamBuf, IsCompleteHandlesInvalidSocketException) +{ + Poco::Net::HTTPClientSession session; + Poco::Net::HTTPChunkedStreamBuf buf(session, std::ios::in); + + /// Default-initialized socket throws InvalidSocketException in SocketImpl::receiveBytes, + /// which HTTPChunkedStreamBuf::isComplete should swallow and return false. 
+ bool complete = buf.isComplete(true); + ASSERT_FALSE(complete); +} diff --git a/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference b/tests/queries/0_stateless/02537_distributed_losing_files_after_exception.reference similarity index 100% rename from tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference rename to tests/queries/0_stateless/02537_distributed_losing_files_after_exception.reference diff --git a/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 b/tests/queries/0_stateless/02537_distributed_losing_files_after_exception.sql.j2 similarity index 100% rename from tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 rename to tests/queries/0_stateless/02537_distributed_losing_files_after_exception.sql.j2 From 11398954ad12e9f659b3727930307162fffa5de2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Oct 2025 14:12:12 +0000 Subject: [PATCH 030/112] Backport #88513 to 25.8: Keeper improvement: add config for checking node ACL on removal --- src/Coordination/CoordinationSettings.cpp | 8 +- src/Coordination/KeeperStorage.cpp | 7 ++ .../test_keeper_remove_acl/__init__.py | 0 .../configs/check_node_acl_on_remove.xml | 7 ++ .../configs/enable_keeper1.xml | 25 ++++++ .../test_keeper_remove_acl/test.py | 89 +++++++++++++++++++ 6 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_keeper_remove_acl/__init__.py create mode 100644 tests/integration/test_keeper_remove_acl/configs/check_node_acl_on_remove.xml create mode 100644 tests/integration/test_keeper_remove_acl/configs/enable_keeper1.xml create mode 100644 tests/integration/test_keeper_remove_acl/test.py diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 7eb50ea7ac8b..172844e8ea5b 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -69,7 +69,8 @@ namespace ErrorCodes DECLARE(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ DECLARE(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ DECLARE(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) \ - DECLARE(Bool, use_xid_64, false, "Enable 64-bit XID. It is disabled by default because of backward compatibility", 0) + DECLARE(Bool, use_xid_64, false, "Enable 64-bit XID. It is disabled by default because of backward compatibility", 0) \ + DECLARE(Bool, check_node_acl_on_remove, false, "When trying to remove a node, check ACLs from both the node itself and the parent node. 
If disabled, default behaviour will be used where only ACL from the parent node is checked", 0) \ DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) @@ -277,6 +278,11 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const write_int(coordination_settings[CoordinationSetting::log_slow_cpu_threshold_ms]); writeText("log_slow_connection_operation_threshold_ms=", buf); write_int(coordination_settings[CoordinationSetting::log_slow_connection_operation_threshold_ms]); + + writeText("use_xid_64=", buf); + write_bool(coordination_settings[CoordinationSetting::use_xid_64]); + writeText("check_node_acl_on_remove=", buf); + write_bool(coordination_settings[CoordinationSetting::check_node_acl_on_remove]); } KeeperConfigurationAndSettingsPtr diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index a5bd73fed4b5..cdac684740aa 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -52,6 +52,7 @@ namespace DB namespace CoordinationSetting { extern const CoordinationSettingsUInt64 log_slow_cpu_threshold_ms; + extern const CoordinationSettingsBool check_node_acl_on_remove; } namespace ErrorCodes @@ -1828,6 +1829,12 @@ processLocal(const Coordination::ZooKeeperGetRequest & zk_request, Storage & sto template bool checkAuth(const Coordination::ZooKeeperRemoveRequest & zk_request, Storage & storage, int64_t session_id, bool is_local) { + if (auto check_node_acl = storage.keeper_context->getCoordinationSettings()[CoordinationSetting::check_node_acl_on_remove]; + check_node_acl && !storage.checkACL(zk_request.getPath(), Coordination::ACL::Delete, session_id, is_local)) + { + return false; + } + return storage.checkACL(Coordination::parentNodePath(zk_request.getPath()), Coordination::ACL::Delete, session_id, is_local); } diff --git a/tests/integration/test_keeper_remove_acl/__init__.py b/tests/integration/test_keeper_remove_acl/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_keeper_remove_acl/configs/check_node_acl_on_remove.xml b/tests/integration/test_keeper_remove_acl/configs/check_node_acl_on_remove.xml new file mode 100644 index 000000000000..77b2a0bcf65c --- /dev/null +++ b/tests/integration/test_keeper_remove_acl/configs/check_node_acl_on_remove.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_keeper_remove_acl/configs/enable_keeper1.xml b/tests/integration/test_keeper_remove_acl/configs/enable_keeper1.xml new file mode 100644 index 000000000000..873facafb3c8 --- /dev/null +++ b/tests/integration/test_keeper_remove_acl/configs/enable_keeper1.xml @@ -0,0 +1,25 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node + 9234 + true + 3 + + + + diff --git a/tests/integration/test_keeper_remove_acl/test.py b/tests/integration/test_keeper_remove_acl/test.py new file mode 100644 index 000000000000..24f41c0fb1b1 --- /dev/null +++ b/tests/integration/test_keeper_remove_acl/test.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 + +import pytest +import logging + +import helpers.keeper_utils as keeper_utils +from helpers.cluster import ClickHouseCluster + +from kazoo.security import make_digest_acl +from kazoo.exceptions import NoAuthError + + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + 
main_configs=["configs/enable_keeper1.xml", "configs/check_node_acl_on_remove.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node]) + + +def get_fake_zk(nodename, timeout=30.0): + return keeper_utils.get_fake_zk(cluster, nodename, timeout=timeout) + + +def stop_zk_connection(zk_conn): + zk_conn.stop() + zk_conn.close() + + +def test_server_restart(started_cluster): + try: + wait_nodes() + + node.stop_clickhouse() + node.replace_in_config( + "/etc/clickhouse-server/config.d/check_node_acl_on_remove.xml", "1", "0" + ) + node.start_clickhouse() + + def create_node_with_acl(): + node_zk = get_fake_zk("node") + node_zk.add_auth("digest", "clickhouse:password") + + if node_zk.exists("/test_acl_node"): + node_zk.delete("/test_acl_node") + + acl = make_digest_acl("clickhouse", "password", all=True) + node_zk.create("/test_acl_node", b"test_data", acl=[acl]) + stop_zk_connection(node_zk) + + def delete_node(): + node_zk = get_fake_zk("node") + node_zk.delete("/test_acl_node") + stop_zk_connection(node_zk) + + create_node_with_acl() + delete_node() + node.stop_clickhouse() + node.replace_in_config( + "/etc/clickhouse-server/config.d/check_node_acl_on_remove.xml", "0", "1" + ) + node.start_clickhouse() + + create_node_with_acl() + + with pytest.raises(NoAuthError): + delete_node() + finally: + try: + stop_zk_connection( + node_zk, + ) + except: + pass From 1145dbf7cbfd666077401bde5c81e40ff93fb10f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Oct 2025 20:13:28 +0000 Subject: [PATCH 031/112] Backport #88746 to 25.8: Startup clickhouse-keeper on boot --- packages/clickhouse-keeper.postinstall | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/packages/clickhouse-keeper.postinstall b/packages/clickhouse-keeper.postinstall index 2b4f303b6849..aab4596be52f 100644 --- a/packages/clickhouse-keeper.postinstall +++ b/packages/clickhouse-keeper.postinstall @@ -28,6 +28,25 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then "${KEEPER_USER}" fi + if [ -x "/bin/systemctl" ] && [ -f /lib/systemd/system/clickhouse-keeper.service ] && [ -d /run/systemd/system ]; then + # if old rc.d service present - remove it + if [ -x "/etc/init.d/clickhouse-keeper" ] && [ -x "/usr/sbin/update-rc.d" ]; then + /usr/sbin/update-rc.d clickhouse-keeper remove + fi + + /bin/systemctl daemon-reload + /bin/systemctl enable clickhouse-keeper + else + # If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-keeper + if [ -x "/etc/init.d/clickhouse-keeper" ]; then + if [ -x "/usr/sbin/update-rc.d" ]; then + /usr/sbin/update-rc.d clickhouse-keeper defaults 19 19 >/dev/null || exit $? 
+ else + echo # Other OS + fi + fi + fi + chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_CONFDIR}" chmod 0755 "${KEEPER_CONFDIR}" From 0e23fd010115f6f0f1585694eaf5804acea6aa56 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 18 Oct 2025 12:15:56 +0000 Subject: [PATCH 032/112] Backport #88617 to 25.8: cleanup temporary table entry from snapshot_detached_tables during table drop --- src/Databases/DatabaseMemory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 266de24116b4..34e1c04491ba 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -87,6 +87,7 @@ void DatabaseMemory::dropTable( std::lock_guard lock{mutex}; table->is_dropped = true; create_queries.erase(table_name); + snapshot_detached_tables.erase(table_name); UUID table_uuid = table->getStorageID().uuid; if (table_uuid != UUIDHelpers::Nil) DatabaseCatalog::instance().removeUUIDMappingFinally(table_uuid); From c37f56ce1e72476950b50e5173f885a44baf7eb7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 21 Oct 2025 10:12:53 +0000 Subject: [PATCH 033/112] Backport #88814 to 25.8: Catch exceptions when async logging fails to prevent program aborts --- src/Loggers/OwnSplitChannel.cpp | 107 ++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 34 deletions(-) diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index 749d8c8ac63f..dd2e6e9de74a 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -487,20 +487,40 @@ void OwnAsyncSplitChannel::runChannel(size_t i) while (is_open) { - log_notification(notification); - notification = queues[i]->waitDequeueMessage(); + try + { + log_notification(notification); + notification = queues[i]->waitDequeueMessage(); + } + catch (...) + { + const std::string & exception_message = getCurrentExceptionMessage(true); + writeRetry(STDERR_FILENO, "Cannot log message in OwnAsyncSplitChannel channel: "); + writeRetry(STDERR_FILENO, exception_message.data(), exception_message.size()); + writeRetry(STDERR_FILENO, "\n"); + } } - /// Flush everything before closing - log_notification(notification); - - /// We want to process only what's currently in the queue and not block other logging - auto queue = queues[i]->getCurrentQueueAndClear(); - while (!queue.empty()) + try { - notification = queue.front(); - queue.pop_front(); + /// Flush everything before closing log_notification(notification); + + /// We want to process only what's currently in the queue and not block other logging + auto queue = queues[i]->getCurrentQueueAndClear(); + while (!queue.empty()) + { + notification = queue.front(); + queue.pop_front(); + log_notification(notification); + } + } + catch (...) 
+ { + const std::string & exception_message = getCurrentExceptionMessage(true); + writeRetry(STDERR_FILENO, "Cannot flush messages in OwnAsyncSplitChannel channel: "); + writeRetry(STDERR_FILENO, exception_message.data(), exception_message.size()); + writeRetry(STDERR_FILENO, "\n"); } } @@ -530,40 +550,59 @@ void OwnAsyncSplitChannel::runTextLog() auto notification = text_log_queue.waitDequeueMessage(); while (is_open) { - if (flush_text_logs) + try { - auto text_log_locked = text_log.lock(); - if (!text_log_locked) - return; + if (flush_text_logs) + { + auto text_log_locked = text_log.lock(); + if (!text_log_locked) + return; - if (notification) - log_notification(notification, text_log_locked); + if (notification) + log_notification(notification, text_log_locked); - flush_queue(text_log_locked); + flush_queue(text_log_locked); - flush_text_logs = false; - flush_text_logs.notify_all(); + flush_text_logs = false; + flush_text_logs.notify_all(); + } + else if (notification) + { + auto text_log_locked = text_log.lock(); + if (!text_log_locked) + return; + log_notification(notification, text_log_locked); + } + + notification = text_log_queue.waitDequeueMessage(); } - else if (notification) + catch (...) { - auto text_log_locked = text_log.lock(); - if (!text_log_locked) - return; - log_notification(notification, text_log_locked); + const std::string & exception_message = getCurrentExceptionMessage(true); + writeRetry(STDERR_FILENO, "Cannot log message in OwnAsyncSplitChannel text log: "); + writeRetry(STDERR_FILENO, exception_message.data(), exception_message.size()); + writeRetry(STDERR_FILENO, "\n"); } - - notification = text_log_queue.waitDequeueMessage(); } - /// We want to flush everything already in the queue before closing so all messages are logged - auto text_log_locked = text_log.lock(); - if (!text_log_locked) - return; - - if (notification) - log_notification(notification, text_log_locked); + try + { + /// We want to flush everything already in the queue before closing so all messages are logged + auto text_log_locked = text_log.lock(); + if (!text_log_locked) + return; - flush_queue(text_log_locked); + if (notification) + log_notification(notification, text_log_locked); + flush_queue(text_log_locked); + } + catch (...) 
+ { + const std::string & exception_message = getCurrentExceptionMessage(true); + writeRetry(STDERR_FILENO, "Cannot flush queue in OwnAsyncSplitChannel text log: "); + writeRetry(STDERR_FILENO, exception_message.data(), exception_message.size()); + writeRetry(STDERR_FILENO, "\n"); + } } void OwnAsyncSplitChannel::setChannelProperty(const std::string & channel_name, const std::string & name, const std::string & value) From 5c7cc0157a9fbddaf6edb48382005b0f3e82816f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 21 Oct 2025 12:18:03 +0000 Subject: [PATCH 034/112] Backport #87789 to 25.8: Fix Insert Select with CTE --- src/Interpreters/InterpreterInsertQuery.cpp | 7 +++++++ .../03632_insert_select_cte_bug.reference | 1 + .../0_stateless/03632_insert_select_cte_bug.sql | 17 +++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 tests/queries/0_stateless/03632_insert_select_cte_bug.reference create mode 100644 tests/queries/0_stateless/03632_insert_select_cte_bug.sql diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a0e9c1b6cccb..619a2799a0bf 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include @@ -80,6 +82,7 @@ namespace Setting extern const SettingsBool async_query_sending_for_remote; extern const SettingsBool async_socket_for_remote; extern const SettingsUInt64 max_distributed_depth; + extern const SettingsBool enable_global_with_statement; } namespace MergeTreeSetting @@ -760,6 +763,10 @@ InterpreterInsertQuery::distributedWriteIntoReplicatedMergeTreeFromClusterStorag { if (auto * select_query = select.list_of_selects->children.at(0)->as()) { + if (local_context->getSettingsRef()[Setting::enable_global_with_statement]) + ApplyWithAliasVisitor::visit(select.list_of_selects->children.at(0)); + ApplyWithSubqueryVisitor(local_context).visit(select.list_of_selects->children.at(0)); + JoinedTables joined_tables(Context::createCopy(local_context), *select_query); if (joined_tables.tablesCount() == 1) src_storage = joined_tables.getLeftTableStorage(); diff --git a/tests/queries/0_stateless/03632_insert_select_cte_bug.reference b/tests/queries/0_stateless/03632_insert_select_cte_bug.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03632_insert_select_cte_bug.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03632_insert_select_cte_bug.sql b/tests/queries/0_stateless/03632_insert_select_cte_bug.sql new file mode 100644 index 000000000000..0692f5d6b375 --- /dev/null +++ b/tests/queries/0_stateless/03632_insert_select_cte_bug.sql @@ -0,0 +1,17 @@ +SET enable_analyzer=1; -- parallel distributed insert select for replicated tables works only with analyzer +SET parallel_distributed_insert_select=2; +SET enable_global_with_statement=1; + +DROP TABLE IF EXISTS test_insert SYNC; + +CREATE TABLE test_insert (c1 String, c2 UInt8) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_03632/tables/test_insert', '{replica}') +ORDER BY (); + +INSERT INTO test_insert +WITH cte_test AS (SELECT '1234', 1) +SELECT * FROM cte_test; + +SELECT count() FROM test_insert; + +DROP TABLE test_insert; From 27f3f97c73bb7aafb9e1d4ab9f4a973cda784043 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 21 Oct 2025 12:19:39 +0000 Subject: [PATCH 035/112] Backport #86184 to 25.8: Fix inferring 
Date/DateTime/DateTime64 on dates that are out of supported range --- src/Common/DateLUT.cpp | 11 ++++ src/Common/DateLUT.h | 2 + src/Common/DateLUTImpl.h | 40 +++++++++++++ src/IO/ReadHelpers.cpp | 21 ++++++- src/IO/ReadHelpers.h | 52 ++++++++++++++--- src/IO/parseDateTimeBestEffort.cpp | 48 ++++++++++----- .../test_storage_kafka/test_batch_fast.py | 2 +- .../01186_conversion_to_nullable.reference | 6 +- .../01556_accurate_cast_or_null.reference | 6 +- tests/queries/0_stateless/02404_data.CSV | 10 ---- .../0_stateless/02404_data.CSVWithNames | 11 ---- .../0_stateless/02404_data.CustomSeparated | 10 ---- .../0_stateless/02404_data.JSONCompactEachRow | 10 ---- .../0_stateless/02404_data.JSONEachRow | 10 ---- tests/queries/0_stateless/02404_data.TSKV | 10 ---- tests/queries/0_stateless/02404_data.TSV | 10 ---- .../0_stateless/02404_data.TSVWithNames | 11 ---- tests/queries/0_stateless/02404_data.Values | 1 - ...rence_cache_respect_format_settings.sql.j2 | 1 + .../03149_asof_join_ddb_timestamps.reference | 8 +-- ...bad_date_and_datetimes_inference.reference | 49 ++++++++++++++++ ...03599_bad_date_and_datetimes_inference.sql | 58 +++++++++++++++++++ 22 files changed, 266 insertions(+), 121 deletions(-) delete mode 100644 tests/queries/0_stateless/02404_data.CSV delete mode 100644 tests/queries/0_stateless/02404_data.CSVWithNames delete mode 100644 tests/queries/0_stateless/02404_data.CustomSeparated delete mode 100644 tests/queries/0_stateless/02404_data.JSONCompactEachRow delete mode 100644 tests/queries/0_stateless/02404_data.JSONEachRow delete mode 100644 tests/queries/0_stateless/02404_data.TSKV delete mode 100644 tests/queries/0_stateless/02404_data.TSV delete mode 100644 tests/queries/0_stateless/02404_data.TSVWithNames delete mode 100644 tests/queries/0_stateless/02404_data.Values create mode 100644 tests/queries/0_stateless/03599_bad_date_and_datetimes_inference.reference create mode 100644 tests/queries/0_stateless/03599_bad_date_and_datetimes_inference.sql diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp index ba7dd6602e48..39e8044f36d6 100644 --- a/src/Common/DateLUT.cpp +++ b/src/Common/DateLUT.cpp @@ -209,6 +209,11 @@ ExtendedDayNum makeDayNum(const DateLUTImpl & date_lut, Int16 year, UInt8 month, return date_lut.makeDayNum(year, month, day_of_month, default_error_day_num); } +std::optional tryToMakeDayNum(const DateLUTImpl & date_lut, Int16 year, UInt8 month, UInt8 day_of_month) +{ + return date_lut.tryToMakeDayNum(year, month, day_of_month); +} + Int64 makeDate(const DateLUTImpl & date_lut, Int16 year, UInt8 month, UInt8 day_of_month) { static_assert(std::same_as); @@ -221,6 +226,12 @@ Int64 makeDateTime(const DateLUTImpl & date_lut, Int16 year, UInt8 month, UInt8 return date_lut.makeDateTime(year, month, day_of_month, hour, minute, second); } +std::optional tryToMakeDateTime(const DateLUTImpl & date_lut, Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) +{ + static_assert(std::same_as); + return date_lut.tryToMakeDateTime(year, month, day_of_month, hour, minute, second); +} + const std::string & getDateLUTTimeZone(const DateLUTImpl & date_lut) { return date_lut.getTimeZone(); diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index 72992e3c70d2..ff025fd8036b 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -87,9 +87,11 @@ inline UInt64 timeInNanoseconds(std::chrono::time_point tryToMakeDayNum(const DateLUTImpl & date_lut, Int16 year, UInt8 month, UInt8 day_of_month); Int64 makeDate(const DateLUTImpl & date_lut, 
Int16 year, UInt8 month, UInt8 day_of_month); Int64 makeDateTime(const DateLUTImpl & date_lut, Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second); +std::optional tryToMakeDateTime(const DateLUTImpl & date_lut, Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second); const std::string & getDateLUTTimeZone(const DateLUTImpl & date_lut); UInt32 getDayNumOffsetEpoch(); diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 5c08916f8095..b9c058b60a85 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -1177,6 +1177,20 @@ class DateLUTImpl return LUTIndex{std::min(index, static_cast(DATE_LUT_SIZE - 1))}; } + std::optional tryToMakeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const + { + if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) + return std::nullopt; + + auto year_lut_index = (year - DATE_LUT_MIN_YEAR) * 12 + month - 1; + UInt32 index = years_months_lut[year_lut_index].toUnderType() + day_of_month - 1; + + if (index >= DATE_LUT_SIZE) + return std::nullopt; + + return LUTIndex(index); + } + /// Create DayNum from year, month, day of month. ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const { @@ -1186,6 +1200,18 @@ class DateLUTImpl return toDayNum(makeLUTIndex(year, month, day_of_month)); } + std::optional tryToMakeDayNum(Int16 year, UInt8 month, UInt8 day_of_month) const + { + if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) + return std::nullopt; + + auto index = tryToMakeLUTIndex(year, month, day_of_month); + if (!index) + return std::nullopt; + + return toDayNum(*index); + } + Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const { return lut[makeLUTIndex(year, month, day_of_month)].date; @@ -1204,6 +1230,20 @@ class DateLUTImpl return lut[index].date + time_offset; } + std::optional