diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
index 02445972ecd..d57aa58333f 100755
--- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
@@ -677,7 +677,7 @@ const std::unordered_map scalar_func_map({
     {tipb::ScalarFuncSig::Trim3Args, "tidbTrim"},
     {tipb::ScalarFuncSig::LTrim, "tidbLTrim"},
     {tipb::ScalarFuncSig::RTrim, "tidbRTrim"},
-    //{tipb::ScalarFuncSig::UnHex, "cast"},
+    {tipb::ScalarFuncSig::UnHex, "tidbUnHex"},
     {tipb::ScalarFuncSig::UpperUTF8, "upperUTF8"},
     {tipb::ScalarFuncSig::Upper, "upperBinary"},
     //{tipb::ScalarFuncSig::CharLength, "upper"},
diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp
index 33c4bb32cac..b8677e2af72 100644
--- a/dbms/src/Functions/FunctionsString.cpp
+++ b/dbms/src/Functions/FunctionsString.cpp
@@ -5956,6 +5956,163 @@ class FunctionFormatDecimal : public IFunction
     }
 };
 
+/// Implements TiDB's UNHEX(str): every pair of hexadecimal digits in the argument becomes
+/// one byte of the result. An argument with an odd number of digits is decoded as if it
+/// had a leading '0' (so "9" yields the single byte 0x09), and an argument containing any
+/// character that is not a hexadecimal digit yields NULL.
+class FunctionTiDBUnHex : public IFunction
+{
+public:
+    static constexpr auto name = "tidbUnHex";
+    FunctionTiDBUnHex() = default;
+
+    static FunctionPtr create(const Context & /*context*/)
+    {
+        return std::make_shared<FunctionTiDBUnHex>();
+    }
+
+    std::string getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    /// The result type is Nullable(String) because undecodable input maps to NULL.
+    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when the argument is not a string type.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!arguments[0]->isString())
+            throw Exception(
+                fmt::format("Illegal type {} of first argument of function {}", arguments[0]->getName(), getName()),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        return makeNullable(std::make_shared<DataTypeString>());
+    }
+
+    /// Decodes the argument column row by row and stores a nullable string column at `result`.
+    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when the argument column is not a ColumnString.
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
+    {
+        const ColumnPtr & column = block.getByPosition(arguments[0]).column;
+
+        size_t size = block.rows();
+        auto col_res = ColumnString::create();
+        auto result_null_map = ColumnUInt8::create(size, 0);
+
+        if (executeUnHexString(column, col_res->getChars(), col_res->getOffsets(), result_null_map->getData()))
+        {
+            block.getByPosition(result).column = ColumnNullable::create(std::move(col_res), std::move(result_null_map));
+        }
+        else
+        {
+            throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+    }
+
+private:
+    /// Decodes every row of `column` into res_data / res_offsets and flags undecodable rows
+    /// in res_null_map. Returns false when `column` is not a ColumnString.
+    static bool executeUnHexString(const ColumnPtr & column,
+                                   ColumnString::Chars_t & res_data,
+                                   ColumnString::Offsets & res_offsets,
+                                   ColumnUInt8::Container & res_null_map)
+    {
+        const auto * const col = checkAndGetColumn<ColumnString>(column.get());
+        if (col == nullptr)
+        {
+            return false;
+        }
+        const size_t size = col->size();
+        const ColumnString::Chars_t & data = col->getChars();
+        const ColumnString::Offsets & offsets = col->getOffsets();
+        /// Upper bound: a row of n digits yields at most (n + 1) / 2 bytes plus one terminating
+        /// zero, and data.size() counts n + 1 bytes per input row.
+        res_data.resize(data.size() / 2 + size);
+        res_offsets.resize(size);
+
+        ColumnString::Offset pos = 0;
+        ColumnString::Offset prev_offset = 0;
+        for (size_t i = 0; i < size; ++i)
+        {
+            size_t begin = prev_offset;
+            /// The `- 1` skips the zero byte that terminates each input row.
+            size_t length = offsets[i] - prev_offset - 1;
+            unhexOne(data, length, i, begin, pos, res_data, res_offsets, res_null_map);
+            pos = res_offsets[i];
+            prev_offset = offsets[i];
+        }
+        res_data.resize(pos);
+
+        return true;
+    }
+
+    /// Decodes the `length` hex digits starting at data[begin] into res_data starting at `pos`
+    /// and records the end offset of the row in res_offsets[idx]. On an invalid digit the row
+    /// is flagged in res_null_map and stored as an empty string.
+    static void unhexOne(const ColumnString::Chars_t & data,
+                         const size_t length,
+                         const size_t idx,
+                         size_t begin,
+                         size_t pos,
+                         ColumnString::Chars_t & res_data,
+                         ColumnString::Offsets & res_offsets,
+                         ColumnUInt8::Container & res_null_map)
+    {
+        const size_t row_start = pos;
+        const size_t end = begin + length;
+        char low = 0;
+        char high = 0;
+        bool valid = true;
+
+        /// Odd number of digits: the first digit alone forms the first output byte.
+        if (length % 2 != 0)
+        {
+            valid = fromHexChar(reinterpret_cast<const char *>(&data[begin]), low);
+            if (valid)
+                res_data[pos++] = low;
+            ++begin;
+        }
+        for (size_t i = begin; valid && i < end; i += 2)
+        {
+            const char * byte1 = reinterpret_cast<const char *>(&data[i]);
+            const char * byte2 = reinterpret_cast<const char *>(&data[i + 1]);
+            valid = fromHexChar(byte1, high) && fromHexChar(byte2, low);
+            if (valid)
+                res_data[pos++] = (high << 4) | low;
+        }
+        if (!valid)
+        {
+            /// Drop any bytes decoded before the bad digit so the NULL row has an empty payload.
+            res_null_map[idx] = 1;
+            pos = row_start;
+        }
+        /// Terminate the row explicitly: each string row is followed by one zero byte (the
+        /// reader above skips one per input row) and nothing else writes it for the result.
+        res_data[pos] = 0;
+        res_offsets[idx] = pos + 1;
+    }
+
+    /// Converts one hexadecimal digit (upper or lower case) to its value 0-15 in `out`.
+    /// Returns false and leaves `out` untouched when *in is not a hexadecimal digit.
+    static bool fromHexChar(const char * in, char & out)
+    {
+        if (*in >= '0' && *in <= '9')
+        {
+            out = *in - '0';
+        }
+        else if (*in >= 'a' && *in <= 'f')
+        {
+            out = *in - 'a' + 10;
+        }
+        else if (*in >= 'A' && *in <= 'F')
+        {
+            out = *in - 'A' + 10;
+        }
+        else
+        {
+            return false;
+        }
+        return true;
+    }
+};
+
 // clang-format off
 struct NameEmpty { static constexpr auto name = "empty"; };
 struct NameNotEmpty { static constexpr auto name = "notEmpty"; };
@@ -6047,5 +6204,6 @@ void registerFunctionsString(FunctionFactory & factory)
     factory.registerFunction();
     factory.registerFunction();
     factory.registerFunction();
+    factory.registerFunction<FunctionTiDBUnHex>();
 }
 } // namespace DB
diff --git a/dbms/src/Functions/tests/gtest_strings_unhex.cpp b/dbms/src/Functions/tests/gtest_strings_unhex.cpp
new file mode 100644
index 00000000000..7ff65ba801f
--- /dev/null
+++ b/dbms/src/Functions/tests/gtest_strings_unhex.cpp
@@ -0,0 +1,75 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <DataTypes/DataTypeNullable.h> // NOTE(review): the five include targets were lost in extraction and are reconstructed; confirm
+#include <TestUtils/FunctionTestUtils.h>
+#include <TestUtils/TiFlashTestBasic.h>
+
+#include <string>
+#include <vector>
+
+
+namespace DB
+{
+namespace tests
+{
+class UnHexTest : public DB::tests::FunctionTest
+{
+};
+
+TEST_F(UnHexTest, unhexAllUnitTest)
+try
+{
+    const String & func_name = "tidbUnHex";
+
+    ASSERT_COLUMN_EQ(
+        createColumn<Nullable<String>>({"www.pingcap.com", "abcd", std::nullopt, std::nullopt, ""}),
+        executeFunction(
+            func_name,
+            createColumn<Nullable<String>>({"7777772E70696E676361702E636F6D", "61626364", std::nullopt, "GG", ""})));
+
+    ASSERT_COLUMN_EQ(
+        createColumn<Nullable<String>>({"ѐёђѓєѕіїјЉЊЋЌЍЎЏ", "+ѐ-ё*ђ/ѓ!є@ѕ#і$@ї%ј……Љ&Њ（Ћ）Ќ￥Ѝ#Ў@Џ！^", "αβγδεζηθικλμνξοπρστυφχψως", "▲α▼βγ➨δε☎ζη✂θι€κλ♫μν✓ξο✚πρ℉στ♥υφ♖χψ♘ω★ς✕", "թփձջրչճժծքոեռտըւիօպասդֆգհյկլխզղցվբնմշ"}),
+        executeFunction(
+            func_name,
+            createColumn<String>({"d190d191d192d193d194d195d196d197d198d089d08ad08bd08cd08dd08ed08f",
+                                  "2bd1902dd1912ad1922fd19321d19440d19523d1962440d19725d198e280a6e280a6d08926d08aefbc88d08befbc89d08cefbfa5d08d23d08e40d08fefbc815e",
+                                  "ceb1ceb2ceb3ceb4ceb5ceb6ceb7ceb8ceb9cebacebbcebccebdcebecebfcf80cf81cf83cf84cf85cf86cf87cf88cf89cf82",
+                                  "e296b2ceb1e296bcceb2ceb3e29ea8ceb4ceb5e2988eceb6ceb7e29c82ceb8ceb9e282accebacebbe299abcebccebde29c93cebecebfe29c9acf80cf81e28489cf83cf84e299a5cf85cf86e29996cf87cf88e29998cf89e29885cf82e29c95",
+                                  "d5a9d683d5b1d5bbd680d5b9d5b3d5aad5aed684d5b8d5a5d5bcd5bfd5a8d682d5abd685d5bad5a1d5bdd5a4d686d5a3d5b0d5b5d5afd5acd5add5a6d5b2d681d5bed5a2d5b6d5b4d5b7"})));
+
+    // CJK and emoji: 3- and 4-byte UTF-8 sequences, including a zero-width-joiner sequence
+    ASSERT_COLUMN_EQ(
+        createColumn<Nullable<String>>({"さらに入", "测试测试测试测试abcd测试", "🍻", "🏴‍☠️"}),
+        executeFunction(
+            func_name,
+            createColumn<Nullable<String>>({"E38195E38289E381ABE585A5", "E6B58BE8AF95E6B58BE8AF95E6B58BE8AF95E6B58BE8AF9561626364E6B58BE8AF95", "F09F8DBB", "F09F8FB4E2808DE298A0EFB88F"})));
+
+    // Whitespace bytes; odd-length input ("9", "A") decodes as if it had a leading '0'
+    ASSERT_COLUMN_EQ(
+        createColumn<Nullable<String>>({"\t", "\t", "\n", "\n", " "}),
+        executeFunction(
+            func_name,
+            createColumn<Nullable<String>>({"9", "09", "A", "0A", "20"})));
+
+    // Constant input column yields a constant result column
+    ASSERT_COLUMN_EQ(
+        createConstColumn<Nullable<String>>(4, "ab"),
+        executeFunction(
+            func_name,
+            createConstColumn<String>(4, "6162")));
+}
+CATCH
+} // namespace tests
+} // namespace DB
diff --git a/tests/fullstack-test/expr/unhex.test b/tests/fullstack-test/expr/unhex.test
new file mode 100644
index 00000000000..37e7dab9b2a
--- /dev/null
+++ b/tests/fullstack-test/expr/unhex.test
@@ -0,0 +1,35 @@
+# Copyright 2022 PingCAP, Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+mysql> drop table if exists test.t;
+mysql> create table if not exists test.t(a char(100), b int);
+
+mysql> insert into test.t values('7777772E70696E676361702E636F6D', 3039);
+mysql> insert into test.t values('61626364', 61626364);
+mysql> insert into test.t values('GG', -1);
+mysql> insert into test.t values('E38195E38289E381ABE585A5', 313233);
+mysql> insert into test.t values('F09F8DBB', 414243);
+mysql> alter table test.t set tiflash replica 1;
+func> wait_table test t
+
+mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select unhex(a), unhex(b) from test.t;
++-----------------+----------+
+| unhex(a)        | unhex(b) |
++-----------------+----------+
+| www.pingcap.com | 09       |
+| abcd            | abcd     |
+| NULL            | NULL     |
+| さらに入        | 123      |
+| 🍻              | ABC      |
++-----------------+----------+