Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
{tipb::ScalarFuncSig::Trim3Args, "tidbTrim"},
{tipb::ScalarFuncSig::LTrim, "tidbLTrim"},
{tipb::ScalarFuncSig::RTrim, "tidbRTrim"},
//{tipb::ScalarFuncSig::UnHex, "cast"},
{tipb::ScalarFuncSig::UnHex, "tidbUnHex"},
{tipb::ScalarFuncSig::UpperUTF8, "upperUTF8"},
{tipb::ScalarFuncSig::Upper, "upperBinary"},
//{tipb::ScalarFuncSig::CharLength, "upper"},
Expand Down
138 changes: 138 additions & 0 deletions dbms/src/Functions/FunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5956,6 +5956,143 @@ class FunctionFormatDecimal : public IFunction
}
};

/// Implements the `tidbUnHex` scalar function: every pair of hexadecimal digits of the
/// string argument is decoded into one output byte.
///
/// - An input with an odd number of digits is decoded as if it were left-padded with one
///   '0' (so "9" yields the single byte 0x09).
/// - A row containing any character that is not a hexadecimal digit yields NULL.
///
/// The result type is always Nullable(String).
class FunctionTiDBUnHex : public IFunction
{
public:
    static constexpr auto name = "tidbUnHex";
    FunctionTiDBUnHex() = default;

    static FunctionPtr create(const Context & /*context*/)
    {
        return std::make_shared<FunctionTiDBUnHex>();
    }

    std::string getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 1; }
    bool useDefaultImplementationForConstants() const override { return true; }

    /// Accepts only a String argument; throws ILLEGAL_TYPE_OF_ARGUMENT otherwise.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!arguments[0]->isString())
            throw Exception(
                fmt::format("Illegal type {} of first argument of function {}", arguments[0]->getName(), getName()),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        return makeNullable(std::make_shared<DataTypeString>());
    }

    /// Decodes the argument column and stores a ColumnNullable(ColumnString) at `result`.
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when the argument column is not a ColumnString.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
    {
        const ColumnPtr & column = block.getByPosition(arguments[0]).column;

        const size_t size = block.rows();
        auto col_res = ColumnString::create();
        // Null map starts as "not null" for every row; rows that fail to decode are flagged later.
        auto result_null_map = ColumnUInt8::create(size, 0);

        if (!executeUnHexString(column, col_res->getChars(), col_res->getOffsets(), result_null_map->getData()))
            throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        block.getByPosition(result).column = ColumnNullable::create(std::move(col_res), std::move(result_null_map));
    }

private:
    /// Decodes every row of `column` into `res_data` / `res_offsets` and flags rows that
    /// cannot be decoded in `res_null_map`.
    /// Returns false, leaving the outputs untouched, when `column` is not a ColumnString.
    static bool executeUnHexString(const ColumnPtr & column,
                                   ColumnString::Chars_t & res_data,
                                   ColumnString::Offsets & res_offsets,
                                   ColumnUInt8::Container & res_null_map)
    {
        const auto * const col = checkAndGetColumn<ColumnString>(column.get());
        if (col == nullptr)
            return false;

        const size_t size = col->size();
        const ColumnString::Chars_t & data = col->getChars();
        const ColumnString::Offsets & offsets = col->getOffsets();

        // Upper bound of the output size: a row of `length` digits produces at most
        // ceil(length / 2) bytes plus one terminator byte. Shrunk to the real size below.
        res_data.resize(data.size() / 2 + size);
        res_offsets.resize(size);

        ColumnString::Offset pos = 0;
        ColumnString::Offset prev_offset = 0;
        for (size_t i = 0; i < size; ++i)
        {
            // Each input row occupies `length + 1` bytes: the payload plus one trailing byte.
            const size_t length = offsets[i] - prev_offset - 1;
            unhexOne(data, length, i, prev_offset, pos, res_data, res_offsets, res_null_map);
            pos = res_offsets[i];
            prev_offset = offsets[i];
        }
        res_data.resize(pos);

        return true;
    }

    /// Decodes the `length` hex digits starting at `data[begin]` into `res_data` starting at `pos`.
    ///
    /// On success `res_offsets[idx]` is set one past the row's terminator byte.
    /// On failure the row is flagged in `res_null_map` and stored as an empty string.
    /// In both cases the row's terminator byte is written explicitly: `res_data` has only
    /// been resized, so that byte would otherwise hold unspecified content.
    static void unhexOne(const ColumnString::Chars_t & data,
                         const size_t length,
                         const size_t idx,
                         size_t begin,
                         size_t pos,
                         ColumnString::Chars_t & res_data,
                         ColumnString::Offsets & res_offsets,
                         ColumnUInt8::Container & res_null_map)
    {
        const size_t row_start = pos;
        const size_t end = begin + length;
        char low = 0;
        char high = 0;

        // Flags the row as NULL and stores an empty, zero-terminated string in its slot.
        // Bytes already written past `row_start` are abandoned; the next row overwrites them.
        const auto mark_null = [&] {
            res_null_map[idx] = 1;
            res_data[row_start] = 0;
            res_offsets[idx] = row_start + 1;
        };

        if (length % 2 != 0)
        {
            // Odd number of digits: the first digit alone forms the first byte.
            const char * byte = reinterpret_cast<const char *>(&data[begin]);
            if (!fromHexChar(byte, low))
            {
                mark_null();
                return;
            }
            res_data[pos] = low;
            ++pos;
            ++begin;
        }

        // The remaining digit count is even, so `i + 1` always stays below `end`.
        for (size_t i = begin; i < end; i += 2)
        {
            const char * byte1 = reinterpret_cast<const char *>(&data[i]);
            const char * byte2 = reinterpret_cast<const char *>(&data[i + 1]);
            if (!fromHexChar(byte1, high) || !fromHexChar(byte2, low))
            {
                mark_null();
                return;
            }
            res_data[pos] = (high << 4) | low;
            ++pos;
        }

        res_data[pos] = 0;
        res_offsets[idx] = pos + 1;
    }

    /// Converts one hexadecimal digit (`0-9`, `a-f`, `A-F`) pointed to by `in` into its
    /// numeric value in `out`. Returns false and leaves `out` untouched for any other character.
    static bool fromHexChar(const char * in, char & out)
    {
        const char c = *in;
        if (c >= '0' && c <= '9')
        {
            out = c - '0';
            return true;
        }
        if (c >= 'a' && c <= 'f')
        {
            out = c - 'a' + 10;
            return true;
        }
        if (c >= 'A' && c <= 'F')
        {
            out = c - 'A' + 10;
            return true;
        }
        return false;
    }
};

// clang-format off
/// Tag type exposing the string "empty" through its static `name` member.
struct NameEmpty
{
    static constexpr const char * name = "empty";
};
/// Tag type exposing the string "notEmpty" through its static `name` member.
struct NameNotEmpty
{
    static constexpr const char * name = "notEmpty";
};
Expand Down Expand Up @@ -6047,5 +6184,6 @@ void registerFunctionsString(FunctionFactory & factory)
factory.registerFunction<FunctionBin>();
factory.registerFunction<FunctionElt>();
factory.registerFunction<FunctionFormatDecimal>();
factory.registerFunction<FunctionTiDBUnHex>();
}
} // namespace DB
75 changes: 75 additions & 0 deletions dbms/src/Functions/tests/gtest_strings_unhex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <DataTypes/DataTypeNullable.h>
#include <TestUtils/FunctionTestUtils.h>
#include <TestUtils/TiFlashTestBasic.h>

#include <string>
#include <vector>


namespace DB
{
namespace tests
{
/// Test fixture for the `tidbUnHex` function tests; it adds nothing on top of FunctionTest.
class UnHexTest : public DB::tests::FunctionTest
{
};

TEST_F(UnHexTest, unhexAllUnitTest)
try
{
const String & func_name = "tidbUnHex";

ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"www.pingcap.com", "abcd", std::nullopt, std::nullopt, ""}),
executeFunction(
func_name,
createColumn<Nullable<String>>({"7777772E70696E676361702E636F6D", "61626364", std::nullopt, "GG", ""})));

ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"ѐёђѓєѕіїјЉЊЋЌЍЎЏ", "+ѐ-ё*ђ/ѓ!є@ѕ#і$@ї%ј……Љ&Њ(Ћ)Ќ¥Ѝ#Ў@Џ!^", "αβγδεζηθικλμνξοπρστυφχψως", "▲α▼βγ➨δε☎ζη✂θι€κλ♫μν✓ξο✚πρ℉στ♥υφ♖χψ♘ω★ς✕", "թփձջրչճժծքոեռտըւիօպասդֆգհյկլխզղցվբնմշ"}),
executeFunction(
func_name,
createColumn<String>({"d190d191d192d193d194d195d196d197d198d089d08ad08bd08cd08dd08ed08f",
"2bd1902dd1912ad1922fd19321d19440d19523d1962440d19725d198e280a6e280a6d08926d08aefbc88d08befbc89d08cefbfa5d08d23d08e40d08fefbc815e",
"ceb1ceb2ceb3ceb4ceb5ceb6ceb7ceb8ceb9cebacebbcebccebdcebecebfcf80cf81cf83cf84cf85cf86cf87cf88cf89cf82",
"e296b2ceb1e296bcceb2ceb3e29ea8ceb4ceb5e2988eceb6ceb7e29c82ceb8ceb9e282accebacebbe299abcebccebde29c93cebecebfe29c9acf80cf81e28489cf83cf84e299a5cf85cf86e29996cf87cf88e29998cf89e29885cf82e29c95",
"d5a9d683d5b1d5bbd680d5b9d5b3d5aad5aed684d5b8d5a5d5bcd5bfd5a8d682d5abd685d5bad5a1d5bdd5a4d686d5a3d5b0d5b5d5afd5acd5add5a6d5b2d681d5bed5a2d5b6d5b4d5b7"})));

// CJK and emoji
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"さらに入", "测试测试测试测试abcd测试", "🍻", "🏴‍☠️"}),
executeFunction(
func_name,
createColumn<Nullable<String>>({"E38195E38289E381ABE585A5", "E6B58BE8AF95E6B58BE8AF95E6B58BE8AF95E6B58BE8AF9561626364E6B58BE8AF95", "F09F8DBB", "F09F8FB4E2808DE298A0EFB88F"})));

// Special Empty Character
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"\t", "\t", "\n", "\n", " "}),
executeFunction(
func_name,
createColumn<Nullable<String>>({"9", "09", "A", "0A", "20"})));

// Const Column
ASSERT_COLUMN_EQ(
createConstColumn<Nullable<String>>(4, "ab"),
executeFunction(
func_name,
createConstColumn<String>(4, "6162")));
}
Comment thread
ywqzzy marked this conversation as resolved.
Comment thread
ywqzzy marked this conversation as resolved.
CATCH
} // namespace tests
} // namespace DB
35 changes: 35 additions & 0 deletions tests/fullstack-test/expr/unhex.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2022 PingCAP, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

mysql> drop table if exists test.t;
mysql> create table if not exists test.t(a char(100), b int);

mysql> insert into test.t values('7777772E70696E676361702E636F6D', 3039);
mysql> insert into test.t values('61626364', 61626364);
mysql> insert into test.t values('GG', -1);
mysql> insert into test.t values('E38195E38289E381ABE585A5', 313233);
mysql> insert into test.t values('F09F8DBB', 414243);
mysql> alter table test.t set tiflash replica 1;
Comment thread
b41sh marked this conversation as resolved.
func> wait_table test t

mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select unhex(a), unhex(b) from test.t;
+-----------------+----------+
| unhex(a) | unhex(b) |
+-----------------+----------+
| www.pingcap.com | 09 |
| abcd | abcd |
| NULL | NULL |
| さらに入 | 123 |
| 🍻 | ABC |
+-----------------+----------+