From 014e10736ea2151b5429a9aebbb4f95d641ed535 Mon Sep 17 00:00:00 2001 From: HangyuanLiu <460660596@qq.com> Date: Tue, 14 Jul 2020 21:55:14 +0800 Subject: [PATCH] fix orc decimal --- be/src/exec/orc_scanner.cpp | 15 ++++-- be/test/exec/orc_scanner_test.cpp | 47 ++++++++++++++++-- .../orc_scanner/decimal_and_timestamp.orc | Bin 900 -> 957 bytes 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index 47fb7c9df3fe79..7edf204a2de4cb 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -245,16 +245,23 @@ Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { } else { decimal_str = ((orc::Decimal128VectorBatch*) cvb)->values[_current_line_of_group].toString(); } + + int negative = decimal_str[0] == '-' ? 1 : 0; + int decimal_scale_length = decimal_str.size() - negative; std::string v; - if (decimal_str.size() <= scale) { + if (decimal_scale_length <= scale) { // decimal(5,2) : the integer of 0.01 is 1, so we should fill 0 befor integer - v = "0."; - int fill_zero = scale - decimal_str.size(); + v = std::string(negative ? "-0." : "0."); + int fill_zero = scale - decimal_scale_length; while (fill_zero--) { v += "0"; } - v += decimal_str; + if (negative) { + v += decimal_str.substr(1, decimal_str.length()); + } else { + v += decimal_str; + } } else { //Orc api will fill in 0 at the end, so size must greater than scale v = decimal_str.substr(0, decimal_str.size() - scale) + "." + decimal_str.substr(decimal_str.size() - scale); diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp index 0c5e7d7fe63a91..5e27d74949acab 100644 --- a/be/test/exec/orc_scanner_test.cpp +++ b/be/test/exec/orc_scanner_test.cpp @@ -573,7 +573,7 @@ TEST_F(OrcScannerTest, normal3) { expr.nodes.push_back(cast_expr); expr.nodes.push_back(slot_ref); - params.expr_of_dest_slot.emplace(8 + i, expr); + params.expr_of_dest_slot.emplace(9 + i, expr); params.src_slot_ids.push_back(i); } @@ -606,7 +606,7 @@ TEST_F(OrcScannerTest, normal3) { expr.nodes.push_back(cast_expr); expr.nodes.push_back(slot_ref); - params.expr_of_dest_slot.emplace(13, expr); + params.expr_of_dest_slot.emplace(14, expr); params.src_slot_ids.push_back(5); } @@ -639,7 +639,7 @@ TEST_F(OrcScannerTest, normal3) { expr.nodes.push_back(cast_expr); expr.nodes.push_back(slot_ref); - params.expr_of_dest_slot.emplace(14, expr); + params.expr_of_dest_slot.emplace(15, expr); params.src_slot_ids.push_back(6); } { @@ -671,9 +671,42 @@ TEST_F(OrcScannerTest, normal3) { expr.nodes.push_back(cast_expr); expr.nodes.push_back(slot_ref); - params.expr_of_dest_slot.emplace(15, expr); + params.expr_of_dest_slot.emplace(16, expr); params.src_slot_ids.push_back(7); } + { + TExprNode cast_expr; + cast_expr.node_type = TExprNodeType::CAST_EXPR; + cast_expr.type = decimal_type; + cast_expr.__set_opcode(TExprOpcode::CAST); + cast_expr.__set_num_children(1); + cast_expr.__set_output_scale(-1); + cast_expr.__isset.fn = true; + cast_expr.fn.name.function_name = "casttodecimal"; + cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; + cast_expr.fn.arg_types.push_back(varchar_type); + cast_expr.fn.ret_type = decimal_type; + cast_expr.fn.has_var_args = false; + cast_expr.fn.__set_signature("cast_to_decimal_val(VARCHAR(*))"); + cast_expr.fn.__isset.scalar_fn = true; + cast_expr.fn.scalar_fn.symbol = "doris::DecimalOperators::cast_to_decimal_val"; + + TExprNode slot_ref; + slot_ref.node_type = TExprNodeType::SLOT_REF; + slot_ref.type = varchar_type; + slot_ref.num_children = 0; + slot_ref.__isset.slot_ref = true; + slot_ref.slot_ref.slot_id = 8; + slot_ref.slot_ref.tuple_id = 0; + + TExpr expr; + expr.nodes.push_back(cast_expr); + expr.nodes.push_back(slot_ref); + + params.expr_of_dest_slot.emplace(17, expr); + params.src_slot_ids.push_back(8); + } + } params.__set_src_tuple_id(0); params.__set_dest_tuple_id(1); @@ -709,6 +742,8 @@ TEST_F(OrcScannerTest, normal3) { TSlotDescriptorBuilder().string_type(65535).nullable(true).column_name("col7").column_pos(7).build()); src_tuple_builder.add_slot( TSlotDescriptorBuilder().string_type(65535).nullable(true).column_name("col8").column_pos(8).build()); + src_tuple_builder.add_slot( + TSlotDescriptorBuilder().string_type(65535).nullable(true).column_name("col9").column_pos(9).build()); src_tuple_builder.build(&dtb); TTupleDescriptorBuilder dest_tuple_builder; @@ -728,6 +763,8 @@ TEST_F(OrcScannerTest, normal3) { TSlotDescriptorBuilder().type(TYPE_DATETIME).column_name("col7").column_pos(7).build()); dest_tuple_builder.add_slot( TSlotDescriptorBuilder().type(TYPE_DATE).nullable(true).column_name("col8").column_pos(8).build()); + dest_tuple_builder.add_slot( + TSlotDescriptorBuilder().decimal_type(27,9).column_name("col9").column_pos(9).build()); dest_tuple_builder.build(&dtb); t_desc_table = dtb.desc_tbl(); @@ -755,7 +792,7 @@ TEST_F(OrcScannerTest, normal3) { bool eof = false; ASSERT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof).ok()); ASSERT_EQ(Tuple::to_string(tuple, *_desc_tbl->get_tuple_descriptor(1)), - "(0.123456789 1.12 -1.1234500000 0.12345 0.000 1 2020-01-14 14:12:19 2020-02-10)"); + "(0.123456789 1.12 -1.1234500000 0.12345 0.000 1 2020-01-14 14:12:19 2020-02-10 -0.0014)"); scanner.close(); } diff --git a/be/test/exec/test_data/orc_scanner/decimal_and_timestamp.orc b/be/test/exec/test_data/orc_scanner/decimal_and_timestamp.orc index 65bc52b7c1cdf81e655661ef09149cd2a952893a..548fac4f5b074964629e18968544132f4146dcbb 100644 GIT binary patch delta 640 zcmV-`0)PF42fYWd0s$dD0086C;$me00U<>WMk8r1c3lHK0|P@7As9mn#t2{llK}xc z1_uBDLI8l1K>-$j0{{RU2LJ#@0D$@c0ENvl4#F@Dh2bWRoumi}F+EkZ14yi#firM| zOtcG2?=N9qy)#&!{>X9$Eqdy37=8xaX~ax>+-Sm;3NBOunlaIWkyh>nyOT{Zl^r4M z=-pFsgWcadc?%L-yw%O9e9zv6{{;JY->=YfES*jKH{0RZD_ zVB%uoU<_d3(&b<@Qs?3}&@(hLHZe6bx3CmK=13uPfXXF+%7wUC41t0|U{VT90%cTy zGICrTx`q(-LU5K8oCQ>+2vjA_#SS%82*!|tF@Q?=fl4^J7!8CN4Wt+ifC4-mj6!T2 zOae?2OhBH02nVAVpTvi0>yLejR)A1IQ9cevD-Mo7bD0Ey7-5gDfu4bZp@|TTAq8Uu zFaQAZ0RT;nJx;?w5QQ`AwLQC7wzZn5rc+BsG^S8ABirX;uR*DBH-;yZY)por(+Jy9S0OZ)_Jn42GjA8m z)q3NO7k|8Pz2LgDy5`Ucy^t>#d^xjN)%(6XIDc^5I_K8d#{HviFn%yx$+%)HkOT5S z93%)wE<%;7Lh*KraFZ6_Z=YX3cayzvnj&uXu(7)MmPC=an&`pK-m)AQw@{9`|TX9$ZU-^-Y5TO!+n2=eMLD)?R@0V z{PyGSzAo+dnA__)x8)5y0RT-*I|{;35X?TJStlCf780Hyf}oZ968|2-Dy87P>^y_F z@HT>lrG-mkLW-T8nVng@2tHHpVa1fq5xQlXtvB1<{&1A0uuYMBmmaJZ1b(Wv6le?7 zTZoz7#^6O-cBE#uW}1KNVyd|jg8yr&h^y59R>?O^m%t13?hSZ+7ovZZBRoOORs)2`6Y8b?@@& zU|}g1l130LZ3SPzhp@8q8GH+0z!$Nw6m*zeLW=qQKXztO+b@6Z3)J=xs=Lw480t&x zIRvHSB+byrkIzqyuoX^(Q{hZF7cPX$I>mDALf&c8sSzD^ISIN}2s}AYKLEuDtBB4K z+7nZRv!Rf``DBp%THRsI00K=Y;kVZp!NW?c?+3VYC+R`(zOa@%LR> m9&WO$*W2sp9|-mV5CIr~fDj@A01POh0Wb%G^aBG=QbQRxOB`PS