From e63071991ebd8155994b6cd0803a3654b996b136 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Wed, 7 Aug 2024 16:41:20 +0800 Subject: [PATCH] [bug](parquet)Fix the problem that the parquet reader reads the missing sub-columns of the struct and fails. (#38718) ## Proposed changes Fix the problem that the parquet reader fails when it reads sub-columns that are missing from a struct. For example: suppose we have a column `array<struct<a:int>>`, and this column has two rows of data: ``` [{1},{2},{3}] [{4},{5}] ``` Then we add a subcolumn b to the struct. Now the column structure is `array<struct<a:int,b:string>>`. The expected result of the query is as follows, instead of an error: ``` [{1,null},{2,null},{3,null}] [{4,null},{5,null}] ``` --- .../format/parquet/vparquet_column_reader.cpp | 22 +- .../create_preinstalled_scripts/run66.hql | 22 + .../test_hive_struct_add_column_orc/000000_0 | Bin 0 -> 578 bytes .../000000_0_copy_1 | Bin 0 -> 778 bytes .../000000_0_copy_2 | Bin 0 -> 940 bytes .../000000_0_copy_3 | Bin 0 -> 996 bytes .../000000_0_copy_4 | Bin 0 -> 1127 bytes .../000000_0_copy_5 | Bin 0 -> 1393 bytes .../000000_0 | Bin 0 -> 1008 bytes .../000000_0_copy_1 | Bin 0 -> 1098 bytes .../000000_0_copy_2 | Bin 0 -> 1423 bytes .../000000_0_copy_3 | Bin 0 -> 1445 bytes .../000000_0_copy_4 | Bin 0 -> 1704 bytes .../000000_0_copy_5 | Bin 0 -> 2130 bytes .../hive/test_hive_struct_add_column.out | 411 ++++++++++++++++++ .../hive/test_hive_struct_add_column.groovy | 169 +++++++ 16 files changed, 616 insertions(+), 8 deletions(-) create mode 100644 docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run66.hql create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_1 create mode 100644 
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_2 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_3 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_4 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_5 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_1 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_2 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_3 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_4 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_5 create mode 100644 regression-test/data/external_table_p0/hive/test_hive_struct_add_column.out create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_struct_add_column.groovy diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 4efa6c60e477ee..c51a51bac3cd4f 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -352,9 +352,6 @@ Status 
ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType SCOPED_RAW_TIMER(&_decode_null_map_time); auto* nullable_column = const_cast( static_cast(doris_column.get())); - - // auto* nullable_column = reinterpret_cast( - // (*std::move(src_column)).mutate().get()); data_column = nullable_column->get_nested_column_ptr(); map_data_column = &(nullable_column->get_null_map_data()); } else { @@ -723,7 +720,7 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr const DataTypeStruct* doris_struct_type = reinterpret_cast(remove_nullable(type).get()); - bool least_one_reader = false; + int not_missing_column_id = -1; std::vector missing_column_idxs {}; _read_column_names.clear(); @@ -744,8 +741,8 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr select_vector.reset(); size_t field_rows = 0; bool field_eof = false; - if (!least_one_reader) { - least_one_reader = true; + if (not_missing_column_id == -1) { + not_missing_column_id = i; RETURN_IF_ERROR(_child_readers[doris_name]->read_column_data( doris_field, doris_type, select_vector, batch_size, &field_rows, &field_eof, is_dict_filter)); @@ -765,12 +762,21 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr } } - if (!least_one_reader) { + if (not_missing_column_id == -1) { // TODO: support read struct which columns are all missing return Status::Corruption("Not support read struct '{}' which columns are all missing", _field_schema->name); } + // This missing_column_sz is not *read_rows. Because read_rows returns the number of rows. + // For example: suppose we have a column array<struct<a:int,b:string>>, + // where b is a newly added column, that is, a missing column. + // There are two rows of data in this column, + // [{1,null},{2,null},{3,null}] + // [{4,null},{5,null}] + // When you first read subcolumn a, you read 5 data items and the value of *read_rows is 2. + // You should insert 5 records into subcolumn b instead of 2. 
+ auto missing_column_sz = doris_struct.get_column(not_missing_column_id).size(); // fill missing column with null or default value for (auto idx : missing_column_idxs) { auto& doris_field = doris_struct.get_column_ptr(idx); @@ -778,7 +784,7 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr DCHECK(doris_type->is_nullable()); auto* nullable_column = reinterpret_cast( (*std::move(doris_field)).mutate().get()); - nullable_column->insert_null_elements(*read_rows); + nullable_column->insert_null_elements(missing_column_sz); } if (null_map_ptr != nullptr) { diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run66.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run66.hql new file mode 100644 index 00000000000000..bc0ac3327ea1b6 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run66.hql @@ -0,0 +1,22 @@ +use `default`; + +create table test_hive_struct_add_column_orc ( + `id` int, + `name` string, + `details` struct, + `sex` int, + `complex` array>> +) +STORED AS ORC +LOCATION '/user/doris/preinstalled_data/orc_table/test_hive_struct_add_column_orc'; + +create table test_hive_struct_add_column_parquet ( + `id` int, + `name` string, + `details` struct, + `sex` int, + `complex` array>> +) +STORED AS parquet +LOCATION '/user/doris/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet'; + diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..df41136523a380d98811004bbde488a3f67f95c4 GIT binary patch literal 578 zcmeYdau#G@;9?VE;b0A5&;~MvxtJLk7=(B@ScTX)m;|^aF5JNk%nY0i3LwJ``9EzxLP%?dGrL#Ts+!S>Waz6J#z 
z<^T;Ij<}wW0gq2k{{O%IOxni_d$!)pS^T#1_MbCLSmYy<*6&&Jv2^qQQqFmLF~RFM zO6R{lR+pf{uk?Q}!xo^kUnM?CdYHhQ@Zizo1kt3&2|^DhP2f)GNO(T!K|-j|gQQZB zOGT4PSp$XK93(wgRMjo0D&tdT)b6xX+rXB-fDM~xPY9?o_%Sj(P7sSYqH^@?L7fZd zH6E^fu*%~mQIlrO3|@2O$U&YXZ@iiuOnr3L z1fA)8xiU!S|gH%Af!M literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_1 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_1 new file mode 100644 index 0000000000000000000000000000000000000000..d6a1045e1d6ce309963c77d3b21aec6be9b29188 GIT binary patch literal 778 zcmeYdau#G@;9?VE;b0A5&;>F@xR@Cj7=(B^ScTX*#02CdnivBZ92giL2R%tjWnfg~ zVgtPBiHY}_D4zZjX67#O^L&grc5VG=y4pv3C3q=^HpfEB3f{|6_aEZ7K-wH{}5 zPk3JSK6l<1Xw@^drY9GVXgM(W@-Xl+F!(VG@{5QpVqow+ZOGSPz`=S!j5{f|r|?Jb z_DitNnxV3WbZ7J_$StJqAn+ zYk3Y2HM@7roY)!JrD^QirK)de9@=HAx>d!{czHx; zXR~C-&8aGmSrHL8m&_2p9K1AQ643Ic5y8ccSr5D#ZZur&YHc=cIM_A6IkaQ0vyX${ zqE5pG#iqMnT|lEdy3bxTR5!K^?Kpc?)W!P-!_y}VqF5TEEG+8`xl9Z_YaX+B6-0P4 zm~s|uRBOr+PV!|96moNr^wbuHBomhjntUroJ9n~LuBys_ o3=f@1Y?{x`%)oJzQGijRp+P{2iGfF>ozZ~3=?gQne~_~z07|wJjsO4v literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_2 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_2 new file mode 100644 index 0000000000000000000000000000000000000000..46cb7e8647c07a62627b456c10068556b12ea842 GIT binary patch literal 940 zcmeYdau#G@;9?VE;b0A5&;>F@xR@Cj7=(B^ScTX*v;>SKo-qb6*f20W4tkQ5mXIJY z=}|(c(8Hwia~?XUPWp&88LHf2l3+kG#1LqR7{m~Mpdp+bTbKn}SR~E}1uz5vjWK$V zl*+)USjrcs>Z*6|!4VtQ?Te1=d2;5_lNv>V?M&yUzDVRrU|>iADlK{hR_YB@nyhD- zoc#Dn&yhuQ&V;mI={j%ACx2oTH$Hllr1MDVyaclZgBeiMq=!lA4GfBod}4}u zefd31ymS8u*?wnbl3?UyU^u{{#l*_Mz{J4Lz~IJEz|X+ooRL_RlbOm5lH0;8;>5tf 
z?RO@)K-=q#_c+5udAnbOerfTr^ofmojWq!D^vv-TDyh2FP>!sg_IwIID?oUgA~vu z|364FFuY^_#<)(9K|7ss6Z;_s2H&$AxmpYaST3YY_D$09Tl369fWom*=McS&}@RwS7%Q#*_V8ynel6vogZxU0jlUd3W-KfA0;e z?sC2gx^XDSu?Yptr@EX>Ihjlgi*gdhG7=|RD6>p?@Zb_lR`8QYyvV^N z&Qz%VAf{7p0`IPvYYt0~#Uvd&cI>3UX4xQZph9jy;XLR00o5}H zm5!YK*c-TD;pF&5&-XbnB)4!dd;>Z!LF~sVouh{j>U@|Kn54Pbsf%s(BoX$00b^z1 z%_6~RT_WCbNqMV8s=cy2lhXV)3JJ3t3vAZbWV*Ux!-h>8y$#iuE%(}}l0GfOYvZzK zOro9}#ZsO;UwUY&lfh=j)uP<&*$mC1_Ux+#tlOtGOgv(IjG>LAOj{C|&;<3$odt4%kP?NckTxh;@DF@xR@Cj7=(B^ScTX*>;&8-PB8^A*f20Wp7b;+m4Q)F zkS|QtPA}~EoHI{C6w|J63gO(#z{Y^j5F-Hxi8(++Jb;D-Jp&t}2{c4duk3lxnI}iS z)F^&?aOKFPNpnIvX8?_g04g(j3|1BkR932Y@CeAR_A6aS?ljE_+56;AjN-p5Mk0)z zsXdH55T`v(N^f9LOym<&jO%Ojt81JmEGqOp?`%3(0t_YI@G~&@u^8D{ zIGDu(S*hg;k@-d0KKaE8j(O>+IjP0LiFpccMTvRInZ?QZ&Ka4>iRt;lsfi^eIS?bd zuNQEl=GvDV{^?g`JU-sjHyp7M8krk1WE_B3VD3J*`6(>{)2lcU;{_!+vqxe564_M1%LkDmfJ%uqZnNgA+5y4h9C_9z(tc10Lqv7nTJ^ zHk{mGxUuoa|K}WGEfQ`aNy|qgIY(< zC8|GAE9}y2P;8p(-P0V}+24KDP~F%vwBzPholqx!jdd-H?syz3=>h(#Pw zI(qn^&WFi?k2DrL?O|IziHW^mfLmGExLqVTtxLo^E-7!7NVQj%XHuHqMv>yetX3gm zc4L9f+L}!vn>K9P=xwOJY`ND)mGo&LUK^J^V+!@!D3 literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_4 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_4 new file mode 100644 index 0000000000000000000000000000000000000000..b8f4d0e0658fd5d8fd3fa93daa6ea991918fb70a GIT binary patch literal 1127 zcmeYdau#G@;9?VE;b0A5&;>F@xR@Cj7=(B^ScTX*f&`)@)-eY#*f20Wp7b;+m4Q)F zkS|QtPA}~EoHI{C6w|J63gO(#z{Y^j5F-Hxi8)LG3?4v3f}Vj5(F7VIs8{y9=ggBM zUuqP;J-BjY(xf>doil(&MF5o5`@IG_W$FJ05t;ZSN6P{PS&z<)@m-@1A?KiikM z^w{1b_EU@7?T>o782x`;K0%I2epoz6I2mU-PmW@px89m zyQev{v%mYQp}MhUXvfX1I-ySd8tYmX-SIf!n8YU>Fr(X>ZADB=_gzb6V@pHZ_zqhO z(~V11)I*inG_p@F<)0GzT*!w-^4Wx_i8G=nOkOp8&V217^MOI5u(3yQ#la?TkqkS1 zcW3qe4(B_$I+>T>Yt9jT_=uTnbD;s}29FPnKV@Qa6pp)WV3Iw`S|TFPE*>$>+ll#! 
z8bcTpL#V&4_hAPKzoPQJ*Eg#fhFJ2kpG_)U@o0&;t4qP8Mc#tbm2Gc1ToRM=x)a07 zUf;0!!K3^F5%K?Rv+ECj7ir-;UvU0-^?7EMe~VV}s?>2lS-wBEG;$aF+m@e-puZuv==p4GI;znkBs=yS07%-`Qpq&>({J&ed&~RNdH&6RlAe!?ns`Q+jIN!;YiOE zaqpIzZ-xa+%s>2-x^vh48>dVS1-EC4>W^}~-#WZc>0%?|MT!jmQTi@*R myZQL&&hN||>zD)>B^nw8l$aQJG`2Guus3~SX7&$qmIMIZ4VI$- literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_5 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_struct_add_column_orc/000000_0_copy_5 new file mode 100644 index 0000000000000000000000000000000000000000..21dd6d9e00eeb6b8e8315a25bb8c366c1a7f4df3 GIT binary patch literal 1393 zcmaJ;c}&x182)-lf4xt%o5mGvYdM69pvWcVS~*55lv;=&1zJQ{OF+OZ#uOABr7D64 zZjGoYV+;>e5TdeD%OO+{0;0|-luK|?$k;$7vL7W|mi@8h{o~F1KHu{`&-00*0;vGt zY-ng`l^AS-TLumV0H6_}RWuBwq^Ou>Xff~sV6<#JNdXXcRFahA=OXRUo5v(}%AuPQ z?0sMZ{*T0s;?LA0#Xty?L`_&p9AFZvOYWr3JZ73ZX!qyvfT^rZFDb5osbnzBZPW^j zhhaG`t)}H$M+dZ~7X^CB{jtT}cAo~^7>Htp4nY9hVa=bfXr{xOiIAq4Own~7U>dsK z!AG6oHGO4GfY&wVLs8SUfitK2) ziWQP2S0$%qC#wVZrX|Z$)zJ$1{{0y%6eUBMQUJou4;O`V~G;XS=yb%_Ed zxT);b<0+SFgrUye5dvv>T#-E)SPxRkbSilR04g-fq+v36#F^UlYyy4WVwJ^av*4%# zfCeA}fPucO5Q#+FPyi0jB)q#<1Xq}ISXiB-^HpI)SR`V^odxy4LpIl*0$V4?1#8-RG0Dl7yK|?^nBOhTm$_ z&AyJ07H;wK{{|KrP8=l4SM(tFRTg$Yax0*p?}ElDGZ z#Kd9CWTI`-q>$cOUSXmcD6Ku5_SRZcN85Bo`8mC#>9@Euju$bET>(2)>|xgCqgbA~ zjf0Z77~9_JPoC3PoOd;?*MBFADl2!SqE0o8^<%nfv$5O)LiWEt!`33&6i6+VwL=4m ztsI<$Eqn1{Ip}qu&V{zs;OQ6h8B;K+D(*nu8>lt)s9`Np}7_NLOaDa7^y!^s3Qipr&6Pl#qHva*yg&L7}!(O&@b7*j>?~A@4nNM5f z7*b^KvE$1;k?P`IW$#b>WicBa?>Fc+ zeq80NM=TB+jMtn;7JXj$GVXPR8avdLQ?9{N_C3hih_{xtDO=;K&p)^V&SoL1uXW6y z^!oWWnlkovw1=@;bp0CrkZz)Aw;_|nE!x&{eP;dy&Nx2(dXD`&5gJEQ5KN7RVvhs_ Ro&~|j6wIMeqNu>n{sN=J`$hl& literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..6a22c41be14b22610daf0a095d8c6942b4464340 GIT binary patch literal 1008 zcmbVL%WB&|6djFZTeu0dlo6v=C4vy)g5yfIEW416`+!nH2+1m|GL|RyP(NB(NoYT! zWYrJoviJ+S%qL`(&*>f28i^D_2+Y+S-FxntBkAgJa1;d%^@3WiOfMJveK-T2DQEa2BzG5h1Kzm+_JQqNnC2yrRH5>FjCCJt?4L> zUF*t-5;Nq}W@IQufty2`!M-xp{MWx4&h(_3#quODEW}8o}%`ORISY_UI_yQlBQKZ zUjXIQhv*>>kOQYaKpr8F(piJSrKnQTj{V=+ncvQ?N1U}^djt`m_)kd*gieYURUaaR zsJabL166Y~aA2^)W4TlY+W~t3CKDigNQNs+fU5fGX-`C)>g~3CL(n{32Q_y|b#x>b zr8+*&)X7Pv<_LfXBv`?z`W@n_`Wc?G#?=fQ1!+IAT>Ap{73>?Bi~&we;uQyq)OaK& zAE?IU3qlsz8pzZoMJU&#h%(X5M4AoQnF1pR#?`$m`3nc(3x36tVUIh}AfdZVo-FTL z?y2m`J=d4z-L66QvbYWYO`8&IetNkbi%b4?9K}3gL`MYgV~Rx=kunIi&l0|mPy^Gg zE}t;b8)q>EcFq?;^2`vFW@vyBJ(w9K(=t1D(SltXMID)vO2BPQ6&QB0^^b-!Ex0Sqr;|esox8Pt1;mP=25ee7iIF7x@jCC)kq! literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_2 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_2 new file mode 100644 index 0000000000000000000000000000000000000000..fbfdca32d4f6a9a7b6c551d75f2cd974bad768f5 GIT binary patch literal 1423 zcmb7^zi-n(6vxkJH%&t;vfN=KOJr#jq+*;nzbbWTg@h1dsOTSnuFk2k6^q=Uz&{YgLpER^7sZDB2 zTVi|of)*d@T}qkS1e+6 z&v~IZl*63qkxXO#n}#cUi&dP~ax)mx6l==&1*Nm<5~89Rwcb6s)erXN3*a>hCZt8p`JNn$AdFctjnT!9oCNv7_%(u(6?4D@p_(bs+-`p~l!TWo z+X|~i8#}g&1!Ack7|i^=hE^@E{pCeiHqHth#8*FPHe0k>MzLU}+7igQ#?LkQ*0S2p z1~UuaTERmxKm=C(0Q{L|+G>|4RVPgN#~MG@5CWCcmvHS}8^-g?#c0kK~&k7Q5=kh zmAX@R9k){3-V4HB#r5i<-EK8qcU5}Z-GMnN}IE+FQpoJD0 zlqa5@g2z^y`YvZ;=_&2=rdWbdAl67NB5H~O3!0+Czep^afkDK`-z2ei2P6k#fZPTF z&KC?LB0+Ac2BW>6%e2&1n>)1Sf#aBw2K5!&H{N>w`&rPpy}+n2e`Qcc#Q+zqaLWZGh5Cx`bwf>Rig01S7{IipgBpbJU8kj&GB0R2y6$I(You_q~x&Lu<-usXozktqCMn=y_X%!K7r#My}> 
z4Ix9U4zW3y0Z7zp*BVb>6+H{52S+F#@KqUR)C|&d+QPoid(1ENc!w1}LC0wG^?=hy zjW+GJ+LR;hSLPkGoC7Di>+3R;^gpV^+4;OIFFU kiX~cHuaqlJb-iRa*DB6hnLf}D;cGQL&<(h7JMe-21sy5-$p8QV literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_4 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_struct_add_column_parquet/000000_0_copy_4 new file mode 100644 index 0000000000000000000000000000000000000000..7cb59513accf71709a9cb1386daf6dd26bf60e8f GIT binary patch literal 1704 zcma)7y>8P`6u#F#Nt(85DZQ30BJk2EEhQvv5{kso@+X8Ugs2#p(6w`wSR`?ALMTi; z074Z*p8zp*-~o6521aHk)E8hwV&OZEsST|V(!KWi{m$|EN$zhws0$_A?nT^HTylW7C%hD<#VY!(v9isrUf?SjOk7P+u|x-P zJ@oQ;-RR1w7bsHQl;faHD!2l%C0Z$>ZMaa_hJz<1mU0k~kAA3;?mTV@*Tp@D3!Jwc zCq+VT`5h%kqn@(!H@bl@J0T1ci~`7e>bd(UjGjz`-VTTE)}W($YB&paM-JTEksSEJ z&<`i+#{fhoposXD9vqEzJD~FRM&MnNL#GBuET;i02Z;~467K=0N;g1|6{r8_!Oc-Kb%x~jm zFvej^%?9{_SfCmh$zYMxHb_tXOS!~BzrgiH7*|APo4ise3JsRghyZp)tgH&M%!wT4 zm8225GqkB5F&^`$66wxrow&}L!=gfWnPwZvFc zv-52=l0k1M3KJV$c2r{I2cs7-iakug1U=#i$lUFQ1DFx557k}_gr1}IdqNXA)--J0 zlO3UvW>!X#eDN3q)>nfOWU(KY%qlu1=XJSILdH#2$T^gufY^G&ams~j|5#S1gh~Ml zpI$!TB{7pg#NQT!yyMn84&AlCb~WqyQ9BM^`CSSsK@70Zt$kkps38p(VNPAmEZArL zIE`a4gQ^ dYZ3mp|*2&dK%6;(Rbq;!GM41AWn~FIxG7NkIRzby?_FX|az}oOCnu z#jmLF>2A<4ryR?|?N7z!E949(9)uX=fLjN=ygvnybO#6|<(RWd%S5w4%M~~AeUY%1 z0Cgvy%o5GepVPx7LM&RJI8OO}5h2XDGryNEatPi`8gWa?x6jMS?!99i9)kIn%X`Pj< zC^>~fp{F8p@VyXvC?SU&bLgR`l4A}fhn|Z6fr1Z(_R!y3$=Z!=OvS#Pd0)Tx_PsY_ z-`@GEDwwE?`nt$rlNQ;6Wo$CWETe|E$w~4T}T!GjW%^1-#94IWq#+wq0IS9x{ z|5+p52cQDT0lg0b&J&I&MnZ0xJtc?3uCg*WJH98|0SsptS!};l51em;@ZKW!yTQQO z>9DEpp2@PcXj5ddvxposVf$MaUi6*W@ZYAkyPT~+pio^?f1Ws)Kw z%yPJb?K$-dgl^y_J>G@Lo0cyrCifPVGAShYGdY|uae8$HCEos%MmO;kwNkmRj9kB#o(N#V z%?U5Q)R2aQFc)4!Nw8P?JdHV+K^2bBj=6cvi?4J_=loTa#Ce`Nk27sf1oT=nUu(tZ zDFXUGH%t=xaa`9LVtgp&DNAh0aKY13&0H z)pE5`t~e$4(C@aKO06oJ%|^Xa`B>H5t&Pe?rBdCH)vbE1?lrbH+}38@+pNhC?PvIl Pddb7SGZ?##|0eze9vp0D literal 
0 HcmV?d00001 diff --git a/regression-test/data/external_table_p0/hive/test_hive_struct_add_column.out b/regression-test/data/external_table_p0/hive/test_hive_struct_add_column.out new file mode 100644 index 00000000000000..869d6574fe933e --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_struct_add_column.out @@ -0,0 +1,411 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !desc -- +id int Yes true \N +name text Yes true \N +details struct Yes true \N +sex int Yes true \N +complex array>> Yes true \N + +-- !test_1 -- +1 Alice {"age":25, "city":"New York", "email":null, "phone":null} \N \N +2 Blice {"age":26, "city":"New York New York", "email":null, "phone":null} \N \N +3 Clice {"age":27, "city":"New York New York New York", "email":null, "phone":null} \N \N +4 Dlice {"age":28, "city":"New York New York New York New York", "email":null, "phone":null} \N \N +5 Elice {"age":29, "city":"New York New York New York New York New York", "email":null, "phone":null} \N \N +11 AAlice {"age":125, "city":"acity", "email":"alice@example.com", "phone":null} \N \N +12 BBlice {"age":126, "city":"bcity", "email":"bob@example.com", "phone":null} \N \N +13 CClice {"age":127, "city":"ccity", "email":"alicebob@example.com", "phone":null} \N \N +14 DDlice {"age":128, "city":"dcity", "email":"xxxxxbob@example.com", "phone":null} \N \N +15 EElice {"age":129, "city":"ecity", "email":null, "phone":null} \N \N +21 Charlie {"age":218, "city":"San Francisco", "email":"asdacharlie@example.com", "phone":123} \N \N +22 Charlie {"age":228, "city":"San-Francisco", "email":"ssscharlie@example.com", "phone":1234} \N \N +23 Charlie {"age":238, "city":"SanxFrancisco", "email":"333charlie@example.com", "phone":12345} \N \N +24 Charlie {"age":248, "city":"San888Francisco", "email":"777charlie@example.com", "phone":123456} \N \N +25 Charlie {"age":258, "city":"San0000Francisco", "email":"9999chasasrlie@example.com", "phone":null} 
\N \N +31 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 0 \N +32 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 0 \N +33 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 \N +34 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 0 \N +35 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":null} \N \N +41 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":null}, {"a":1, "b":null}] +42 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 1 [{"a":2, "b":null}, {"a":1, "b":null}] +43 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 2 [{"a":3, "b":null}, {"a":1, "b":null}] +44 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 1 [{"a":4, "b":null}, {"a":1, "b":null}] +45 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 2 [{"a":5, "b":null}, {"a":1, "b":null}] +51 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +52 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 2 [{"a":2, "b":{"aa":"bar", "bb":200}}] +53 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 [{"a":3, "b":{"aa":"baz", "bb":300}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] +55 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 1 [{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_2 -- +1 Alice {"age":25, "city":"New York", "email":null, "phone":null} \N \N + +-- !test_3 -- +1 Alice {"age":25, "city":"New York", 
"email":null, "phone":null} \N \N +2 Blice {"age":26, "city":"New York New York", "email":null, "phone":null} \N \N +3 Clice {"age":27, "city":"New York New York New York", "email":null, "phone":null} \N \N +4 Dlice {"age":28, "city":"New York New York New York New York", "email":null, "phone":null} \N \N +5 Elice {"age":29, "city":"New York New York New York New York New York", "email":null, "phone":null} \N \N +11 AAlice {"age":125, "city":"acity", "email":"alice@example.com", "phone":null} \N \N +12 BBlice {"age":126, "city":"bcity", "email":"bob@example.com", "phone":null} \N \N +13 CClice {"age":127, "city":"ccity", "email":"alicebob@example.com", "phone":null} \N \N +14 DDlice {"age":128, "city":"dcity", "email":"xxxxxbob@example.com", "phone":null} \N \N +15 EElice {"age":129, "city":"ecity", "email":null, "phone":null} \N \N +21 Charlie {"age":218, "city":"San Francisco", "email":"asdacharlie@example.com", "phone":123} \N \N +22 Charlie {"age":228, "city":"San-Francisco", "email":"ssscharlie@example.com", "phone":1234} \N \N +23 Charlie {"age":238, "city":"SanxFrancisco", "email":"333charlie@example.com", "phone":12345} \N \N +24 Charlie {"age":248, "city":"San888Francisco", "email":"777charlie@example.com", "phone":123456} \N \N +25 Charlie {"age":258, "city":"San0000Francisco", "email":"9999chasasrlie@example.com", "phone":null} \N \N +31 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 0 \N +32 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 0 \N +33 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 \N +34 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 0 \N +35 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":null} \N \N + +-- !test_4 -- +41 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":null}, {"a":1, "b":null}] +42 Bob {"age":30, "city":"Los 
Angeles", "email":"bob@example.com", "phone":789012} 1 [{"a":2, "b":null}, {"a":1, "b":null}] +43 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 2 [{"a":3, "b":null}, {"a":1, "b":null}] +44 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 1 [{"a":4, "b":null}, {"a":1, "b":null}] +45 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 2 [{"a":5, "b":null}, {"a":1, "b":null}] +51 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +52 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 2 [{"a":2, "b":{"aa":"bar", "bb":200}}] +53 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 [{"a":3, "b":{"aa":"baz", "bb":300}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] +55 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 1 [{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_5 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !test_6 -- +[{"a":1, "b":null}, {"a":1, "b":null}] +[{"a":2, "b":null}, {"a":1, "b":null}] +[{"a":3, "b":null}, {"a":1, "b":null}] +[{"a":4, "b":null}, {"a":1, "b":null}] +[{"a":5, "b":null}, {"a":1, "b":null}] +[{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +[{"a":2, "b":{"aa":"bar", "bb":200}}] +[{"a":3, "b":{"aa":"baz", "bb":300}}] +[{"a":8, "b":{"aa":"qux", "bb":400}}] +[{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_7 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !test_8 -- +[{"a":1, "b":null}, 
{"a":1, "b":null}] +[{"a":2, "b":null}, {"a":1, "b":null}] +[{"a":3, "b":null}, {"a":1, "b":null}] +[{"a":4, "b":null}, {"a":1, "b":null}] +[{"a":5, "b":null}, {"a":1, "b":null}] +[{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +[{"a":2, "b":{"aa":"bar", "bb":200}}] +[{"a":3, "b":{"aa":"baz", "bb":300}}] +[{"a":8, "b":{"aa":"qux", "bb":400}}] +[{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_9 -- +0 +0 +0 + +-- !test_10 -- +1 +1 +1 +1 +1 +1 +1 + +-- !test_11 -- +2 +2 +2 +2 + +-- !test_12 -- +43 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 2 [{"a":3, "b":null}, {"a":1, "b":null}] +45 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 2 [{"a":5, "b":null}, {"a":1, "b":null}] +52 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 2 [{"a":2, "b":{"aa":"bar", "bb":200}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] + +-- !test_13 -- + +-- !test_14 -- +53 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 [{"a":3, "b":{"aa":"baz", "bb":300}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] + +-- !test_15 -- +41 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":null}, {"a":1, "b":null}] {"a":1, "b":null} +51 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] {"a":1, "b":{"aa":"foo", "bb":100}} + +-- !test_16 -- +[{"a":2, "b":null}, {"a":1, "b":null}] + +-- !test_17 -- +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 
+{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} + +-- !test_18 -- +{"age":25, "city":"New York", "email":null, "phone":null} + +-- !test_19 -- +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 31 +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 41 +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 51 + +-- !test_20 -- +{"age":25, "city":"New York", "email":null, "phone":null} 1 + +-- !test_21 -- +0 3 +2 4 +1 7 +\N 16 + +-- !desc -- +id int Yes true \N +name text Yes true \N +details struct Yes true \N +sex int Yes true \N +complex array>> Yes true \N + +-- !test_1 -- +1 Alice {"age":25, "city":"New York", "email":null, "phone":null} \N \N +2 Blice {"age":26, "city":"New York New York", "email":null, "phone":null} \N \N +3 Clice {"age":27, "city":"New York New York New York", "email":null, "phone":null} \N \N +4 Dlice {"age":28, "city":"New York New York New York New York", "email":null, "phone":null} \N \N +5 Elice {"age":29, "city":"New York New York New York New York New York", "email":null, "phone":null} \N \N +11 AAlice {"age":125, "city":"acity", "email":"alice@example.com", "phone":null} \N \N +12 BBlice {"age":126, "city":"bcity", "email":"bob@example.com", "phone":null} \N \N +13 CClice {"age":127, "city":"ccity", "email":"alicebob@example.com", "phone":null} \N \N +14 DDlice {"age":128, "city":"dcity", "email":"xxxxxbob@example.com", "phone":null} \N \N +15 EElice {"age":129, "city":"ecity", "email":null, "phone":null} \N \N +21 Charlie {"age":218, "city":"San Francisco", "email":"asdacharlie@example.com", "phone":123} \N \N +22 Charlie {"age":228, "city":"San-Francisco", "email":"ssscharlie@example.com", "phone":1234} \N \N +23 Charlie {"age":238, "city":"SanxFrancisco", "email":"333charlie@example.com", "phone":12345} \N \N +24 Charlie {"age":248, "city":"San888Francisco", "email":"777charlie@example.com", "phone":123456} \N \N +25 Charlie 
{"age":258, "city":"San0000Francisco", "email":"9999chasasrlie@example.com", "phone":null} \N \N +31 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 0 \N +32 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 0 \N +33 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 \N +34 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 0 \N +35 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":null} \N \N +41 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":null}, {"a":1, "b":null}] +42 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 1 [{"a":2, "b":null}, {"a":1, "b":null}] +43 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 2 [{"a":3, "b":null}, {"a":1, "b":null}] +44 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 1 [{"a":4, "b":null}, {"a":1, "b":null}] +45 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 2 [{"a":5, "b":null}, {"a":1, "b":null}] +51 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +52 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 2 [{"a":2, "b":{"aa":"bar", "bb":200}}] +53 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 [{"a":3, "b":{"aa":"baz", "bb":300}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] +55 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 1 [{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_2 -- +1 Alice {"age":25, "city":"New York", 
"email":null, "phone":null} \N \N + +-- !test_3 -- +1 Alice {"age":25, "city":"New York", "email":null, "phone":null} \N \N +2 Blice {"age":26, "city":"New York New York", "email":null, "phone":null} \N \N +3 Clice {"age":27, "city":"New York New York New York", "email":null, "phone":null} \N \N +4 Dlice {"age":28, "city":"New York New York New York New York", "email":null, "phone":null} \N \N +5 Elice {"age":29, "city":"New York New York New York New York New York", "email":null, "phone":null} \N \N +11 AAlice {"age":125, "city":"acity", "email":"alice@example.com", "phone":null} \N \N +12 BBlice {"age":126, "city":"bcity", "email":"bob@example.com", "phone":null} \N \N +13 CClice {"age":127, "city":"ccity", "email":"alicebob@example.com", "phone":null} \N \N +14 DDlice {"age":128, "city":"dcity", "email":"xxxxxbob@example.com", "phone":null} \N \N +15 EElice {"age":129, "city":"ecity", "email":null, "phone":null} \N \N +21 Charlie {"age":218, "city":"San Francisco", "email":"asdacharlie@example.com", "phone":123} \N \N +22 Charlie {"age":228, "city":"San-Francisco", "email":"ssscharlie@example.com", "phone":1234} \N \N +23 Charlie {"age":238, "city":"SanxFrancisco", "email":"333charlie@example.com", "phone":12345} \N \N +24 Charlie {"age":248, "city":"San888Francisco", "email":"777charlie@example.com", "phone":123456} \N \N +25 Charlie {"age":258, "city":"San0000Francisco", "email":"9999chasasrlie@example.com", "phone":null} \N \N +31 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 0 \N +32 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 0 \N +33 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 \N +34 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 0 \N +35 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":null} \N \N + +-- !test_4 -- +41 Alice {"age":25, "city":"New York", "email":"alice@example.com", 
"phone":123456} 1 [{"a":1, "b":null}, {"a":1, "b":null}] +42 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 1 [{"a":2, "b":null}, {"a":1, "b":null}] +43 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 2 [{"a":3, "b":null}, {"a":1, "b":null}] +44 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 1 [{"a":4, "b":null}, {"a":1, "b":null}] +45 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 2 [{"a":5, "b":null}, {"a":1, "b":null}] +51 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +52 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 2 [{"a":2, "b":{"aa":"bar", "bb":200}}] +53 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 [{"a":3, "b":{"aa":"baz", "bb":300}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] +55 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 1 [{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_5 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !test_6 -- +[{"a":1, "b":null}, {"a":1, "b":null}] +[{"a":2, "b":null}, {"a":1, "b":null}] +[{"a":3, "b":null}, {"a":1, "b":null}] +[{"a":4, "b":null}, {"a":1, "b":null}] +[{"a":5, "b":null}, {"a":1, "b":null}] +[{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +[{"a":2, "b":{"aa":"bar", "bb":200}}] +[{"a":3, "b":{"aa":"baz", "bb":300}}] +[{"a":8, "b":{"aa":"qux", "bb":400}}] +[{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_7 -- +\N +\N +\N +\N +\N +\N +\N +\N 
+\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !test_8 -- +[{"a":1, "b":null}, {"a":1, "b":null}] +[{"a":2, "b":null}, {"a":1, "b":null}] +[{"a":3, "b":null}, {"a":1, "b":null}] +[{"a":4, "b":null}, {"a":1, "b":null}] +[{"a":5, "b":null}, {"a":1, "b":null}] +[{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] +[{"a":2, "b":{"aa":"bar", "bb":200}}] +[{"a":3, "b":{"aa":"baz", "bb":300}}] +[{"a":8, "b":{"aa":"qux", "bb":400}}] +[{"a":5, "b":{"aa":"abcd", "bb":500}}, {"a":5, "b":{"aa":"abcdffff", "bb":5000}}, {"a":5, "b":{"aa":"abcdtttt", "bb":500000}}] + +-- !test_9 -- +0 +0 +0 + +-- !test_10 -- +1 +1 +1 +1 +1 +1 +1 + +-- !test_11 -- +2 +2 +2 +2 + +-- !test_12 -- +43 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 2 [{"a":3, "b":null}, {"a":1, "b":null}] +45 Eve {"age":27, "city":"Seattle", "email":"eve@example.com", "phone":654321} 2 [{"a":5, "b":null}, {"a":1, "b":null}] +52 Bob {"age":30, "city":"Los Angeles", "email":"bob@example.com", "phone":789012} 2 [{"a":2, "b":{"aa":"bar", "bb":200}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] + +-- !test_13 -- + +-- !test_14 -- +53 Charlie {"age":28, "city":"San Francisco", "email":"charlie@example.com", "phone":456789} 1 [{"a":3, "b":{"aa":"baz", "bb":300}}] +54 David {"age":32, "city":"Chicago", "email":"david@example.com", "phone":987654} 2 [{"a":8, "b":{"aa":"qux", "bb":400}}] + +-- !test_15 -- +41 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":null}, {"a":1, "b":null}] {"a":1, "b":null} +51 Alice {"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 1 [{"a":1, "b":{"aa":"foo", "bb":100}}, {"a":1, "b":{"aa":"foo", "bb":100}}] {"a":1, "b":{"aa":"foo", "bb":100}} + +-- !test_16 -- +[{"a":2, "b":null}, {"a":1, "b":null}] + +-- !test_17 -- +{"age":25, "city":"New York", "email":"alice@example.com", 
"phone":123456} +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} + +-- !test_18 -- +{"age":25, "city":"New York", "email":null, "phone":null} + +-- !test_19 -- +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 31 +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 41 +{"age":25, "city":"New York", "email":"alice@example.com", "phone":123456} 51 + +-- !test_20 -- +{"age":25, "city":"New York", "email":null, "phone":null} 1 + +-- !test_21 -- +0 3 +2 4 +1 7 +\N 16 + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_struct_add_column.groovy b/regression-test/suites/external_table_p0/hive/test_hive_struct_add_column.groovy new file mode 100644 index 00000000000000..143a98195d12fc --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_struct_add_column.groovy @@ -0,0 +1,169 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// Regression suite for Hive tables whose struct columns gained sub-fields after older data files were written.
+// The same list of queries runs first against an ORC table and then against a Parquet table; struct
+// sub-fields that are absent from the older files are expected to read back as NULL instead of failing.
+suite("test_hive_struct_add_column", "all_types,p0,external,hive,external_docker,external_docker_hive") {
+    // Everything below is skipped unless the `enableHiveTest` config value equals "true"
+    // (compared case-insensitively).
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String hivePrefix ="hive3"; // prefix of the "<prefix>HmsPort" / "<prefix>HdfsPort" config keys read below
+        setHivePrefix(hivePrefix)
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")
+        // Drop any leftover catalog of the same name, then create it again pointing at the external HMS/HDFS.
+        String catalog_name = "test_hive_struct_add_column"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
+                'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hmsPort}'
+            );
+        """
+        // Switch to the `default` database of the new catalog so the table names below need no qualifier.
+        sql """use `${catalog_name}`.`default`"""
+        // ORC table. The expected rows for each `qt_<tag>` call are the `-- !<tag> --` sections of test_hive_struct_add_column.out.
+        qt_desc """ desc test_hive_struct_add_column_orc;"""
+        qt_test_1 """ select * from test_hive_struct_add_column_orc order by id;"""
+        qt_test_2 """ select * from test_hive_struct_add_column_orc where id = 1 order by id;"""
+        qt_test_3 """ select * from test_hive_struct_add_column_orc where complex is null order by id;"""
+        qt_test_4 """ select * from test_hive_struct_add_column_orc where complex is not null order by id"""
+        qt_test_5 """ select complex from test_hive_struct_add_column_orc where complex is null order by id """
+        qt_test_6 """ select complex from test_hive_struct_add_column_orc where complex is not null order by id """
+        qt_test_7 """select complex from test_hive_struct_add_column_orc where complex is null order by id; """
+        qt_test_8 """select complex from test_hive_struct_add_column_orc where complex is not null order by id;"""
+        qt_test_9 """select sex from test_hive_struct_add_column_orc where sex = 0 order by id;"""
+        qt_test_10 """select sex from test_hive_struct_add_column_orc where sex = 1 order by id;"""
+        qt_test_11 """select sex from test_hive_struct_add_column_orc where sex = 2 order by id;"""
+        qt_test_12 """select * from test_hive_struct_add_column_orc where sex = 2 order by id; """
+        qt_test_13 """select * from test_hive_struct_add_column_orc where id =sex order by id;"""
+        qt_test_14 """select * from test_hive_struct_add_column_orc where id -52=sex order by id;"""
+        qt_test_15 """select *,complex[1] from test_hive_struct_add_column_orc where struct_element(complex[1],1) = 1 order by id;""" // complex[1] is the first array element; sub-field 1 of it is `a`
+        qt_test_16 """ select complex from test_hive_struct_add_column_orc where struct_element(complex[1],1) = 2 and struct_element(complex[1],2) is null order by id ; """
+        qt_test_17 """select details from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is not null order by id;""" // sub-field 1 of details is `age`, sub-field 4 is `phone`
+        qt_test_18 """select details from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is null order by id;"""
+        qt_test_19 """ select details,id from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is not null order by id ;"""
+        qt_test_20 """ select details,id from test_hive_struct_add_column_orc where struct_element(details,1) = 25 and struct_element(details,4) is null order by id;"""
+        qt_test_21 """ select sex,count(*) from test_hive_struct_add_column_orc group by sex order by count(*);"""
+
+        // Parquet table: the same queries under the same tags as the ORC section above, only the table name differs.
+
+        qt_desc """ desc test_hive_struct_add_column_parquet;"""
+        qt_test_1 """ select * from test_hive_struct_add_column_parquet order by id;"""
+        qt_test_2 """ select * from test_hive_struct_add_column_parquet where id = 1 order by id;"""
+        qt_test_3 """ select * from test_hive_struct_add_column_parquet where complex is null order by id;"""
+        qt_test_4 """ select * from test_hive_struct_add_column_parquet where complex is not null order by id"""
+        qt_test_5 """ select complex from test_hive_struct_add_column_parquet where complex is null order by id """
+        qt_test_6 """ select complex from test_hive_struct_add_column_parquet where complex is not null order by id """
+        qt_test_7 """select complex from test_hive_struct_add_column_parquet where complex is null order by id; """
+        qt_test_8 """select complex from test_hive_struct_add_column_parquet where complex is not null order by id;"""
+        qt_test_9 """select sex from test_hive_struct_add_column_parquet where sex = 0 order by id;"""
+        qt_test_10 """select sex from test_hive_struct_add_column_parquet where sex = 1 order by id;"""
+        qt_test_11 """select sex from test_hive_struct_add_column_parquet where sex = 2 order by id;"""
+        qt_test_12 """select * from test_hive_struct_add_column_parquet where sex = 2 order by id; """
+        qt_test_13 """select * from test_hive_struct_add_column_parquet where id =sex order by id;"""
+        qt_test_14 """select * from test_hive_struct_add_column_parquet where id -52=sex order by id;"""
+        qt_test_15 """select *,complex[1] from test_hive_struct_add_column_parquet where struct_element(complex[1],1) = 1 order by id;"""
+        qt_test_16 """ select complex from test_hive_struct_add_column_parquet where struct_element(complex[1],1) = 2 and struct_element(complex[1],2) is null order by id ; """
+        qt_test_17 """select details from test_hive_struct_add_column_parquet where struct_element(details,1) = 25 and struct_element(details,4) is not null order by id;"""
+        qt_test_18 """select details from test_hive_struct_add_column_parquet where struct_element(details,1) = 25 and struct_element(details,4) is null order by id;"""
+        qt_test_19 """ select details,id from test_hive_struct_add_column_parquet where struct_element(details,1) = 25 and struct_element(details,4) is not null order by id ;"""
+        qt_test_20 """ select details,id from test_hive_struct_add_column_parquet where struct_element(details,1) = 25 and struct_element(details,4) is null order by id;"""
+        qt_test_21 """ select sex,count(*) from test_hive_struct_add_column_parquet group by sex order by count(*);"""
+
+        // Remove the catalog created at the top of this block.
+
+        sql """drop catalog if exists ${catalog_name}"""
+    }
+}
+
+/* Reference HiveQL: builds the ORC data set step by step, widening the schema between the inserts.
+drop table user_info_orc;
+CREATE TABLE user_info_orc (
+    id INT,
+    name STRING,
+    details STRUCT<age:INT, city:STRING>
+)
+stored as orc;
+INSERT INTO TABLE user_info_orc
+VALUES
+    (1, 'Alice', named_struct('age', 25, 'city', 'New York')),
+    (2, 'Blice', named_struct('age', 26, 'city', 'New York New York')),
+    (3, 'Clice', named_struct('age', 27, 'city', 'New York New York New York')),
+    (4, 'Dlice', named_struct('age', 28, 'city', 'New York New York New York New York')),
+    (5, 'Elice', named_struct('age', 29, 'city', 'New York New York New York New York New York'));
+ALTER TABLE user_info_orc CHANGE COLUMN details details STRUCT<age:INT, city:STRING, email:STRING>;
+INSERT INTO TABLE user_info_orc
+VALUES
+    (11, 'AAlice', named_struct('age', 125, 'city', 'acity', 'email', 'alice@example.com')),
+    (12, 'BBlice', named_struct('age', 126, 'city', 'bcity', 'email', 'bob@example.com')),
+    (13, 'CClice', named_struct('age', 127, 'city', 'ccity', 'email', 'alicebob@example.com')),
+    (14, 'DDlice', named_struct('age', 128, 'city', 'dcity', 'email', 'xxxxxbob@example.com')),
+    (15, 'EElice', named_struct('age', 129, 'city', 'ecity', 'email', NULL));
+ALTER TABLE user_info_orc CHANGE COLUMN details details STRUCT<age:INT, city:STRING, email:STRING, phone:INT>;
+INSERT INTO user_info_orc
+VALUES
+    (21, 'Charlie', named_struct('age', 218, 'city', 'San Francisco', 'email', 'asdacharlie@example.com','phone',123)),
+    (22, 'Charlie', named_struct('age', 228, 'city', 'San-Francisco', 'email', 'ssscharlie@example.com','phone',1234)),
+    (23, 'Charlie', named_struct('age', 238, 'city', 'SanxFrancisco', 'email', '333charlie@example.com','phone',12345)),
+    (24, 'Charlie', named_struct('age', 248, 'city', 'San888Francisco', 'email', '777charlie@example.com','phone',123456)),
+    (25, 'Charlie', named_struct('age', 258, 'city', 'San0000Francisco', 'email', '9999chasasrlie@example.com','phone',NULL));
+desc user_info_orc;
+ALTER TABLE user_info_orc add columns (sex int);
+INSERT INTO TABLE user_info_orc
+VALUES
+    (31, 'Alice',
named_struct('age', 25, 'city', 'New York', 'email', 'alice@example.com', 'phone', 123456),0),
+    (32, 'Bob', named_struct('age', 30, 'city', 'Los Angeles', 'email', 'bob@example.com', 'phone', 789012),0),
+    (33, 'Charlie', named_struct('age', 28, 'city', 'San Francisco', 'email', 'charlie@example.com', 'phone', 456789),1),
+    (34, 'David', named_struct('age', 32, 'city', 'Chicago', 'email', 'david@example.com', 'phone', 987654),0),
+    (35, 'Eve', named_struct('age', 27, 'city', 'Seattle', 'email', 'eve@example.com', 'phone', NULL),NULL);
+ALTER TABLE user_info_orc add columns (complex array<struct<a:int>>);
+INSERT INTO TABLE user_info_orc
+VALUES
+    (41,'Alice', named_struct('age', 25, 'city', 'New York', 'email', 'alice@example.com', 'phone', 123456), 1, array(named_struct('a', 1),named_struct('a', 1))),
+    (42,'Bob', named_struct('age', 30, 'city', 'Los Angeles', 'email', 'bob@example.com', 'phone', 789012), 1, array(named_struct('a', 2),named_struct('a', 1))),
+    (43,'Charlie', named_struct('age', 28, 'city', 'San Francisco', 'email', 'charlie@example.com', 'phone', 456789), 2, array(named_struct('a', 3),named_struct('a', 1))),
+    (44,'David', named_struct('age', 32, 'city', 'Chicago', 'email', 'david@example.com', 'phone', 987654), 1, array(named_struct('a', 4),named_struct('a', 1))),
+    (45,'Eve', named_struct('age', 27, 'city', 'Seattle', 'email', 'eve@example.com', 'phone', 654321), 2, array(named_struct('a', 5),named_struct('a', 1)));
+
+ALTER TABLE user_info_orc CHANGE COLUMN complex complex array<struct<a:int,b:struct<aa:string,bb:int>>>;
+INSERT INTO TABLE user_info_orc
+VALUES
+    (51, 'Alice', named_struct('age', 25, 'city', 'New York', 'email', 'alice@example.com', 'phone', 123456), 1, array(named_struct('a', 1, 'b', named_struct('aa', 'foo', 'bb', 100)),named_struct('a', 1, 'b', named_struct('aa', 'foo', 'bb', 100)))),
+    (52, 'Bob', named_struct('age', 30, 'city', 'Los Angeles', 'email', 'bob@example.com', 'phone', 789012), 2, array(named_struct('a', 2, 'b', named_struct('aa', 'bar', 'bb', 200)))),
+    (53, 'Charlie', named_struct('age', 28, 'city', 'San Francisco', 'email', 'charlie@example.com', 'phone', 456789), 1, array(named_struct('a', 3, 'b', named_struct('aa', 'baz', 'bb', 300)))),
+    (54, 'David', named_struct('age', 32, 'city', 'Chicago', 'email', 'david@example.com', 'phone', 987654), 2, array(named_struct('a', 8, 'b', named_struct('aa', 'qux', 'bb', 400)))),
+    (55, 'Eve', named_struct('age', 27, 'city', 'Seattle', 'email', 'eve@example.com', 'phone', 654321), 1, array(named_struct('a', 5, 'b', named_struct('aa', 'abcd', 'bb', 500)),named_struct('a', 5, 'b', named_struct('aa', 'abcdffff', 'bb', 5000)),named_struct('a', 5, 'b', named_struct('aa', 'abcdtttt', 'bb', 500000))));
+
+
+cp user_info_orc/ => test_hive_struct_add_column_orc/
+
+create table test_hive_struct_add_column_orc (
+  `id` int,
+  `name` string,
+  `details` struct<age:int,city:string,email:string,phone:int>,
+  `sex` int,
+  `complex` array<struct<a:int,b:struct<aa:string,bb:int>>>
+)
+STORED AS ORC
+LOCATION '/user/doris/preinstalled_data/orc_table/test_hive_struct_add_column_orc';
+
+*/
\ No newline at end of file