From 397389deda8c1c832651270f6c284a1409d0af44 Mon Sep 17 00:00:00 2001
From: slothever
Date: Thu, 22 Aug 2024 18:55:23 +0800
Subject: [PATCH] [fix](multi-catalog)fix max compute array and map type read offset (#39680)

fix for https://github.com/apache/doris/pull/39259

(cherry picked from commit ab56888d5b4e7dc11d030fd177e8647cb47fb045)
---
 .../maxcompute/MaxComputeColumnValue.java     |  47 ++++-
 .../mc/test_max_compute_complex_type.out      |  17 ++
 .../mc/test_max_compute_complex_type.groovy   | 175 ++++++++++++++++++
 3 files changed, 234 insertions(+), 5 deletions(-)
 create mode 100644 regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out
 create mode 100644 regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy

diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
index 65810163840e34..644caf80d97d0a 100644
--- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
+++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
@@ -32,8 +32,12 @@
 import org.apache.arrow.vector.SmallIntVector;
 import org.apache.arrow.vector.TimeStampNanoVector;
 import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
 import org.apache.log4j.Logger;

 import java.math.BigDecimal;
@@ -49,15 +53,22 @@ public class MaxComputeColumnValue implements ColumnValue {

     private static final Logger LOG = Logger.getLogger(MaxComputeColumnValue.class);
     private int idx;
-    private FieldVector column;
+    private int offset = 0; // for complex type
+    private ValueVector column;

     public MaxComputeColumnValue() {
         idx = 0;
     }

-    public void reset(FieldVector column) {
+    public MaxComputeColumnValue(ValueVector valueVector, int i) {
+        this.column = valueVector;
+        this.idx = i;
+    }
+
+    public void reset(ValueVector column) {
         this.column = column;
         this.idx = 0;
+        this.offset = 0;
     }

     @Override
@@ -222,16 +233,42 @@ public byte[] getBytes() {

     @Override
     public void unpackArray(List<ColumnValue> values) {
-
+        skippedIfNull();
+        ListVector listCol = (ListVector) column;
+        int elemSize = listCol.getObject(idx).size();
+        for (int i = 0; i < elemSize; i++) {
+            MaxComputeColumnValue val = new MaxComputeColumnValue(listCol.getDataVector(), offset);
+            values.add(val);
+            offset++;
+        }
+        idx++;
     }

     @Override
     public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
-
+        skippedIfNull();
+        MapVector mapCol = (MapVector) column;
+        int elemSize = mapCol.getObject(idx).size();
+        FieldVector keyList = mapCol.getDataVector().getChildrenFromFields().get(0);
+        FieldVector valList = mapCol.getDataVector().getChildrenFromFields().get(1);
+        for (int i = 0; i < elemSize; i++) {
+            MaxComputeColumnValue key = new MaxComputeColumnValue(keyList, offset);
+            keys.add(key);
+            MaxComputeColumnValue val = new MaxComputeColumnValue(valList, offset);
+            values.add(val);
+            offset++;
+        }
+        idx++;
     }

     @Override
     public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values) {
-
+        skippedIfNull();
+        StructVector structCol = (StructVector) column;
+        for (Integer fieldIndex : structFieldIndex) {
MaxComputeColumnValue val = new MaxComputeColumnValue(structCol.getChildByOrdinal(fieldIndex), idx); + values.add(val); + } + idx++; } } diff --git a/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out b/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out new file mode 100644 index 00000000000000..86df55f16c56e0 --- /dev/null +++ b/regression-test/data/external_table_p0/mc/test_max_compute_complex_type.out @@ -0,0 +1,17 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mc_q1 -- +3 [1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"] +2 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"] +1 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"] + +-- !mc_q2 -- +{1:"example1", 2:"example2"} {1:2.5, 2:3.75} +{349:"asd", 324:"uid"} {3:2.5, 99:3.75} + +-- !mc_q3 -- +{"phone_number":123450, "email":"user1@example.com", "addr":"Addr1"} {"id":"user1", "age":25} +{"phone_number":2345671, "email":"user2@example.com", "addr":"Addr2"} {"id":"user2", "age":30} + +-- !mc_q4 -- +user1 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, {"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}] +user2 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, {"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}] diff --git a/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy b/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy new file mode 100644 index 00000000000000..a3de3715f91fc4 --- /dev/null +++ b/regression-test/suites/external_table_p0/mc/test_max_compute_complex_type.groovy @@ -0,0 +1,175 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +/* + // Test Case DDL + create table array_table ( + id int, + arr1 ARRAY, + arr2 ARRAY, + arr3 ARRAY, + arr4 ARRAY, + arr5 ARRAY + ); + INSERT INTO array_table VALUES(1, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12'))); + INSERT INTO array_table VALUES(2, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12'))); + INSERT INTO array_table VALUES(3, array(1, 2, 3), array('a', 'b', 'c'), array(1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12'))); + + create table map_table ( + arr1 MAP, + arr2 MAP + ); + INSERT INTO map_table (arr1, arr2) + VALUES ( + MAP(1, 2.5, 2, 3.75), + MAP(1, 'example1', 2, 'example2') + ); + INSERT INTO map_table (arr1, arr2) + VALUES ( + MAP(3, 2.5, 99, 3.75), + MAP(349, 'asd', 324, 'uid') + ); + + create table struct_table ( + user_info STRUCT, + contact_info STRUCT + ); + + INSERT INTO struct_table VALUES + ( + named_struct('id', 'user1', 'age', 25), + named_struct('phone_number', 123450, 'email', 'user1@example.com', 'addr', 'Addr1') + ), + ( + named_struct('id', 'user2', 'age', 30), + named_struct('phone_number', 2345671, 'email', 'user2@example.com', 'addr', 'Addr2') + ), + ( + named_struct('id', 'user3', 'age', 35), + named_struct('phone_number', 3456789, 'email', 'user3@example.com', 'addr', 'Addr3') + ); + + CREATE TABLE nested_complex_table ( + user_id STRING, + user_profile STRUCT< + name: STRING, + age: INT, + preferences: MAP< + STRING, + STRUCT< + preference_id: INT, + preference_values: ARRAY + > + > + >, + activity_log ARRAY< + STRUCT< + activity_date: STRING, + activities: MAP< + STRING, + STRUCT< + details: STRING, + metrics: MAP + > + > + > + > + ); + INSERT INTO nested_complex_table VALUES + ( + 'user1', + named_struct('name', 'Alice', 'age', 28, 'preferences', map( + 'sports', named_struct('preference_id', 101, 'preference_values', array('soccer', 'tennis')), + 'music', named_struct('preference_id', 102, 'preference_values', array('rock', 'classical')) + )), + array( + named_struct('activity_date', '2024-08-01', 'activities', map( + 'workout', named_struct('details', 'Morning run', 'metrics', map('duration', 30.5, 'calories', 200.0)), + 'reading', named_struct('details', 'Read book on Hive', 'metrics', map('pages', 50.0, 'time', 2.0)) + )), + named_struct('activity_date', '2024-08-02', 'activities', map( + 'travel', named_struct('details', 'Flight to NY', 'metrics', map('distance', 500.0, 'time', 3.0)), + 'meeting', named_struct('details', 'Project meeting', 'metrics', map('duration', 1.5, 'participants', 5.0)) + )) + ) + ), + ( + 'user2', + named_struct('name', 'Bob', 'age', 32, 'preferences', map( + 'books', named_struct('preference_id', 201, 'preference_values', array('fiction', 'non-fiction')), + 'travel', named_struct('preference_id', 202, 'preference_values', array('beaches', 'mountains')) + )), + array( + named_struct('activity_date', '2024-08-01', 'activities', map( + 'hiking', named_struct('details', 'Mountain trail', 'metrics', map('distance', 10.0, 'elevation', 500.0)), + 'photography', named_struct('details', 'Wildlife photoshoot', 'metrics', map('photos_taken', 100.0, 'time', 4.0)) + )), + named_struct('activity_date', '2024-08-02', 'activities', map( + 'workshop', named_struct('details', 'Photography workshop', 'metrics', map('duration', 3.0, 'participants', 15.0)), + 'shopping', named_struct('details', 'Bought camera gear', 'metrics', map('items', 5.0, 'cost', 1500.0)) + )) 
+ ) + ), + ( + 'user3', + named_struct('name', 'Carol', 'age', 24, 'preferences', map( + 'food', named_struct('preference_id', 301, 'preference_values', array('vegan', 'desserts')), + 'movies', named_struct('preference_id', 302, 'preference_values', array('action', 'comedy')) + )), + array( + named_struct('activity_date', '2024-08-01', 'activities', map( + 'cooking', named_struct('details', 'Made vegan meal', 'metrics', map('time_spent', 1.5, 'calories', 500.0)), + 'movie', named_struct('details', 'Watched action movie', 'metrics', map('duration', 2.0, 'rating', 8.5)) + )), + named_struct('activity_date', '2024-08-02', 'activities', map( + 'gym', named_struct('details', 'Strength training', 'metrics', map('duration', 1.0, 'calories', 300.0)), + 'shopping', named_struct('details', 'Bought groceries', 'metrics', map('items', 10.0, 'cost', 100.0)) + )) + ) + ); + */ +suite("test_max_compute_complex_type", "p0,external,doris,external_docker,external_docker_doris") { + String enabled = context.config.otherConfigs.get("enableMaxComputeTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String ak = context.config.otherConfigs.get("aliYunAk") + String sk = context.config.otherConfigs.get("aliYunSk") + String mc_catalog_name = "test_max_compute_complex_type" + sql """drop catalog if exists ${mc_catalog_name} """ + sql """ + CREATE CATALOG IF NOT EXISTS ${mc_catalog_name} PROPERTIES ( + "type" = "max_compute", + "mc.default.project" = "mc_datalake", + "mc.region" = "cn-beijing", + "mc.access_key" = "${ak}", + "mc.secret_key" = "${sk}", + "mc.public_access" = "true" + ); + """ + + logger.info("catalog " + mc_catalog_name + " created") + sql """switch ${mc_catalog_name};""" + logger.info("switched to catalog " + mc_catalog_name) + sql """ use mc_datalake """ + + qt_mc_q1 """ select id,arr3,arr1,arr5,arr2 from array_table order by id desc """ + qt_mc_q2 """ select arr2,arr1 from map_table order by id limit 2 """ + qt_mc_q3 """ select contact_info,user_info from struct_table order by id limit 2 """ + qt_mc_q4 """ select user_id,activity_log from nested_complex_table order by user_id limit 2 """ + + sql """drop catalog ${mc_catalog_name};""" + } +}
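
Reviewer note: the crux of the Java change is that Arrow keeps every ARRAY/MAP element of a batch in one flat child data vector, so the reader has to carry a single running offset across rows (the new `offset` field) instead of re-reading each row from position 0. The sketch below only illustrates that read pattern against the Arrow Java API and assumes arrow-vector plus an allocator module on the classpath; the class name ListOffsetReadSketch, the writer-side setup, and the sample rows are invented for the illustration and are not part of this patch or of the Doris scanner.

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

public class ListOffsetReadSketch {
    public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
             ListVector listVector = ListVector.empty("arr", allocator)) {
            // Build two list rows: [1, 2, 3] and [4, 5].
            UnionListWriter writer = listVector.getWriter();
            writer.setPosition(0);
            writer.startList();
            writer.writeInt(1);
            writer.writeInt(2);
            writer.writeInt(3);
            writer.endList();
            writer.setPosition(1);
            writer.startList();
            writer.writeInt(4);
            writer.writeInt(5);
            writer.endList();
            listVector.setValueCount(2);

            // The child data vector is flat: [1, 2, 3, 4, 5]. Row N's elements
            // start where row N-1's ended, so the reader keeps one running
            // offset across rows rather than restarting at 0 for each row.
            IntVector data = (IntVector) listVector.getDataVector();
            int offset = 0;
            for (int row = 0; row < listVector.getValueCount(); row++) {
                int elemSize = listVector.getObject(row).size();
                StringBuilder sb = new StringBuilder("row " + row + ": [");
                for (int i = 0; i < elemSize; i++) {
                    sb.append(data.get(offset));
                    offset++;
                    if (i + 1 < elemSize) {
                        sb.append(", ");
                    }
                }
                System.out.println(sb.append("]"));
            }
        }
    }
}

With the running offset the sketch prints "row 0: [1, 2, 3]" and "row 1: [4, 5]"; resetting the offset to 0 for every row would re-emit the first row's leading elements instead, which is the offset bookkeeping this patch adds to MaxComputeColumnValue for ARRAY and MAP columns.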