From b22b43bfcc1e2adbcc22e073bb56a5436b9069bb Mon Sep 17 00:00:00 2001 From: hui lai <1353307710@qq.com> Date: Wed, 26 Jun 2024 19:19:40 +0800 Subject: [PATCH] [fix](load) fix no error url if no partition can be found (#36831) ## Proposed changes When a row-distribution helper returned an error (for example, when no partition can be found for a row), `generate_rows_distribution` exited early through `RETURN_IF_ERROR` before `filtered_rows` was updated, so the stream load result reported `NumberFilteredRows` as 0 and contained no `ErrorURL`. The helper's status is now kept in a local variable, `filtered_rows` is computed, and the status is returned afterwards. Before: ``` Stream load result: { "TxnId": 2014, "Label": "83ba46bd-280c-4e22-b581-4eb126fd49cf", "Comment": "", "TwoPhaseCommit": "false", "Status": "Fail", "Message": "[DATA_QUALITY_ERROR]Encountered unqualified data, stop processing", "NumberTotalRows": 1, "NumberLoadedRows": 1, "NumberFilteredRows": 0, "NumberUnselectedRows": 0, "LoadBytes": 1669, "LoadTimeMs": 58, "BeginTxnTimeMs": 0, "StreamLoadPutTimeMs": 10, "ReadDataTimeMs": 0, "WriteDataTimeMs": 47, "CommitAndPublishTimeMs": 0 } ``` After: ``` Stream load result: { "TxnId": 2014, "Label": "83ba46bd-280c-4e22-b581-4eb126fd49cf", "Comment": "", "TwoPhaseCommit": "false", "Status": "Fail", "Message": "[DATA_QUALITY_ERROR]too many filtered rows", "NumberTotalRows": 1, "NumberLoadedRows": 0, "NumberFilteredRows": 1, "NumberUnselectedRows": 0, "LoadBytes": 1669, "LoadTimeMs": 58, "BeginTxnTimeMs": 0, "StreamLoadPutTimeMs": 10, "ReadDataTimeMs": 0, "WriteDataTimeMs": 47, "CommitAndPublishTimeMs": 0, "ErrorURL": "http://XXXX:8040/api/_load_error_log?file=__shard_4/error_log_insert_stmt_c6461270125a615b-2873833fb48d56a3_c6461270125a615b_2873833fb48d56a3" } ``` --- be/src/vec/sink/vrow_distribution.cpp | 17 ++-- .../load_p0/stream_load/test_error_url_1.csv | 1 + .../test_stream_load_error_url.groovy | 94 +++++++++++++++++++ 3 files changed, 104 insertions(+), 8 deletions(-) create mode 100644 regression-test/data/load_p0/stream_load/test_error_url_1.csv diff --git a/be/src/vec/sink/vrow_distribution.cpp b/be/src/vec/sink/vrow_distribution.cpp index f740e8f5767861..2b635872e8ba8f 100644 --- a/be/src/vec/sink/vrow_distribution.cpp +++ b/be/src/vec/sink/vrow_distribution.cpp @@ -443,22 +443,23 @@ Status VRowDistribution::generate_rows_distribution( 
_vpartition->set_transformed_slots(partition_cols_idx); } + Status st = Status::OK(); if (_vpartition->is_auto_detect_overwrite()) { // when overwrite, no auto create partition allowed. - RETURN_IF_ERROR(_generate_rows_distribution_for_auto_overwrite( - block.get(), has_filtered_rows, row_part_tablet_ids)); + st = _generate_rows_distribution_for_auto_overwrite(block.get(), has_filtered_rows, + row_part_tablet_ids); } else if (_vpartition->is_auto_partition() && !_deal_batched) { - RETURN_IF_ERROR(_generate_rows_distribution_for_auto_partition( - block.get(), partition_cols_idx, has_filtered_rows, row_part_tablet_ids, - rows_stat_val)); + st = _generate_rows_distribution_for_auto_partition(block.get(), partition_cols_idx, + has_filtered_rows, row_part_tablet_ids, + rows_stat_val); } else { // not auto partition - RETURN_IF_ERROR(_generate_rows_distribution_for_non_auto_partition( - block.get(), has_filtered_rows, row_part_tablet_ids)); + st = _generate_rows_distribution_for_non_auto_partition(block.get(), has_filtered_rows, + row_part_tablet_ids); } filtered_rows = _block_convertor->num_filtered_rows() + _tablet_finder->num_filtered_rows() - prev_filtered_rows; - return Status::OK(); + return st; } // reuse vars for find_tablets diff --git a/regression-test/data/load_p0/stream_load/test_error_url_1.csv b/regression-test/data/load_p0/stream_load/test_error_url_1.csv new file mode 100644 index 00000000000000..69b86901155889 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/test_error_url_1.csv @@ -0,0 +1 @@ +2024-01-09 
15:40:46|lhCH2|ctxoxUuJnspAIJAmMuVJKh1B2sLXAwr9XWqcKYuH2ygU71QfcCfB1r8kOBFtllQewP3Hqw3dRq0zv0Bnlwm6Q20yPSQkU4gDmXAbEzxVBCSwU|3roU9YLxSVUVS5vIce1RA9wzNxBEcXGXEAonAGORyVwf6JMYYTlHb58we0|WWrrXKsWzMGAaJfw1veudp5sxSd5FjSzFSqfDWUwVGe96SAikxQhZKt4kpwSiINkJRNkUBf|neq__service_li|kodo-testing-kodoasynq.redis|92xlFBZ1FpQFfnt1ysS71TmVxcekRmCqesO2fBUxV2hWK7LeMbAFoGry6IbqbV2c5hKi9l8NuTG35mXnot9QTvjHKqdWkdy86fMn6|4XBHWHKxZOwh5kVsLz3xLy59W2NbwNDmpWpMQBci6h34uiUknTnseKy2F7dAYr6jjzq5VG7ulv2gwHor6V4HnA3U1XyQNoA9wJ8MFT9wt|Bv0Et5tUNM2JjFz0hS3cCBPX2S9begi8p1lv48M6W0PZQxfB3UoTvavaeyxwxhpfYkOsfqGQTCOMQt7Wwyn3dl2MvFqclt80kNYp7D25ZqREAYskBxWPhOSUaXWDTc|nBj3PlVV|uxoqGdn2SJlIaSEdMlb8hKW2XXgQcL4Tcz9rRqkv9O9rIIqYQ|RrHHtJ5BMmSiBGJsY3cgA4JqoI5l1otuYQoiXE70QQ2X17dOiF3t5umoK8pFEPk|x8zRUfaRJFFkdgBYQyzHrp3bhWIboTv8s2TuLxQ7jbAwduLEI0MFZLgxZHYSge8627ZPgiu96Qwqt9hKK7ZHQpYMEjQTlp2GvZwJiiuxDTYefAbrl6P|R9hEZdbWERAggflyCWNbfqBjNLNWSc0Xyu02Zl8OKpSVPrSIbI|KGHWT4C2GQyOXY2PkWsulr0gvBbfYHVMlKgYU6CEkywO8DQqvzPZkL7axRuVXTq6smU9NiT3KMirozpkTkvPL5MlwBYzsj|9CYIs6GGwpeRov|YNTDSLVHGRrevdOPBewx76od1htIsfcM6c8aYNTLrjoQ1jI9N6M2T5c7zsYQ7vZVobDnD3tqYCpxbjJNrI6LyqkKPxXsOQWTXnmhU2MLygRQrBPyPYkNseTIIve9|I5VKX1noAIeW7EgWM4lpEFvmUy7WgullmYtdNWBBkjhTrz7eetkTm6RYh0eH1lC69xfL2NaIcXbzTkwNPPAug1qnsYrfPQ4xdVvSgjJDropG4SMmT6y134AbnBun|k6wuJs0QDf4YmumR0hOoCDVwf18OpL3q1NoyfLZbt2mhY0YrMH7A8JqdTwvvdjMrYLyMCW1zCK8png4|Q7xOuIxRNVQt6wBntMq6nCauyORBXh7BV0Ac7z6SEReRWYBwyr|iZTSBUU7zukS0z9iPO9IfAJu|GXJRuS5mEUB|OTf42X35rt4T5dtmTO7377i0lphYj2uHb7ZUfJ|1117637773.099065|4614638931763895839|5341939051272067353|3954792818428854896|1967|-6700276424423912861|EPaT0V9uLfy3TrB6St44a9MRtxUuHPbg66JH0mCT4UXEcUAYQv2sXO8VNDP7K \ No newline at end of file diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load_error_url.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load_error_url.groovy index 72fc212e241af8..c9c62ad0b88652 100644 --- a/regression-test/suites/load_p0/stream_load/test_stream_load_error_url.groovy +++ 
b/regression-test/suites/load_p0/stream_load/test_stream_load_error_url.groovy @@ -73,4 +73,98 @@ suite("test_stream_load_error_url", "p0") { } finally { sql """ DROP TABLE IF EXISTS ${tableName} """ } + + try { + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `time` DATETIME(6) NULL, + `__docid` VARCHAR(64) NULL, + `__source` TEXT NULL COMMENT 'hidden', + `message` TEXT NULL, + `__namespace` TEXT NULL COMMENT 'hidden', + `source` TEXT NULL, + `service` TEXT NULL, + `container_host` TEXT NULL, + `endpoint` TEXT NULL, + `env` TEXT NULL, + `http_host` TEXT NULL, + `http_method` TEXT NULL, + `http_route` TEXT NULL, + `http_status_code` TEXT NULL, + `http_url` TEXT NULL, + `operation` TEXT NULL, + `project` TEXT NULL, + `source_type` TEXT NULL, + `status` TEXT NULL, + `span_type` TEXT NULL, + `parent_id` TEXT NULL, + `resource` TEXT NULL, + `span_id` TEXT NULL, + `trace_id` TEXT NULL, + `sample_rate` DOUBLE NULL, + `date` BIGINT NULL, + `create_time` BIGINT NULL, + `priority` BIGINT NULL, + `duration` BIGINT NULL, + `start` BIGINT NULL, + `var` TEXT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`time`, `__docid`) + COMMENT 'default' + PARTITION BY RANGE(`time`) + (PARTITION p20240625 VALUES [('2024-06-25 00:00:00'), ('2024-06-26 00:00:00')), + PARTITION p20240626 VALUES [('2024-06-26 00:00:00'), ('2024-06-27 00:00:00'))) + DISTRIBUTED BY RANDOM BUCKETS AUTO + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "dynamic_partition.enable" = "true", + "dynamic_partition.time_unit" = "DAY", + "dynamic_partition.time_zone" = "Asia/Shanghai", + "dynamic_partition.start" = "-100000", + "dynamic_partition.end" = "1", + "dynamic_partition.prefix" = "p", + "dynamic_partition.replication_allocation" = "tag.location.default: 1", + "dynamic_partition.buckets" = "10", + "dynamic_partition.create_history_partition" = "false", + 
"dynamic_partition.history_partition_num" = "16", + "dynamic_partition.hot_partition_num" = "0", + "dynamic_partition.reserved_history_periods" = "NULL", + "dynamic_partition.storage_policy" = "", + "storage_medium" = "hdd", + "storage_format" = "V2", + "inverted_index_storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728" + ); + """ + + streamLoad { + table "${tableName}" + set 'column_separator', '|' + set 'columns', '`time`,`__docid`,`__source`,`message`,`__namespace`,`source`,`service`,`container_host`,`endpoint`,`env`,`http_host`,`http_method`,`http_route`,`http_status_code`,`http_url`,`operation`,`project`,`source_type`,`status`,`span_type`,`parent_id`,`resource`,`span_id`,`trace_id`,`sample_rate`,`date`,`create_time`,`priority`,`duration`,`start`,`var`' + file 'test_error_url_1.csv' + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("[DATA_QUALITY_ERROR]too many filtered rows")) + def (code, out, err) = curl("GET", json.ErrorURL) + log.info("error result: " + out) + assertTrue(out.contains("no partition for this tuple")) + log.info("url: " + json.ErrorURL) + } + } + } finally { + sql """ DROP TABLE IF EXISTS ${tableName} """ + } } \ No newline at end of file