-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[Enhancement](Load) Stream Load using SQL #22509
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c2c3f14
9bab338
c6f64e8
b5d7f3f
a47ab61
1bec888
65e6dec
1579dd1
773e04f
bce76f4
a4586d7
83c8e79
0929bae
db9573e
c083718
7c6043c
9248708
706d30e
cb33d15
83462bd
70a4003
1e3c314
3200045
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <functional> | ||
|
|
||
| #include "gen_cpp/PlanNodes_types.h" | ||
| #include "http/http_handler.h" | ||
| #include "runtime/client_cache.h" | ||
| #include "runtime/message_body_sink.h" | ||
|
|
||
| namespace doris { | ||
|
|
||
| class ExecEnv; | ||
| class Status; | ||
| class StreamLoadContext; | ||
|
|
||
| // HTTP handler for Stream Load requests whose load parameters are expressed as a | ||
| // SQL statement. | ||
| // NOTE(review): presumably registered for the `_stream_load_with_sql` endpoint; the | ||
| // registration is not visible in this header -- confirm at the call site. | ||
| class StreamLoadWithSqlAction : public HttpHandler { | ||
| public: | ||
| // `explicit` keeps an ExecEnv* from silently converting into a handler. | ||
| // NOTE(review): exec_env is presumably kept in _exec_env without taking ownership, | ||
| // so it would have to outlive this object -- confirm in the definition. | ||
| explicit StreamLoadWithSqlAction(ExecEnv* exec_env); | ||
| ~StreamLoadWithSqlAction() override; | ||
|
|
||
| // HttpHandler entry point for a request. | ||
| void handle(HttpRequest* req) override; | ||
|
|
||
| // Always true: this handler asks for the request body to be read progressively. | ||
| // NOTE(review): presumably the body then arrives through on_chunk_data() -- confirm | ||
| // against the HttpHandler contract. | ||
| bool request_will_be_read_progressively() override { return true; } | ||
|
|
||
| // Header-stage callback. The meaning of the int result is defined by HttpHandler | ||
| // and is not visible here. | ||
| int on_header(HttpRequest* req) override; | ||
|
|
||
| // Callback for request body data. | ||
| void on_chunk_data(HttpRequest* req) override; | ||
| // Callback that receives the type-erased per-request context for release. | ||
| void free_handler_ctx(std::shared_ptr<void> ctx) override; | ||
|
|
||
| private: | ||
| // Status-returning helpers. | ||
| // NOTE(review): by name these back on_header()/handle(); the definitions are not | ||
| // visible in this header. | ||
| Status _on_header(HttpRequest* http_req, std::shared_ptr<StreamLoadContext> ctx); | ||
| Status _handle(HttpRequest* req, std::shared_ptr<StreamLoadContext> ctx); | ||
| // Out-parameter: writes a path into *file_path. | ||
| Status _data_saved_path(HttpRequest* req, std::string* file_path); | ||
| Status _process_put(HttpRequest* http_req, std::shared_ptr<StreamLoadContext> ctx); | ||
| void _save_stream_load_record(std::shared_ptr<StreamLoadContext> ctx, const std::string& str); | ||
| // Out-parameters: writes the parsed values into *format_type and *compress_type. | ||
| void _parse_format(const std::string& format_str, const std::string& compress_type_str, | ||
| TFileFormatType::type* format_type, TFileCompressType::type* compress_type); | ||
| bool _is_format_support_streaming(TFileFormatType::type format); | ||
|
|
||
| private: | ||
| // Raw pointers default to nullptr so they are never read uninitialized, even if a | ||
| // constructor path does not set them. | ||
| ExecEnv* _exec_env = nullptr; | ||
| std::shared_ptr<MetricEntity> _stream_load_with_sql_entity; | ||
| // Metric handles. | ||
| // NOTE(review): presumably owned by _stream_load_with_sql_entity -- confirm. | ||
| IntCounter* streaming_load_with_sql_requests_total = nullptr; | ||
| IntCounter* streaming_load_with_sql_duration_ms = nullptr; | ||
| IntGauge* streaming_load_with_sql_current_processing = nullptr; | ||
| }; | ||
|
|
||
| } // namespace doris |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -242,6 +242,31 @@ Stream Load 由于使用的是 HTTP 协议,所以所有导入任务有关的 | |
| } | ||
| ``` | ||
|
|
||
| ### 使用SQL表达Stream Load的参数 | ||
|
|
||
| 可以在 Header 中添加一个 `sql` 参数,用来替代原有的 `column_separator`、`line_delimiter`、`where`、`columns` 等参数,使用更加方便。 | ||
|
|
||
| ``` | ||
| curl --location-trusted -u user:passwd [-H "sql: ${load_sql}"...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load_with_sql | ||
|
|
||
|
|
||
| # -- load_sql | ||
| # insert into db.table (col, ...) select stream_col, ... from stream("property1"="value1"); | ||
|
|
||
| # stream | ||
| # ( | ||
| # "column_separator" = ",", | ||
| # "format" = "CSV", | ||
| # ... | ||
| # ) | ||
| ``` | ||
|
|
||
| 示例: | ||
|
|
||
| ``` | ||
| curl --location-trusted -u root: -T test.csv -H "sql:insert into demo.example_tbl_1(user_id, age, cost) select c1, c4, c7 * 2 from stream(\"format\" = \"CSV\", \"column_separator\" = \",\" ) where age >= 30" http://127.0.0.1:28030/api/demo/example_tbl_1/_stream_load_with_sql | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 转义是怎么处理的? |
||
| ``` | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 是不是再加一个直接请求 be 的 example? since they have different URI pattern.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里的文档应该是错的,我们先把代码合入,然后慢慢改吧。我怕得反复的rebase |
||
|
|
||
|
|
||
| ### 返回结果 | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that we support stream load, we should also update the err msg.