From 413c4b3517406ab3bd33e69fa7dec92ca8e9e0c9 Mon Sep 17 00:00:00 2001 From: xy720 Date: Mon, 5 Aug 2019 17:07:45 +0800 Subject: [PATCH 1/9] Help document collation (integration of help and documentation documents) --- docs/Makefile | 9 +- .../aggregate-functions/group_concat.md | 21 -- .../cn/sql-reference/sql-functions/cast.md | 6 +- .../Account Management/help.md | 0 .../Administration/admin_repair_stmt.md | 0 .../Administration/admin_show_stmt.md | 0 .../Administration/admin_stmt.md | 0 .../Administration/small_files.md | 0 .../{ => Data Definition}/create-function.md | 0 .../Data Definition/ddl_stmt.md | 41 --- .../{ => Data Definition}/drop-function.md | 0 .../{ => Data Definition}/show-function.md | 0 .../Data Manipulation/broker_load.md | 5 +- .../{ => Data Manipulation}/insert.md | 12 + .../Data Manipulation/manipulation_stmt.md | 11 +- .../Data Manipulation/routine_load.md | 2 +- .../Data Manipulation/streaming.md | 0 .../sql-statements}/Data Types/help.md | 0 .../sql-statements/Utility/util_stmt.md | 11 + .../Functions/Date and Time Functions/help.md | 310 ------------------ docs/help/Contents/Utility/util_stmt.md | 13 - docs/script/keyword.sh | 68 ++++ docs/script/merge.sh | 164 +++++++++ docs/script/split.sh | 73 +++++ 24 files changed, 344 insertions(+), 402 deletions(-) delete mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/group_concat.md rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Account Management/help.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/admin_repair_stmt.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/admin_show_stmt.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/admin_stmt.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/small_files.md (100%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Definition}/create-function.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Definition/ddl_stmt.md (97%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Definition}/drop-function.md (100%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Definition}/show-function.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/broker_load.md (98%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Manipulation}/insert.md (79%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/manipulation_stmt.md (98%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/routine_load.md (99%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/streaming.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Types/help.md (100%) create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Utility/util_stmt.md delete mode 100644 docs/help/Contents/Functions/Date and Time Functions/help.md delete mode 100644 docs/help/Contents/Utility/util_stmt.md create mode 100755 docs/script/keyword.sh create mode 100755 docs/script/merge.sh create mode 100755 docs/script/split.sh diff --git a/docs/Makefile b/docs/Makefile index d337c2091bdeeb..bad7301708cc65 100644 --- a/docs/Makefile +++ 
b/docs/Makefile @@ -20,17 +20,22 @@ # BUILD_DIR = ${CURDIR}/build +HELP_DIR = ${CURDIR}/contents all: zip_help .PHONY: all ${BUILD_DIR}: mkdir -p ${BUILD_DIR} +${HELP_DIR}: + mkdir -p ${HELP_DIR} # build help zip HELP_OUTPUT = ${BUILD_DIR}/help-resource.zip -${HELP_OUTPUT}: help | ${BUILD_DIR} - zip -r $@ $< +${HELP_OUTPUT}: documentation/cn/sql-reference ${BUILD_DIR} ${HELP_DIR} + cp -r $ select group_concat(`query_id`) from log_statis group by datetime; -+---------------------------------------------------------------------------------------------------------+ -| group_concat(`query_id`) | -+---------------------------------------------------------------------------------------------------------+ -| 445f5875f8854dfa:b9358d5cd86488a2, d5055534e6c04fa0:9b3f645913c4973c, a55801d0764d47fe:a98f3710649dc558 | -+------------------------------------------------------------------------------------------------ --------+ -``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/cast.md b/docs/documentation/cn/sql-reference/sql-functions/cast.md index b9898a2f710693..3d07805522a28f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/cast.md +++ b/docs/documentation/cn/sql-reference/sql-functions/cast.md @@ -12,15 +12,15 @@ cast (input as type) ## BIGINT type -### Syntax +## Syntax ``` cast (input as BIGINT) ``` -### Description +## Description 将当前列 input 转换为 BIGINT 类型 -### Examples +## Examples 1. 转常量,或表中某列 diff --git a/docs/help/Contents/Account Management/help.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md similarity index 100% rename from docs/help/Contents/Account Management/help.md rename to docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md diff --git a/docs/help/Contents/Administration/admin_repair_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_repair_stmt.md similarity index 100% rename from docs/help/Contents/Administration/admin_repair_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/admin_repair_stmt.md diff --git a/docs/help/Contents/Administration/admin_show_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md similarity index 100% rename from docs/help/Contents/Administration/admin_show_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md diff --git a/docs/help/Contents/Administration/admin_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md similarity index 100% rename from docs/help/Contents/Administration/admin_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md diff --git a/docs/help/Contents/Administration/small_files.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/small_files.md similarity index 100% rename from docs/help/Contents/Administration/small_files.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/small_files.md diff --git a/docs/documentation/cn/sql-reference/sql-statements/create-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md similarity index 100% rename from docs/documentation/cn/sql-reference/sql-statements/create-function.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md diff --git a/docs/help/Contents/Data Definition/ddl_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Data 
Definition/ddl_stmt.md similarity index 97% rename from docs/help/Contents/Data Definition/ddl_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md index e723d24348cc1f..f126b9b7b34b15 100644 --- a/docs/help/Contents/Data Definition/ddl_stmt.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md @@ -1109,44 +1109,3 @@ ## keyword COLOCATE, JOIN, CREATE TABLE - -# CREATE FUNCTION -## description - Used to create a UDF/UDAF/UDTF - Syntax: - CREATE [AGGREGATE] FUNCTION funcName (argType [, ...]) - RETURNS retType - PROPERTIES ( - k1=v1 [, k2=v2] - ) - - valid PROPERTIES: - "symbol": UDF's symbol, which Doris call this symbol's function to execute. MUST BE SET - "object_file": UDF library's URL, Doris use it to download library. MUST BE SET - "md5": when this property is set, Doris will check library's md5um against this value. This is a option - -## example - 1. create a function "my_func", receive two int and return one int - CREATE FUNCTION my_func (int, int) RETURNS int - PROPERTIES ("symbol"="my_func_symbol", "object_file"="http://127.0.0.1/my_func.so") - 2. create a variadic function "my_func" - CREATE FUNCTION my_func (int, ...) RETURNS int - PROPERTIES ("symbol"="my_func_symbol", "object_file"="http://127.0.0.1/my_func.so") - -## keyword - CREATE, FUNCTION - -# DROP FUNCTION -## description - Used to drop a UDF/UDAF/UDTF - Syntax: - DROP FUNCTION funcName (argType [, ...]) - -## example - 1. drop a UDF whose name is my_func - DROP FUNCTION my_func (int, int) - 2. drop a variadic function - DROP FUNCTION my_func (int, ...) - -## keyword - DROP, FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/drop-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md similarity index 100% rename from docs/documentation/cn/sql-reference/sql-statements/drop-function.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md diff --git a/docs/documentation/cn/sql-reference/sql-statements/show-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md similarity index 100% rename from docs/documentation/cn/sql-reference/sql-statements/show-function.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md diff --git a/docs/help/Contents/Data Manipulation/broker_load.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md similarity index 98% rename from docs/help/Contents/Data Manipulation/broker_load.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md index 3f83280c16da4d..a40c217ee8d68f 100644 --- a/docs/help/Contents/Data Manipulation/broker_load.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md @@ -2,7 +2,7 @@ ## description Broker load 通过随 Palo 集群一同部署的 broker 进行,访问对应数据源的数据,进行数据导入。 - 可以通过 show broker 命令查看已经部署的 broker。 + 不同的数据源需要部署不同的 broker 进程。可以通过 show broker 命令查看已经部署的 broker。 目前支持以下4种数据源: 1. Baidu HDFS:百度内部的 hdfs,仅限于百度内部使用。 @@ -110,7 +110,7 @@ 3. broker_name - 所使用的 broker 名称,可以通过 show broker 命令查看。 + 所使用的 broker 名称,可以通过 show broker 命令查看。不同的数据源需使用对应的 broker。 4. 
broker_properties @@ -161,7 +161,6 @@ max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 exec_mem_limit: 设置导入使用的内存上限。默认为2G,单位字节。这里是指单个 BE 节点的内存上限。 一个导入可能分布于多个BE。我们假设 1GB 数据在单个节点处理需要最大5GB内存。那么假设1GB文件分布在2个节点处理,那么理论上,每个节点需要内存为2.5GB。则该参数可以设置为 2684354560,即2.5GB - strict mode: 是否对数据进行严格限制。默认为true。 5. 导入数据格式样例 diff --git a/docs/documentation/cn/sql-reference/sql-statements/insert.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md similarity index 79% rename from docs/documentation/cn/sql-reference/sql-statements/insert.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md index 6f4cf4b24d4f20..b610e475f2b189 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/insert.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md @@ -10,6 +10,18 @@ INSERT INTO table_name { VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query } ``` +## Description + +INSERT 向一张表里插入数据。用户可以通过 VALUES 语法插入一条或者多条数据,或者通过一个查询来插入0条或者多条数据。 + +partition是目标分区,如果指定目标分区,则只会导入符合目标分区的数据。如果没有指定,则默认值为这张表的所有分区。 + +column是目标列,可以以任意的顺序存在。如果没有指定目标列,那么默认值是这张表的所有列。 + +如果表中的某个列没有存在目标列中,那么这个列需要有默认值,否则 INSERT 就会执行失败。 + +如果表达式的类型与目标列的类型不一致,那么会调用隐式类型转化,如果不能够进行转化,那么 INSERT 语句会报语法解析错误。 + ## Parameters > tablet_name: 导入数据的目的表。可以是 `db_name.table_name` 形式 diff --git a/docs/help/Contents/Data Manipulation/manipulation_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md similarity index 98% rename from docs/help/Contents/Data Manipulation/manipulation_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md index cc55e5f25602a9..3e84f50fd989ee 100644 --- a/docs/help/Contents/Data Manipulation/manipulation_stmt.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md @@ -622,7 +622,7 @@ [STATE = ["PENDING"|"ETL"|"LOADING"|"FINISHED"|"CANCELLED"|]] ] [ORDER BY ...] - [LIMIT limit][OFFSET offset]; + [LIMIT limit]; 说明: 1) 如果不指定 db_name,使用当前默认db @@ -631,8 +631,7 @@ 4) 如果指定了 STATE,则匹配 LOAD 状态 5) 可以使用 ORDER BY 对任意列组合进行排序 6) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 - 7) 如果指定了 OFFSET,则从偏移量offset开始显示查询结果。默认情况下偏移量为0。 - 8) 如果是使用 broker/mini load,则 URL 列中的连接可以使用以下命令查看: + 7) 如果是使用 broker/mini load,则 URL 列中的连接可以使用以下命令查看: SHOW LOAD WARNINGS ON 'url' @@ -648,12 +647,8 @@ 4. 展示指定 db 的导入任务,指定 label 为 "load_example_db_20140102" ,state 为 "loading", 并按 LoadStartTime 降序排序 SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" AND STATE = "loading" ORDER BY LoadStartTime DESC; - - 5. 展示指定 db 的导入任务 并按 LoadStartTime 降序排序,并从偏移量5开始显示10条查询结果 - SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 5,10; - SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 10 offset 5; - 6. 小批量导入是查看导入状态的命令 + 5. 
小批量导入是查看导入状态的命令 curl --location-trusted -u {user}:{passwd} http://{hostname}:{port}/api/{database}/_load_info?label={labelname} ## keyword diff --git a/docs/help/Contents/Data Manipulation/routine_load.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md similarity index 99% rename from docs/help/Contents/Data Manipulation/routine_load.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md index 9dcd7d687cc651..4fd4568eddaa47 100644 --- a/docs/help/Contents/Data Manipulation/routine_load.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md @@ -439,5 +439,5 @@ SHOW ROUTINE LOAD TASK WHERE JobName = "test1"; -# keyword +## keyword SHOW,ROUTINE,LOAD,TASK diff --git a/docs/help/Contents/Data Manipulation/streaming.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/streaming.md similarity index 100% rename from docs/help/Contents/Data Manipulation/streaming.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/streaming.md diff --git a/docs/help/Contents/Data Types/help.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md similarity index 100% rename from docs/help/Contents/Data Types/help.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md diff --git a/docs/documentation/cn/sql-reference/sql-statements/Utility/util_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Utility/util_stmt.md new file mode 100644 index 00000000000000..8154a9cdee1f9a --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Utility/util_stmt.md @@ -0,0 +1,11 @@ +# DESCRIBE +## description + 该语句用于展示指定 table 的 schema 信息 + 语法: + DESC[RIBE] [db_name.]table_name [ALL]; + + 说明: + 如果指定 ALL,则显示该 table 的所有 index 的 schema + +## keyword + DESCRIBE,DESC diff --git a/docs/help/Contents/Functions/Date and Time Functions/help.md b/docs/help/Contents/Functions/Date and Time Functions/help.md deleted file mode 100644 index dedb8ca2616e91..00000000000000 --- a/docs/help/Contents/Functions/Date and Time Functions/help.md +++ /dev/null @@ -1,310 +0,0 @@ -# unix_timestamp -## description -Syntax: -UNIX_TIMESTAMP(), UNIX_TIMESTAMP(date) - -将Date或者Datetime类型转化为unix时间戳 -如果没有参数,则是将当前的时间转化为时间戳 -参数需要是Date或者Datetime类型 - -## example -mysql> SELECT UNIX_TIMESTAMP(); - -> 1196440210 -mysql> SELECT UNIX_TIMESTAMP('2007-11-30 10:30:19'); - -> 1196418619 - -# from_unixtime -## description -Syntax: - FROM_UNIXTIME(int unix_timestamp[, string string_format]) - -将unix时间戳转化位对应的time格式,返回的格式由string_format指定 -默认为yyyy-MM-dd HH:mm:ss -传入的是整形,返回的是字符串类型 -目前string_format只支持两种类型的格式:yyyy-MM-dd,yyyy-MM-dd HH:mm:ss -其余string_format格式是非法的,返回NULL - -## example -mysql> SELECT FROM_UNIXTIME(1196440219); - -> '2007-12-01 00:30:19' - -mysql> SELECT FROM_UNIXTIME(1196440219, 'yyyy-MM-dd'); - -> '2007-12-01' - -mysql> SELECT FROM_UNIXTIME(1196440219, 'yyyy-MM-dd HH:mm:ss'); - -> '2007-12-01 00:30:19' - -# year -## description -Syntax: -YEAR(date) - -返回date类型的year部分,范围从1000-9999 -参数为Date或者Datetime类型 -## example -mysql> SELECT YEAR('1987-01-01'); - -> 1987 - -# month -## description -Syntax: -MONTH(date) - -返回时间类型中的月份信息,范围是1, 12 -参数为Date或者Datetime类型 - -## example -mysql> SELECT MONTH('1987-01-02'); - -> 01 - -# day -## description -Syntax: -DAY(date) - -与DAYOFMONTH是同义词,请`help dayofmonth` - -# dayofmonth -## description -Syntax: -DAYOFMONTH(date) -
-获得日期中的天信息,返回值范围从1-31。 -需要传入date类型 - -## example -mysql> SELECT DAYOFMONTH('1987-01-02'); - -> 2 - -# dayofyear -## description -Syntax: -DAYOFYEAR(date) - -获得日期中对应当年中的哪一天。 -输入值为date类型 - -## example -mysql> SELECT DAYOFYEAR('2007-02-03'); - -> 34 - -# weekofyear -## description -Syntax: -WEEKOFYEAR(date) - -获得一年中的第几周 -输入值为date类型 - -## example -mysql> SELECT WEEKOFYEAR('2008-02-20'); - -> 8 - -# hour -## description -Syntax: -HOUR(date) - -获得时间中对应的小时信息 -这里Palo跟MySQL不太一样,因为MySQL是支持Time类型的 -Palo没有Time类型,所以输入的内容是Date或者Datetime。 - -## example -mysql> select hour("2000-01-02 12:34:56"); - -> 12 - -# minute -## description -Syntax: -MINUTE(date) - -获得日期中的分钟信息 -这里Palo跟MySQL不太一样,因为MySQL是支持Time类型的 -Palo没有Time类型,所以输入的内容是Date或者Datetime。 - -## example -mysql> SELECT MINUTE("2000-01-02 12:34:56"); - -> 34 - -# second -## description -Syntax: -SECOND(date) - -获得时间中的秒信息 -这里Palo跟MySQL不太一样,因为MySQL是支持Time类型的 -Palo没有Time类型,所以输入的内容是Date或者Datetime。 -## example -mysql> SELECT SECOND("2000-01-02 12:34:56"); - -> 56 - -# now -## description -Syntax: -NOW() - -获得当前的时间,以Datetime类型返回 - -## example -mysql> SELECT NOW(); - -> '2007-12-15 23:50:26' - -# current_timestamp -## description -Syntax: -CURRENT_TIMESTAMP() - -与NOW()是同义词 - -# datediff -## description -Syntax: -DATEDIFF(expr1,expr2) - -计算expr1 - expr2,结果精确到天。 -要求传入的两个值需要是datetime类型。 - -## example -mysql> SELECT DATEDIFF(CAST ('2007-12-31 23:59:59' AS DATETIME),CAST ('2007-12-30' AS DATETIME)); - -> 1 -mysql> SELECT DATEDIFF(CAST('2010-11-30 23:59:59' AS DATETIME), CAST('2010-12-31' AS DATETIME)); - -> -31 - -# date_add -## description -Syntax: -DATE_ADD(date,INTERVAL expr unit) - -对时间类型进行加法运算 -支持的time unit包括 -YEAR, MONTH, DAY, HOUR, MINUTE, SECOND - -## example -mysql> DATE_ADD(date,INTERVAL expr unit) - -> 1987-01-02 00:00:00 - -# date_sub -## description -Syntax: -DATE_SUB(date,INTERVAL expr unit) - -与DATE_ADD相反,对时间类型进行减法运算 -支持的time unit包括 -YEAR, MONTH, DAY, HOUR, MINUTE, SECOND - -## example -mysql> DATE_SUB(date,INTERVAL expr unit) - -> 1986-12-31 00:00:00 - -# date_format -## description -Syntax: -DATE_FORMAT(date, format) - -将日期类型按照format的类型转化位字符串, -当前支持最大128字节的字符串,如果返回值长度超过128,则返回NULL -format的含义如下: -%a Abbreviated weekday name (Sun..Sat) -%b Abbreviated month name (Jan..Dec) -%c Month, numeric (0..12) -%D Day of the month with English suffix (0th, 1st, 2nd, 3rd, …) -%d Day of the month, numeric (00..31) -%e Day of the month, numeric (0..31) -%f Microseconds (000000..999999) -%H Hour (00..23) -%h Hour (01..12) -%I Hour (01..12) -%i Minutes, numeric (00..59) -%j Day of year (001..366) -%k Hour (0..23) -%l Hour (1..12) -%M Month name (January..December) -%m Month, numeric (00..12) -%p AM or PM -%r Time, 12-hour (hh:mm:ss followed by AM or PM) -%S Seconds (00..59) -%s Seconds (00..59) -%T Time, 24-hour (hh:mm:ss) -%U Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 -%u Week (00..53), where Monday is the first day of the week; WEEK() mode 1 -%V Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with %X -%v Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with %x -%W Weekday name (Sunday..Saturday) -%w Day of the week (0=Sunday..6=Saturday) -%X Year for the week where Sunday is the first day of the week, numeric, four digits; used with %V -%x Year for the week, where Monday is the first day of the week, numeric, four digits; used with %v -%Y Year, numeric, four digits -%y Year, numeric (two digits) -%% A literal “%” character -%x x, for any “x” not listed above - -## example -mysql> 
SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y'); - -> 'Sunday October 2009' -mysql> SELECT DATE_FORMAT('2007-10-04 22:23:00', '%H:%i:%s'); - -> '22:23:00' -mysql> SELECT DATE_FORMAT('1900-10-04 22:23:00', '%D %y %a %d %m %b %j'); - -> '4th 00 Thu 04 10 Oct 277' -mysql> SELECT DATE_FORMAT('1997-10-04 22:23:00', '%H %k %I %r %T %S %w'); - -> '22 22 10 10:23:00 PM 22:23:00 00 6' -mysql> SELECT DATE_FORMAT('1999-01-01', '%X %V'); - -> '1998 52' -mysql> SELECT DATE_FORMAT('2006-06-01', '%d'); - -> '01' - -# from_days -## description -Syntax: -FROM_DAYS(N) -通过距离0000-01-01日的天数计算出哪一天 - -## example -mysql> SELECT FROM_DAYS(730669); - -> '2007-07-03' - -# to_days -## description -Syntax: -TO_DAYS(date) -返回date距离0000-01-01的天数 - -## example -mysql> SELECT TO_DAYS(950501); - -> 728779 -mysql> SELECT TO_DAYS('2007-10-07'); - -> 733321 - -# str_to_date -## description -Syntax: -STR_TO_DATE(str, format) -通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL -支持的format格式与date_format一致 - -## example -mysql> SELECT STR_TO_DATE('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s'); - -> 2014-12-21 12:34:56 -mysql> SELECT STR_TO_DATE('200442 Monday', '%X%V %W'); - -> 2004-10-18 - -# monthname -## description -Syntax: -MONTHNAME(DATE) - -返回日期对应的月份名字 - -## example -mysql> SELECT MONTHNAME('2008-02-03'); - -> 'February' - -# monthname -## description -Syntax: -MONTHNAME(DATE) - -返回日期对应的日期名字 - -## example -mysql> SELECT DAYNAME('2007-02-03'); - -> 'Saturday' diff --git a/docs/help/Contents/Utility/util_stmt.md b/docs/help/Contents/Utility/util_stmt.md deleted file mode 100644 index 2fbafe8295f9b0..00000000000000 --- a/docs/help/Contents/Utility/util_stmt.md +++ /dev/null @@ -1,13 +0,0 @@ -# DESCRIBE -## description - 该语句用于展示指定 table 的 schema 信息 - 语法: - DESC[RIBE] [db_name.]table_name [ALL]; - - 说明: - 如果指定 ALL,则显示该 table 的所有 index 的 schema - -## example - -## keyword - DESCRIBE,DESC \ No newline at end of file diff --git a/docs/script/keyword.sh b/docs/script/keyword.sh new file mode 100755 index 00000000000000..fa34211852746d --- /dev/null +++ b/docs/script/keyword.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#this shell adds keywords to MD files without keywords + +IFS=`echo -en "\n\b"` + +ROOTDIR=`dirname "$0"` +ROOTDIR=`cd "$ROOT"; pwd` + +scandir() { + for file in `ls $*`; do + if [[ ! 
-d $*"/"$file ]]; then + if [[ $file == *".md" ]]; then + readfile $*"/"${file} + fi + else + scandir $*"/"${file} + fi + done +} + +readfile() { + local file=$* + local topic=`cat $file | grep "^#[^#].*" | grep -o "[^# ]\+\( \+[^ ]\+\)*"` + local keywordNum=`cat $file | grep "^##[^#]*keyword[ ]*$" | wc -l` + if [[ $keywordNum != 0 || -z $topic ]]; then + return + fi + local SAVEIFS=$IFS + IFS=' ' + local array=`echo $topic | tr '\`' ' ' | tr ',' ' '` + local keywords= + for keyword in ${array[*]}; do + keywords=$keywords","$keyword + done + array=`echo $array | tr '_' ' '` + for keyword in ${array[*]}; do + keywords=$keywords","$keyword + done + keywords=`echo ${keywords:1} | tr 'a-z' 'A-Z'` + IFS=$SAVEIFS + file=`echo $file | sed 's/[ \(\)]/\\\&/g'` + eval sed -i '"\$a ##keyword"' $file + eval sed -i '"\$a ${keywords}"' $file +} + +main() { + scandir $ROOTDIR +} + +main "$@" +exit 0 diff --git a/docs/script/merge.sh b/docs/script/merge.sh new file mode 100755 index 00000000000000..4fbdf6f9f99501 --- /dev/null +++ b/docs/script/merge.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#run split.h first +#this shell merges user-defined heads in the MD file into "description" head +################ ################# +# #topic # # #topic # +# # # # +# ##syntax # # ##descrption # +# somelines # # somelines # +# # # # +# ##descrption # # syntax # +# somelines # ==> # somelines # +# # # # +# ##example # # parameter # +# somelines # # somelines # +# # # # +# ##parameter # # ##example # +# somelines # # somelines # +# # # # +################ ################# + +IFS=`echo -en "\n\b"` + +ROOTDIR=`dirname "$0"` +ROOTDIR=`cd "$ROOT"; pwd` + +keywords=" +examples +example +description +keywords +keyword +url +" + +matchKeyword(){ + for keyword in ${keywords[*]}; do + if [[ "$1" == $keyword ]]; then + return 0 + fi + done + return 1 +} + +merge(){ + file=$* + sed -n '/^#[^#]/p' $file > ${ROOTDIR}/tempp + sed -n '/^<>TARGET<>/,/^<>END<>/p' $file >> ${ROOTDIR}/tempp + sed -n '/^>>>/,/^<<> ${ROOTDIR}/tempp + sed -n '/^\^\^\^/,/^\$\$\$/p' $file >> ${ROOTDIR}/tempp + sed -i 's/^<>TARGET<>//;s/^<>END<>//;s/^>>>//;s/^<<END<>/"' $tempfile + ;; + *) + echo "Internal error" ; exit 1 + ;; + esac + fi + + if [[ $row == $TotalRow && headlevel > 0 ]]; then + merge $tempfile + continue + fi + + if [[ $line == "##"* ]]; then + headlevel=2 + line=`echo ${line:2} | tr '[A-Z]' '[a-z]' | grep -o "[^ ]\+\( \+[^ ]\+\)*"` + if [[ $line == "description" ]]; then + eval sed -i '"${row}s/description/description/i"' $tempfile + elif [[ $line == "examples" ]]; then + eval sed -i '"${row}s/examples/example/i"' $tempfile + elif [[ $line == "keywords" ]]; then + eval sed -i '"${row}s/keywords/keyword/i"' $tempfile + fi + matchKeyword ${line} + if [[ $? 
== 1 ]]; then + style="unmatch" + eval sed -i '"${row}s/^##/>>>/"' $tempfile + else + if [[ $line == "description" ]]; then + style="description" + eval sed -i '"${row}s/^/<>TARGET<>/"' $tempfile + continue + fi + style="match" + eval sed -i '"${row}s/^/\^\^\^/"' $tempfile + fi + elif [[ $line == "#"* ]]; then + if [[ headlevel == 0 ]]; then + headlevel=1 + continue + fi + headleve=1 + fi + fi + done < $tempfile + if [[ -f $tempfile ]]; then + rm $tempfile + fi + if [[ -f ${ROOTDIR}/tempp ]]; then + cp ${ROOTDIR}/tempp $* && rm ${ROOTDIR}/tempp + fi +} + +main() { + scandir $ROOTDIR +} + +main "$@" +exit 0 diff --git a/docs/script/split.sh b/docs/script/split.sh new file mode 100755 index 00000000000000..70f48251f666af --- /dev/null +++ b/docs/script/split.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#this shell splits topics in the same MD file +IFS=`echo -en "\n\b"` + +ROOTDIR=`dirname "$0"` +ROOTDIR=`cd "$ROOT"; pwd` + +scandir() { + for file in `ls $*`; do + if [[ ! -d $*"/"$file ]]; then + if [[ $file == *".md" ]]; then + splitfile $*"/"${file} + fi + else + scandir $*"/"${file} + fi + done +} + +splitfile() { + local file=$* + local filedir=${file%/*} + local evalfile=`echo $file | sed 's/[ \(\)]/\\\&/g'` + + local row=0 + local split=1 + local name= + local TotalRow=`wc -l $file | awk '{print $1}'` + local TopicNum=`grep -o '^#[^#].*' $file | wc -l` + if [ $TopicNum -lt 2 ]; then + return + fi + while read line; do + ((row++)) + + if [[ $row == $TotalRow || $line =~ ^#[^#].* ]]; then + if [[ -n $name && $split != $row ]]; then + eval awk '"NR==${split},NR==$(($row==$TotalRow?row:row-1))"' ${evalfile} > ${ROOTDIR}/tempp + cp ${ROOTDIR}/tempp ${filedir}/${name}.md + fi + name=`echo $line | grep -o "[^# ]\+\( \+[^ ]\+\)*"` + split=$row + fi + done < $file + if [[ -f ${ROOTDIR}/tempp ]]; then + rm ${ROOTDIR}/tempp + fi + rm $file +} + +main() { + scandir $ROOTDIR +} + +main "$@" +exit 0 From 71614ac60eb1dd56281740f85a7121d049e5d9c4 Mon Sep 17 00:00:00 2001 From: xy720 Date: Mon, 5 Aug 2019 17:24:44 +0800 Subject: [PATCH 2/9] Help document collation (integration of help and documentation documents) --- docs/script/split.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/script/split.sh b/docs/script/split.sh index 70f48251f666af..52764749936377 100755 --- a/docs/script/split.sh +++ b/docs/script/split.sh @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. 
-#this shell splits topics in the same MD file +#This shell splits topics in the same MD file IFS=`echo -en "\n\b"` ROOTDIR=`dirname "$0"` From b3cfeb4c6a54b87dddd0aec4c19178bbce6171b3 Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 6 Aug 2019 02:59:43 +0800 Subject: [PATCH 3/9] use space instead of tab --- docs/Makefile | 9 +- .../aggregate-functions/group_concat.md | 21 -- .../cn/sql-reference/sql-functions/cast.md | 6 +- .../Account Management/help.md | 0 .../Administration/admin_repair_stmt.md | 0 .../Administration/admin_show_stmt.md | 0 .../Administration/admin_stmt.md | 0 .../Administration/small_files.md | 0 .../{ => Data Definition}/create-function.md | 0 .../Data Definition/ddl_stmt.md | 41 --- .../{ => Data Definition}/drop-function.md | 0 .../{ => Data Definition}/show-function.md | 0 .../Data Manipulation/broker_load.md | 0 .../{ => Data Manipulation}/insert.md | 2 + .../Data Manipulation/manipulation_stmt.md | 0 .../Data Manipulation/routine_load.md | 2 +- .../Data Manipulation/streaming.md | 0 .../sql-statements}/Data Types/help.md | 0 .../sql-statements}/Utility/util_stmt.md | 26 +- .../Functions/Date and Time Functions/help.md | 310 ------------------ docs/script/keyword.sh | 68 ++++ docs/script/merge.sh | 164 +++++++++ docs/script/split.sh | 73 +++++ 23 files changed, 331 insertions(+), 391 deletions(-) delete mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/group_concat.md rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Account Management/help.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/admin_repair_stmt.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/admin_show_stmt.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/admin_stmt.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Administration/small_files.md (100%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Definition}/create-function.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Definition/ddl_stmt.md (97%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Definition}/drop-function.md (100%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Definition}/show-function.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/broker_load.md (100%) rename docs/documentation/cn/sql-reference/sql-statements/{ => Data Manipulation}/insert.md (99%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/manipulation_stmt.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/routine_load.md (99%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Manipulation/streaming.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Data Types/help.md (100%) rename docs/{help/Contents => documentation/cn/sql-reference/sql-statements}/Utility/util_stmt.md (81%) delete mode 100644 docs/help/Contents/Functions/Date and Time Functions/help.md create mode 100755 docs/script/keyword.sh create mode 100755 docs/script/merge.sh create mode 100755 docs/script/split.sh diff --git a/docs/Makefile b/docs/Makefile index d337c2091bdeeb..bad7301708cc65 100644 --- a/docs/Makefile +++ b/docs/Makefile 
@@ -20,17 +20,22 @@ # BUILD_DIR = ${CURDIR}/build +HELP_DIR = ${CURDIR}/contents all: zip_help .PHONY: all ${BUILD_DIR}: mkdir -p ${BUILD_DIR} +${HELP_DIR}: + mkdir -p ${HELP_DIR} # build help zip HELP_OUTPUT = ${BUILD_DIR}/help-resource.zip -${HELP_OUTPUT}: help | ${BUILD_DIR} - zip -r $@ $< +${HELP_OUTPUT}: documentation/cn/sql-reference ${BUILD_DIR} ${HELP_DIR} + cp -r $ select group_concat(`query_id`) from log_statis group by datetime; -+---------------------------------------------------------------------------------------------------------+ -| group_concat(`query_id`) | -+---------------------------------------------------------------------------------------------------------+ -| 445f5875f8854dfa:b9358d5cd86488a2, d5055534e6c04fa0:9b3f645913c4973c, a55801d0764d47fe:a98f3710649dc558 | -+------------------------------------------------------------------------------------------------ --------+ -``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/cast.md b/docs/documentation/cn/sql-reference/sql-functions/cast.md index b9898a2f710693..3d07805522a28f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/cast.md +++ b/docs/documentation/cn/sql-reference/sql-functions/cast.md @@ -12,15 +12,15 @@ cast (input as type) ## BIGINT type -### Syntax +## Syntax ``` cast (input as BIGINT) ``` -### Description +## Description 将当前列 input 转换为 BIGINT 类型 -### Examples +## Examples 1. 转常量,或表中某列 diff --git a/docs/help/Contents/Account Management/help.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md similarity index 100% rename from docs/help/Contents/Account Management/help.md rename to docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md diff --git a/docs/help/Contents/Administration/admin_repair_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_repair_stmt.md similarity index 100% rename from docs/help/Contents/Administration/admin_repair_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/admin_repair_stmt.md diff --git a/docs/help/Contents/Administration/admin_show_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md similarity index 100% rename from docs/help/Contents/Administration/admin_show_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md diff --git a/docs/help/Contents/Administration/admin_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md similarity index 100% rename from docs/help/Contents/Administration/admin_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md diff --git a/docs/help/Contents/Administration/small_files.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/small_files.md similarity index 100% rename from docs/help/Contents/Administration/small_files.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/small_files.md diff --git a/docs/documentation/cn/sql-reference/sql-statements/create-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md similarity index 100% rename from docs/documentation/cn/sql-reference/sql-statements/create-function.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md diff --git a/docs/help/Contents/Data Definition/ddl_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Data 
Definition/ddl_stmt.md similarity index 97% rename from docs/help/Contents/Data Definition/ddl_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md index 147511f1e329f1..b0bb79ecd0d7cb 100644 --- a/docs/help/Contents/Data Definition/ddl_stmt.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md @@ -1110,44 +1110,3 @@ ## keyword COLOCATE, JOIN, CREATE TABLE - -# CREATE FUNCTION -## description - Used to create a UDF/UDAF/UDTF - Syntax: - CREATE [AGGREGATE] FUNCTION funcName (argType [, ...]) - RETURNS retType - PROPERTIES ( - k1=v1 [, k2=v2] - ) - - valid PROPERTIES: - "symbol": UDF's symbol, which Doris call this symbol's function to execute. MUST BE SET - "object_file": UDF library's URL, Doris use it to download library. MUST BE SET - "md5": when this property is set, Doris will check library's md5um against this value. This is a option - -## example - 1. create a function "my_func", receive two int and return one int - CREATE FUNCTION my_func (int, int) RETURNS int - PROPERTIES ("symbol"="my_func_symbol", "object_file"="http://127.0.0.1/my_func.so") - 2. create a variadic function "my_func" - CREATE FUNCTION my_func (int, ...) RETURNS int - PROPERTIES ("symbol"="my_func_symbol", "object_file"="http://127.0.0.1/my_func.so") - -## keyword - CREATE, FUNCTION - -# DROP FUNCTION -## description - Used to drop a UDF/UDAF/UDTF - Syntax: - DROP FUNCTION funcName (argType [, ...]) - -## example - 1. drop a UDF whose name is my_func - DROP FUNCTION my_func (int, int) - 2. drop a variadic function - DROP FUNCTION my_func (int, ...) - -## keyword - DROP, FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/drop-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md similarity index 100% rename from docs/documentation/cn/sql-reference/sql-statements/drop-function.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md diff --git a/docs/documentation/cn/sql-reference/sql-statements/show-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md similarity index 100% rename from docs/documentation/cn/sql-reference/sql-statements/show-function.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md diff --git a/docs/help/Contents/Data Manipulation/broker_load.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md similarity index 100% rename from docs/help/Contents/Data Manipulation/broker_load.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md diff --git a/docs/documentation/cn/sql-reference/sql-statements/insert.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md similarity index 99% rename from docs/documentation/cn/sql-reference/sql-statements/insert.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md index 6f4cf4b24d4f20..d571410ebf459a 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/insert.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md @@ -10,6 +10,8 @@ INSERT INTO table_name { VALUES ( { expression | DEFAULT } [, ...] ) [, ...] 
| query } ``` +## description + ## Parameters > tablet_name: 导入数据的目的表。可以是 `db_name.table_name` 形式 diff --git a/docs/help/Contents/Data Manipulation/manipulation_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md similarity index 100% rename from docs/help/Contents/Data Manipulation/manipulation_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md diff --git a/docs/help/Contents/Data Manipulation/routine_load.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md similarity index 99% rename from docs/help/Contents/Data Manipulation/routine_load.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md index 9dcd7d687cc651..4fd4568eddaa47 100644 --- a/docs/help/Contents/Data Manipulation/routine_load.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md @@ -439,5 +439,5 @@ SHOW ROUTINE LOAD TASK WHERE JobName = "test1"; -# keyword +## keyword SHOW,ROUTINE,LOAD,TASK diff --git a/docs/help/Contents/Data Manipulation/streaming.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/streaming.md similarity index 100% rename from docs/help/Contents/Data Manipulation/streaming.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/streaming.md diff --git a/docs/help/Contents/Data Types/help.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md similarity index 100% rename from docs/help/Contents/Data Types/help.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md diff --git a/docs/help/Contents/Utility/util_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Utility/util_stmt.md similarity index 81% rename from docs/help/Contents/Utility/util_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Utility/util_stmt.md index 2fbafe8295f9b0..146a4b68974806 100644 --- a/docs/help/Contents/Utility/util_stmt.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Utility/util_stmt.md @@ -1,13 +1,13 @@ -# DESCRIBE -## description - 该语句用于展示指定 table 的 schema 信息 - 语法: - DESC[RIBE] [db_name.]table_name [ALL]; - - 说明: - 如果指定 ALL,则显示该 table 的所有 index 的 schema - -## example - -## keyword - DESCRIBE,DESC \ No newline at end of file +# DESCRIBE +## description + 该语句用于展示指定 table 的 schema 信息 + 语法: + DESC[RIBE] [db_name.]table_name [ALL]; + + 说明: + 如果指定 ALL,则显示该 table 的所有 index 的 schema + +## example + +## keyword + DESCRIBE,DESC diff --git a/docs/help/Contents/Functions/Date and Time Functions/help.md b/docs/help/Contents/Functions/Date and Time Functions/help.md deleted file mode 100644 index dedb8ca2616e91..00000000000000 --- a/docs/help/Contents/Functions/Date and Time Functions/help.md +++ /dev/null @@ -1,310 +0,0 @@ -# unix_timestamp -## description -Syntax: -UNIX_TIMESTAMP(), UNIX_TIMESTAMP(date) - -将Date或者Datetime类型转化为unix时间戳 -如果没有参数,则是将当前的时间转化为时间戳 -参数需要是Date或者Datetime类型 - -## example -mysql> SELECT UNIX_TIMESTAMP(); - -> 1196440210 -mysql> SELECT UNIX_TIMESTAMP('2007-11-30 10:30:19'); - -> 1196418619 - -# from_unixtime -## description -Syntax: - FROM_UNIXTIME(int unix_timestamp[, string string_format]) - -将unix时间戳转化位对应的time格式,返回的格式由string_format指定 -默认为yyyy-MM-dd HH:mm:ss -传入的是整形,返回的是字符串类型 -目前string_format只支持两种类型的格式:yyyy-MM-dd,yyyy-MM-dd HH:mm:ss -其余string_format格式是非法的,返回NULL - -## example -mysql> SELECT FROM_UNIXTIME(1196440219); - -> '2007-12-01 00:30:19' - -mysql> 
SELECT FROM_UNIXTIME(1196440219, 'yyyy-MM-dd'); - -> '2007-12-01' - -mysql> SELECT FROM_UNIXTIME(1196440219, 'yyyy-MM-dd HH:mm:ss'); - -> '2007-12-01 00:30:19' - -# year -## description -Syntax: -YEAR(date) - -返回date类型的year部分,范围从1000-9999 -参数为Date或者Datetime类型 -## example -mysql> SELECT YEAR('1987-01-01'); - -> 1987 - -# month -## description -Syntax: -MONTH(date) - -返回时间类型中的月份信息,范围是1, 12 -参数为Date或者Datetime类型 - -## example -mysql> SELECT MONTH('1987-01-02'); - -> 01 - -# day -## description -Syntax: -DAY(date) - -与DAYOFMONTH是同义词,请`help dayofmonth` - -# dayofmonth -## description -Syntax: -DAYOFMONTH(date) - -获得日期中的天信息,返回值范围从1-31。 -需要传入date类型 - -## example -mysql> SELECT DAYOFMONTH('1987-01-02'); - -> 2 - -# dayofyear -## description -Syntax: -DAYOFYEAR(date) - -获得日期中对应当年中的哪一天。 -输入值为date类型 - -## example -mysql> SELECT DAYOFYEAR('2007-02-03'); - -> 34 - -# weekofyear -## description -Syntax: -WEEKOFYEAR(date) - -获得一年中的第几周 -输入值为date类型 - -## example -mysql> SELECT WEEKOFYEAR('2008-02-20'); - -> 8 - -# hour -## description -Syntax: -HOUR(date) - -获得时间中对应的小时信息 -这里Palo跟MySQL不太一样,因为MySQL是支持Time类型的 -Palo没有Time类型,所以输入的内容是Date或者Datetime。 - -## example -mysql> select hour("2000-01-02 12:34:56"); - -> 12 - -# minute -## description -Syntax: -MINUTE(date) - -获得日期中的分钟信息 -这里Palo跟MySQL不太一样,因为MySQL是支持Time类型的 -Palo没有Time类型,所以输入的内容是Date或者Datetime。 - -## example -mysql> SELECT MINUTE("2000-01-02 12:34:56"); - -> 34 - -# second -## description -Syntax: -SECOND(date) - -获得时间中的秒信息 -这里Palo跟MySQL不太一样,因为MySQL是支持Time类型的 -Palo没有Time类型,所以输入的内容是Date或者Datetime。 -## example -mysql> SELECT SECOND("2000-01-02 12:34:56"); - -> 56 - -# now -## description -Syntax: -NOW() - -获得当前的时间,以Datetime类型返回 - -## example -mysql> SELECT NOW(); - -> '2007-12-15 23:50:26' - -# current_timestamp -## description -Syntax: -CURRENT_TIMESTAMP() - -与NOW()是同义词 - -# datediff -## description -Syntax: -DATEDIFF(expr1,expr2) - -计算expr1 - expr2,结果精确到天。 -要求传入的两个值需要是datetime类型。 - -## example -mysql> SELECT DATEDIFF(CAST ('2007-12-31 23:59:59' AS DATETIME),CAST ('2007-12-30' AS DATETIME)); - -> 1 -mysql> SELECT DATEDIFF(CAST('2010-11-30 23:59:59' AS DATETIME), CAST('2010-12-31' AS DATETIME)); - -> -31 - -# date_add -## description -Syntax: -DATE_ADD(date,INTERVAL expr unit) - -对时间类型进行加法运算 -支持的time unit包括 -YEAR, MONTH, DAY, HOUR, MINUTE, SECOND - -## example -mysql> DATE_ADD(date,INTERVAL expr unit) - -> 1987-01-02 00:00:00 - -# date_sub -## description -Syntax: -DATE_SUB(date,INTERVAL expr unit) - -与DATE_ADD相反,对时间类型进行减法运算 -支持的time unit包括 -YEAR, MONTH, DAY, HOUR, MINUTE, SECOND - -## example -mysql> DATE_SUB(date,INTERVAL expr unit) - -> 1986-12-31 00:00:00 - -# date_format -## description -Syntax: -DATE_FORMAT(date, format) - -将日期类型按照format的类型转化位字符串, -当前支持最大128字节的字符串,如果返回值长度超过128,则返回NULL -format的含义如下: -%a Abbreviated weekday name (Sun..Sat) -%b Abbreviated month name (Jan..Dec) -%c Month, numeric (0..12) -%D Day of the month with English suffix (0th, 1st, 2nd, 3rd, …) -%d Day of the month, numeric (00..31) -%e Day of the month, numeric (0..31) -%f Microseconds (000000..999999) -%H Hour (00..23) -%h Hour (01..12) -%I Hour (01..12) -%i Minutes, numeric (00..59) -%j Day of year (001..366) -%k Hour (0..23) -%l Hour (1..12) -%M Month name (January..December) -%m Month, numeric (00..12) -%p AM or PM -%r Time, 12-hour (hh:mm:ss followed by AM or PM) -%S Seconds (00..59) -%s Seconds (00..59) -%T Time, 24-hour (hh:mm:ss) -%U Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 -%u Week (00..53), where Monday is the first day of the week; WEEK() 
mode 1 -%V Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with %X -%v Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with %x -%W Weekday name (Sunday..Saturday) -%w Day of the week (0=Sunday..6=Saturday) -%X Year for the week where Sunday is the first day of the week, numeric, four digits; used with %V -%x Year for the week, where Monday is the first day of the week, numeric, four digits; used with %v -%Y Year, numeric, four digits -%y Year, numeric (two digits) -%% A literal “%” character -%x x, for any “x” not listed above - -## example -mysql> SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y'); - -> 'Sunday October 2009' -mysql> SELECT DATE_FORMAT('2007-10-04 22:23:00', '%H:%i:%s'); - -> '22:23:00' -mysql> SELECT DATE_FORMAT('1900-10-04 22:23:00', '%D %y %a %d %m %b %j'); - -> '4th 00 Thu 04 10 Oct 277' -mysql> SELECT DATE_FORMAT('1997-10-04 22:23:00', '%H %k %I %r %T %S %w'); - -> '22 22 10 10:23:00 PM 22:23:00 00 6' -mysql> SELECT DATE_FORMAT('1999-01-01', '%X %V'); - -> '1998 52' -mysql> SELECT DATE_FORMAT('2006-06-01', '%d'); - -> '01' - -# from_days -## description -Syntax: -FROM_DAYS(N) -通过距离0000-01-01日的天数计算出哪一天 - -## example -mysql> SELECT FROM_DAYS(730669); - -> '2007-07-03' - -# to_days -## description -Syntax: -TO_DAYS(date) -返回date距离0000-01-01的天数 - -## example -mysql> SELECT TO_DAYS(950501); - -> 728779 -mysql> SELECT TO_DAYS('2007-10-07'); - -> 733321 - -# str_to_date -## description -Syntax: -STR_TO_DATE(str, format) -通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL -支持的format格式与date_format一致 - -## example -mysql> SELECT STR_TO_DATE('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s'); - -> 2014-12-21 12:34:56 -mysql> SELECT STR_TO_DATE('200442 Monday', '%X%V %W'); - -> 2004-10-18 - -# monthname -## description -Syntax: -MONTHNAME(DATE) - -返回日期对应的月份名字 - -## example -mysql> SELECT MONTHNAME('2008-02-03'); - -> 'February' - -# monthname -## description -Syntax: -MONTHNAME(DATE) - -返回日期对应的日期名字 - -## example -mysql> SELECT DAYNAME('2007-02-03'); - -> 'Saturday' diff --git a/docs/script/keyword.sh b/docs/script/keyword.sh new file mode 100755 index 00000000000000..ceaaa57673610c --- /dev/null +++ b/docs/script/keyword.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#this shell adds keywords to MD files without keywords + +IFS=`echo -en "\n\b"` + +ROOTDIR=`dirname "$0"` +ROOTDIR=`cd "$ROOT"; pwd` + +scandir() { + for file in `ls $*`; do + if [[ ! 
-d $*"/"$file ]]; then + if [[ $file == *".md" ]]; then + readfile $*"/"${file} + fi + else + scandir $*"/"${file} + fi + done +} + +readfile() { + local file=$* + local topic=`cat $file | grep "^#[^#].*" | grep -o "[^# ]\+\( \+[^ ]\+\)*"` + local keywordNum=`cat $file | grep "^##[^#]*keyword[ ]*$" | wc -l` + if [[ $keywordNum != 0 || -z $topic ]]; then + return + fi + local SAVEIFS=$IFS + IFS=' ' + local array=`echo $topic | tr '\`' ' ' | tr ',' ' '` + local keywords= + for keyword in ${array[*]}; do + keywords=$keywords","$keyword + done + array=`echo $array | tr '_' ' '` + for keyword in ${array[*]}; do + keywords=$keywords","$keyword + done + keywords=`echo ${keywords:1} | tr 'a-z' 'A-Z'` + IFS=$SAVEIFS + file=`echo $file | sed 's/[ \(\)]/\\\&/g'` + eval sed -i '"\$a ##keyword"' $file + eval sed -i '"\$a ${keywords}"' $file +} + +main() { + scandir $ROOTDIR +} + +main "$@" +exit 0 diff --git a/docs/script/merge.sh b/docs/script/merge.sh new file mode 100755 index 00000000000000..5d252c7c697b77 --- /dev/null +++ b/docs/script/merge.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#run split.h first +#this shell merges user-defined heads in the MD file into "description" head +################ ################# +# #topic # # #topic # +# # # # +# ##syntax # # ##descrption # +# somelines # # somelines # +# # # # +# ##descrption # # syntax # +# somelines # ==> # somelines # +# # # # +# ##example # # parameter # +# somelines # # somelines # +# # # # +# ##parameter # # ##example # +# somelines # # somelines # +# # # # +################ ################# + +IFS=`echo -en "\n\b"` + +ROOTDIR=`dirname "$0"` +ROOTDIR=`cd "$ROOT"; pwd` + +keywords=" +examples +example +description +keywords +keyword +url +" + +matchKeyword(){ + for keyword in ${keywords[*]}; do + if [[ "$1" == $keyword ]]; then + return 0 + fi + done + return 1 +} + +merge(){ + file=$* + sed -n '/^#[^#]/p' $file > ${ROOTDIR}/tempp + sed -n '/^<>TARGET<>/,/^<>END<>/p' $file >> ${ROOTDIR}/tempp + sed -n '/^>>>/,/^<<> ${ROOTDIR}/tempp + sed -n '/^\^\^\^/,/^\$\$\$/p' $file >> ${ROOTDIR}/tempp + sed -i 's/^<>TARGET<>//;s/^<>END<>//;s/^>>>//;s/^<<END<>/"' $tempfile + ;; + *) + echo "Internal error" ; exit 1 + ;; + esac + fi + + if [[ $row == $TotalRow && headlevel > 0 ]]; then + merge $tempfile + continue + fi + + if [[ $line == "##"* ]]; then + headlevel=2 + line=`echo ${line:2} | tr '[A-Z]' '[a-z]' | grep -o "[^ ]\+\( \+[^ ]\+\)*"` + if [[ $line == "description" ]]; then + eval sed -i '"${row}s/description/description/i"' $tempfile + elif [[ $line == "examples" ]]; then + eval sed -i '"${row}s/examples/example/i"' $tempfile + elif [[ $line == "keywords" ]]; then + eval sed -i '"${row}s/keywords/keyword/i"' $tempfile + fi + matchKeyword ${line} + if [[ $? 
== 1 ]]; then + style="unmatch" + eval sed -i '"${row}s/^##/>>>/"' $tempfile + else + if [[ $line == "description" ]]; then + style="description" + eval sed -i '"${row}s/^/<>TARGET<>/"' $tempfile + continue + fi + style="match" + eval sed -i '"${row}s/^/\^\^\^/"' $tempfile + fi + elif [[ $line == "#"* ]]; then + if [[ headlevel == 0 ]]; then + headlevel=1 + continue + fi + headleve=1 + fi + fi + done < $tempfile + if [[ -f $tempfile ]]; then + rm $tempfile + fi + if [[ -f ${ROOTDIR}/tempp ]]; then + cp ${ROOTDIR}/tempp $* && rm ${ROOTDIR}/tempp + fi +} + +main() { + scandir $ROOTDIR +} + +main "$@" +exit 0 diff --git a/docs/script/split.sh b/docs/script/split.sh new file mode 100755 index 00000000000000..18c7a6ab997f93 --- /dev/null +++ b/docs/script/split.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#this shell splits topics in the same MD file +IFS=`echo -en "\n\b"` + +ROOTDIR=`dirname "$0"` +ROOTDIR=`cd "$ROOT"; pwd` + +scandir() { + for file in `ls $*`; do + if [[ ! -d $*"/"$file ]]; then + if [[ $file == *".md" ]]; then + splitfile $*"/"${file} + fi + else + scandir $*"/"${file} + fi + done +} + +splitfile() { + local file=$* + local filedir=${file%/*} + local evalfile=`echo $file | sed 's/[ \(\)]/\\\&/g'` + + local row=0 + local split=1 + local name= + local TotalRow=`wc -l $file | awk '{print $1}'` + local TopicNum=`grep -o '^#[^#].*' $file | wc -l` + if [ $TopicNum -lt 2 ]; then + return + fi + while read line; do + ((row++)) + + if [[ $row == $TotalRow || $line =~ ^#[^#].* ]]; then + if [[ -n $name && $split != $row ]]; then + eval awk '"NR==${split},NR==$(($row==$TotalRow?row:row-1))"' ${evalfile} > ${ROOTDIR}/tempp + cp ${ROOTDIR}/tempp ${filedir}/${name}.md + fi + name=`echo $line | grep -o "[^# ]\+\( \+[^ ]\+\)*"` + split=$row + fi + done < $file + if [[ -f ${ROOTDIR}/tempp ]]; then + rm ${ROOTDIR}/tempp + fi + rm $file +} + +main() { + scandir $ROOTDIR +} + +main "$@" +exit 0 From d0d3b3a937ab44798abbbc3806268810b67623eb Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 6 Aug 2019 10:58:08 +0800 Subject: [PATCH 4/9] Change directly without scripting --- docs/Makefile | 3 +- .../sql-functions/aggregate-functions/avg.md | 15 +- .../aggregate-functions/count.md | 13 +- .../aggregate-functions/count_distinct.md | 13 +- .../aggregate-functions/hll_union_agg.md | 15 +- .../sql-functions/aggregate-functions/max.md | 13 +- .../sql-functions/aggregate-functions/min.md | 13 +- .../sql-functions/aggregate-functions/ndv.md | 15 +- .../aggregate-functions/percentile_approx.md | 16 +- .../aggregate-functions/stddev.md | 13 +- .../aggregate-functions/stddev_samp.md | 13 +- .../sql-functions/aggregate-functions/sum.md | 13 +- .../aggregate-functions/var_samp.md | 14 +- .../aggregate-functions/variance.md | 14 +- 
.../cn/sql-reference/sql-functions/cast.md | 25 +- .../date-time-functions/current_timestamp.md | 14 +- .../date-time-functions/date_add.md | 16 +- .../date-time-functions/date_format.md | 15 +- .../date-time-functions/date_sub.md | 16 +- .../date-time-functions/datediff.md | 15 +- .../sql-functions/date-time-functions/day.md | 16 +- .../date-time-functions/dayname.md | 16 +- .../date-time-functions/dayofmonth.md | 16 +- .../date-time-functions/dayofweek.md | 16 +- .../date-time-functions/dayofyear.md | 16 +- .../date-time-functions/from_days.md | 14 +- .../date-time-functions/from_unixtime.md | 16 +- .../date-time-functions/month.md | 16 +- .../date-time-functions/monthname.md | 16 +- .../sql-functions/date-time-functions/now.md | 14 +- .../date-time-functions/str_to_date.md | 16 +- .../date-time-functions/timediff.md | 15 +- .../date-time-functions/to_days.md | 16 +- .../date-time-functions/unix_timestamp.md | 16 +- .../date-time-functions/utc_timestamp.md | 16 +- .../date-time-functions/workofyear.md | 16 +- .../sql-functions/date-time-functions/year.md | 16 +- .../spatial-functions/st_astext.md | 13 +- .../spatial-functions/st_circle.md | 15 +- .../spatial-functions/st_contains.md | 13 +- .../spatial-functions/st_distance_sphere.md | 13 +- .../spatial-functions/st_geometryfromtext.md | 13 +- .../spatial-functions/st_linefromtext.md | 13 +- .../spatial-functions/st_point.md | 15 +- .../spatial-functions/st_polygon.md | 13 +- .../sql-functions/spatial-functions/st_x.md | 13 +- .../sql-functions/spatial-functions/st_y.md | 13 +- .../sql-functions/string-functions/ascii.md | 13 +- .../sql-functions/string-functions/concat.md | 13 +- .../string-functions/concat_ws.md | 15 +- .../string-functions/find_in_set.md | 13 +- .../string-functions/get_json_double.md | 15 +- .../string-functions/get_json_int.md | 15 +- .../string-functions/get_json_string.md | 15 +- .../string-functions/group_concat.md | 13 +- .../sql-functions/string-functions/instr.md | 13 +- .../sql-functions/string-functions/lcase.md | 11 +- .../sql-functions/string-functions/left.md | 13 +- .../sql-functions/string-functions/length.md | 13 +- .../sql-functions/string-functions/locate.md | 13 +- .../sql-functions/string-functions/lower.md | 13 +- .../sql-functions/string-functions/lpad.md | 13 +- .../sql-functions/string-functions/ltrim.md | 13 +- .../string-functions/money_format.md | 13 +- .../string-functions/regexp_extract.md | 13 +- .../string-functions/regexp_replace.md | 13 +- .../sql-functions/string-functions/repeat.md | 13 +- .../sql-functions/string-functions/right.md | 13 +- .../string-functions/split_part.md | 13 +- .../sql-functions/string-functions/strleft.md | 13 +- .../string-functions/strright.md | 13 +- .../Account Management/CREATE ROLE.md | 19 + .../Account Management/CREATE USER.md | 49 + .../Account Management/DROP ROLE.md | 18 + .../Account Management/DROP USER.md | 18 + .../Account Management/GRANT.md | 55 + .../Account Management/REVOKE.md | 23 + .../Account Management/SET PASSWORD.md | 30 + .../Account Management/SET PROPERTY.md | 56 + .../Account Management/SHOW GRANTS.md | 30 + .../Account Management/SHOW ROLES.md | 33 + .../sql-statements/Account Management/help.md | 335 ----- .../Administration/ADMIN CANCEL REPAIR.md | 22 + .../{admin_repair_stmt.md => ADMIN REPAIR.md} | 22 - .../Administration/ADMIN SET CONFIG.md | 18 + .../Administration/ADMIN SHOW CONFIG.md | 27 + .../ADMIN SHOW REPLICA DISTRIBUTION.md | 26 + .../ADMIN SHOW REPLICA STATUS.md | 39 + .../Administration/ALTER CLUSTER.md | 28 + 
.../Administration/ALTER SYSTEM.md | 94 ++ .../Administration/CANCEL DECOMMISSION.md | 15 + .../Administration/CREATE CLUSTER.md | 36 + .../{small_files.md => CREATE FILE.md} | 52 - .../Administration/DROP CLUSTER.md | 18 + .../Administration/DROP FILE.md | 25 + .../sql-statements/Administration/ENTER.md | 18 + .../Administration/LINK DATABASE.md | 24 + .../Administration/MIGRATE DATABASE.md | 20 + .../Administration/SHOW BACKENDS.md | 22 + .../Administration/SHOW BROKER.md | 15 + .../Administration/SHOW FILE.md | 27 + .../Administration/SHOW FRONTENDS.md | 18 + .../Administration/SHOW MIGRATIONS.md | 12 + .../Administration/admin_show_stmt.md | 92 -- .../Administration/admin_stmt.md | 338 ----- .../Data Definition/ALTER DATABASE.md | 29 + .../Data Definition/ALTER TABLE.md | 240 ++++ .../sql-statements/Data Definition/BACKUP.md | 39 + .../Data Definition/CANCEL ALTER.md | 32 + .../Data Definition/CANCEL BACKUP.md | 13 + .../Data Definition/CANCEL RESTORE.md | 16 + .../Data Definition/CREATE DATABASE.md | 13 + .../Data Definition/CREATE REPOSITORY.md | 50 + .../Data Definition/CREATE TABLE.md | 334 +++++ .../Data Definition/CREATE VIEW.md | 22 + .../Data Definition/Colocate Join.md | 72 ++ .../Data Definition/DROP DATABASE.md | 16 + .../Data Definition/DROP REPOSITORY.md | 16 + .../Data Definition/DROP TABLE.md | 19 + .../Data Definition/DROP VIEW.md | 14 + .../sql-statements/Data Definition/HLL.md | 80 ++ .../sql-statements/Data Definition/RECOVER.md | 28 + .../sql-statements/Data Definition/RESTORE.md | 52 + .../Data Definition/TRUNCATE TABLE.md | 27 + .../Data Definition/create-function.md | 19 +- .../Data Definition/ddl_stmt.md | 1112 ----------------- .../Data Definition/drop-function.md | 15 +- .../Data Definition/show-function.md | 19 +- .../Data Manipulation/CANCEL DELETE.md | 10 + .../Data Manipulation/CANCEL LABEL.md | 33 + .../Data Manipulation/CANCEL LOAD.md | 20 + .../Data Manipulation/DELETE.md | 36 + .../Data Manipulation/EXPORT.md | 58 + .../Data Manipulation/GET LABEL STATE.md | 33 + .../sql-statements/Data Manipulation/LOAD.md | 284 +++++ .../Data Manipulation/MINI LOAD.md | 104 ++ .../Data Manipulation/MULTI LOAD.md | 82 ++ .../Data Manipulation/PAUSE ROUTINE LOAD.md | 10 + .../Data Manipulation/RESTORE TABLET.md | 15 + .../Data Manipulation/RESUME ROUTINE LOAD.md | 10 + .../{routine_load.md => ROUTINE LOAD.md} | 194 --- .../Data Manipulation/SHOW ALTER.md | 25 + .../Data Manipulation/SHOW BACKUP.md | 37 + .../Data Manipulation/SHOW DATA.md | 21 + .../Data Manipulation/SHOW DATABASES.md | 9 + .../Data Manipulation/SHOW DELETE.md | 13 + .../Data Manipulation/SHOW EXPORT.md | 36 + .../Data Manipulation/SHOW LOAD.md | 49 + .../Data Manipulation/SHOW PARTITIONS.md | 16 + .../Data Manipulation/SHOW PROPERTY.md | 16 + .../Data Manipulation/SHOW REPOSITORIES.md | 23 + .../Data Manipulation/SHOW RESTORE.md | 41 + .../SHOW ROUTINE LOAD TASK.md | 9 + .../Data Manipulation/SHOW ROUTINE LOAD.md | 32 + .../Data Manipulation/SHOW SNAPSHOT.md | 31 + .../Data Manipulation/SHOW TABLES.md | 9 + .../Data Manipulation/SHOW TABLET.md | 17 + .../Data Manipulation/STOP ROUTINE LOAD.md | 10 + .../{streaming.md => STREAM LOAD.md} | 66 - .../Data Manipulation/insert.md | 13 +- .../Data Manipulation/manipulation_stmt.md | 950 -------------- .../sql-statements/Data Types/BIGINT.md | 7 + .../sql-statements/Data Types/BOOLEAN.md | 7 + .../sql-statements/Data Types/CHAR.md | 7 + .../sql-statements/Data Types/DATE.md | 15 + .../sql-statements/Data Types/DATETIME.md | 8 + .../sql-statements/Data 
Types/DECIMAL.md | 8 + .../sql-statements/Data Types/DOUBLE.md | 7 + .../sql-statements/Data Types/FLOAT.md | 7 + .../Data Types/HLL(HyperLogLog).md | 9 + .../sql-statements/Data Types/INT.md | 7 + .../sql-statements/Data Types/SMALLINT.md | 7 + .../sql-statements/Data Types/TINYINT.md | 7 + .../sql-statements/Data Types/VARCHAR.md | 7 + .../sql-statements/Data Types/help.md | 77 -- docs/script/keyword.sh | 68 - docs/script/merge.sh | 164 --- docs/script/split.sh | 73 -- 178 files changed, 3699 insertions(+), 4052 deletions(-) create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE ROLE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE USER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP ROLE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP USER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/GRANT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/REVOKE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PASSWORD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PROPERTY.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW GRANTS.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md delete mode 100644 docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md rename docs/documentation/cn/sql-reference/sql-statements/Administration/{admin_repair_stmt.md => ADMIN REPAIR.md} (60%) create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER CLUSTER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER SYSTEM.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/CREATE CLUSTER.md rename docs/documentation/cn/sql-reference/sql-statements/Administration/{small_files.md => CREATE FILE.md} (63%) create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/DROP CLUSTER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/DROP FILE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/ENTER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/LINK DATABASE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW BACKENDS.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW 
BROKER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FILE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md delete mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md delete mode 100644 docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER TABLE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/BACKUP.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE VIEW.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/Colocate Join.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP DATABASE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP TABLE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP VIEW.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/RECOVER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/RESTORE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md delete mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/DELETE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/EXPORT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/LOAD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/PAUSE 
ROUTINE LOAD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md rename docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/{routine_load.md => ROUTINE LOAD.md} (62%) create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md rename docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/{streaming.md => STREAM LOAD.md} (78%) delete mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md create mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data 
Types/VARCHAR.md delete mode 100644 docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md delete mode 100755 docs/script/keyword.sh delete mode 100755 docs/script/merge.sh delete mode 100755 docs/script/split.sh diff --git a/docs/Makefile b/docs/Makefile index bad7301708cc65..a51e1336b6c279 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -33,8 +33,7 @@ ${HELP_DIR}: # build help zip HELP_OUTPUT = ${BUILD_DIR}/help-resource.zip ${HELP_OUTPUT}: documentation/cn/sql-reference ${BUILD_DIR} ${HELP_DIR} - cp -r $ SELECT datetime, AVG(cost_time) FROM log_statis group by datetime; @@ -28,3 +27,5 @@ mysql> SELECT datetime, AVG(distinct cost_time) FROM log_statis group by datetim +---------------------+---------------------------+ ``` +##keyword +AVG,AVG diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md index 507ac1c649b8fb..39562a563b6710 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md @@ -1,14 +1,13 @@ # COUNT +## description -## Syntax - -`COUNT([DISTINCT] expr)` +用于返回满足要求的行的数目 -## Description + Syntax -用于返回满足要求的行的数目 +`COUNT([DISTINCT] expr)` -## Examples +## example ``` MySQL > select count(*) from log_statis group by datetime; @@ -32,3 +31,5 @@ MySQL > select count(distinct datetime) from log_statis group by datetime; | 71045 | +-------------------------------+ ``` +##keyword +COUNT,COUNT diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md index ecaee4400eafd4..b666220ca70185 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md @@ -1,14 +1,13 @@ # COUNT_DISTINCT +## description -## Syntax - -`COUNT_DISTINCT(expr)` +用于返回满足要求的行的数目,或者非NULL行的数目 -## Description + Syntax -用于返回满足要求的行的数目,或者非NULL行的数目 +`COUNT_DISTINCT(expr)` -## Examples +## example ``` MySQL > select count_distinct(query_id) from log_statis group by datetime; @@ -18,3 +17,5 @@ MySQL > select count_distinct(query_id) from log_statis group by datetime; | 577 | +----------------------------+ ``` +##keyword +COUNT_DISTINCT,COUNT,DISTINCT diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md index 69253c8c7289a0..06d27b3ecab07f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md @@ -1,10 +1,5 @@ # HLL_UNION_AGG - -## Syntax - -`HLL_UNION_AGG(hll)` - -## Description +## description HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过程的中间结果 @@ -14,7 +9,11 @@ HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过 导入的时候通过hll_hash函数来指定数据中哪一列用于生成hll列,它常用于替代count distinct,通过结合rollup在业务上用于快速计算uv等 -## Examples + Syntax + +`HLL_UNION_AGG(hll)` + +## example ``` MySQL > select HLL_UNION_AGG(uv_set) from test_uv;; +-------------------------+ @@ -23,3 +22,5 @@ MySQL > select HLL_UNION_AGG(uv_set) from test_uv;; | 17721 | +-------------------------+ ``` +##keyword +HLL_UNION_AGG,HLL,UNION,AGG diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md index d4ca2506471650..0cb3acbfad2ebf 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md @@ -1,14 +1,13 @@ # MAX +## description -## Syntax - -`MAX(expr)` +返回expr表达式的最大值 -## Description + Syntax -返回expr表达式的最大值 +`MAX(expr)` -## Examples +## example ``` MySQL > select max(scan_rows) from log_statis group by datetime; +------------------+ @@ -17,3 +16,5 @@ MySQL > select max(scan_rows) from log_statis group by datetime; | 4671587 | +------------------+ ``` +##keyword +MAX,MAX diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md index e6c8f18516e4e3..275d9432fa08c9 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md @@ -1,14 +1,13 @@ # MIN +## description -## Syntax - -`MIN(expr)` +返回expr表达式的最小值 -## Description + Syntax -返回expr表达式的最小值 +`MIN(expr)` -## Examples +## example ``` MySQL > select min(scan_rows) from log_statis group by datetime; +------------------+ @@ -17,3 +16,5 @@ MySQL > select min(scan_rows) from log_statis group by datetime; | 0 | +------------------+ ``` +##keyword +MIN,MIN diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md index ff3a08753421e4..3b01515a9379fa 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md @@ -1,16 +1,15 @@ # NDV - -## Syntax - -`NDV(expr)` - -## Description +## description 返回类似于 COUNT(DISTINCT col) 结果的近似值聚合函数。 它比 COUNT 和 DISTINCT 组合的速度更快,并使用固定大小的内存,因此对于高基数的列可以使用更少的内存。 -## Examples + Syntax + +`NDV(expr)` + +## example ``` MySQL > select ndv(query_id) from log_statis group by datetime; +-----------------+ @@ -19,3 +18,5 @@ MySQL > select ndv(query_id) from log_statis group by datetime; | 17721 | +-----------------+ ``` +##keyword +NDV,NDV diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md index 329f6b104a0071..fef57b4cfc40ee 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md @@ -1,16 +1,15 @@ # PERCENTILE_APPROX - -## Syntax - -`PERCENTILE_APPROX(expr, DOUBLE p)` - -## Description +## description 返回第p个百分位点的近似值,p的值介于0到1之间 该函数使用固定大小的内存,因此对于高基数的列可以使用更少的内存,可用于计算tp99等统计值 -## Examples + Syntax + +`PERCENTILE_APPROX(expr, DOUBLE p)` + +## example ``` MySQL > select `table`, percentile_approx(cost_time,0.99) from log_statis group by `table`; +---------------------+---------------------------+ @@ -18,4 +17,5 @@ MySQL > select `table`, percentile_approx(cost_time,0.99) from log_statis group +----------+--------------------------------------+ | test | 54.22 | +----------+--------------------------------------+ -``` \ No newline at end of file +##keyword +PERCENTILE_APPROX,PERCENTILE,APPROX diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md index 393ffdf1419925..fe1d990bf4ed47 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md @@ -1,14 +1,13 @@ # STDDEV,STDDEV_POP +## description -## Syntax - -`STDDEV(expr)` +返回expr表达式的标准差 -## Description + Syntax -返回expr表达式的标准差 +`STDDEV(expr)` -## Examples +## example ``` MySQL > select stddev(scan_rows) from log_statis group by datetime; +---------------------+ @@ -24,3 +23,5 @@ MySQL > select stddev_pop(scan_rows) from log_statis group by datetime; | 2.3722760595994914 | +-------------------------+ ``` +##keyword +STDDEV,STDDEV_POP,STDDEV,STDDEV,POP diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md index 5243a99b90d12b..d28123d13745d6 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md @@ -1,14 +1,13 @@ # STDDEV_SAMP +## description -## Syntax - -`STDDEV_SAMP(expr)` +返回expr表达式的样本标准差 -## Description + Syntax -返回expr表达式的样本标准差 +`STDDEV_SAMP(expr)` -## Examples +## example ``` MySQL > select stddev_samp(scan_rows) from log_statis group by datetime; +--------------------------+ @@ -17,3 +16,5 @@ MySQL > select stddev_samp(scan_rows) from log_statis group by datetime; | 2.372044195280762 | +--------------------------+ ``` +##keyword +STDDEV_SAMP,STDDEV,SAMP diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md index ad4fe7c9932b59..e69589a2444744 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md @@ -1,14 +1,13 @@ # SUM +## description -## Syntax - -`SUM(expr)` +用于返回选中字段所有值的和 -## Description + Syntax -用于返回选中字段所有值的和 +`SUM(expr)` -## Examples +## example ``` MySQL > select sum(scan_rows) from log_statis group by datetime; +------------------+ @@ -17,3 +16,5 @@ MySQL > select sum(scan_rows) from log_statis group by datetime; | 8217360135 | +------------------+ ``` +##keyword +SUM,SUM diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md index 5da00ad73daeb3..58ba697e283355 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md @@ -1,14 +1,13 @@ # VAR_SAMP,VARIANCE_SAMP +## description -## Syntax - -`VAR_SAMP(expr)` +返回expr表达式的样本方差 -## Description + Syntax -返回expr表达式的样本方差 +`VAR_SAMP(expr)` -## Examples +## example ``` MySQL > select var_samp(scan_rows) from log_statis group by datetime; +-----------------------+ @@ -16,4 +15,5 @@ MySQL > select var_samp(scan_rows) from log_statis group by datetime; +-----------------------+ | 5.6227132145741789 | +-----------------------+ -``` \ No newline at end of file +##keyword +VAR_SAMP,VARIANCE_SAMP,VAR,SAMP,VARIANCE,SAMP diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md 
b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md index 27e948f3aec453..a20b18ebad8526 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md @@ -1,14 +1,13 @@ # VARIANCE,VAR_POP,VARIANCE_POP +## description -## Syntax - -`VARIANCE(expr)` +返回expr表达式的方差 -## Description + Syntax -返回expr表达式的方差 +`VARIANCE(expr)` -## Examples +## example ``` MySQL > select variance(scan_rows) from log_statis group by datetime; +-----------------------+ @@ -23,4 +22,5 @@ MySQL > select var_pop(scan_rows) from log_statis group by datetime; +----------------------+ | 5.6230744719006163 | +----------------------+ -``` \ No newline at end of file +##keyword +VARIANCE,VAR_POP,VARIANCE_POP,VARIANCE,VAR,POP,VARIANCE,POP diff --git a/docs/documentation/cn/sql-reference/sql-functions/cast.md b/docs/documentation/cn/sql-reference/sql-functions/cast.md index 3d07805522a28f..ad0b58a79b9e40 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/cast.md +++ b/docs/documentation/cn/sql-reference/sql-functions/cast.md @@ -1,26 +1,25 @@ # CAST +## description -## Syntax +将 input 转成 指定的 type + +## description + +将当前列 input 转换为 BIGINT 类型 + + Syntax ``` cast (input as type) ``` -## Description + BIGINT type -将 input 转成 指定的 type - -## BIGINT type - -## Syntax + Syntax ``` cast (input as BIGINT) ``` -## Description - -将当前列 input 转换为 BIGINT 类型 - -## Examples +## example 1. 转常量,或表中某列 @@ -54,3 +53,5 @@ mysql> select cast(cast ("11.2" as double) as bigint); +----------------------------------------+ 1 row in set (0.00 sec) ``` +##keyword +CAST,CAST diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md index c73c10a79d579f..2524ad6805eeee 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md @@ -1,14 +1,13 @@ # current_timestamp +## description -## Syntax - -`DATETIME CURRENT_TIMESTAMP()` +获得当前的时间,以Datetime类型返回 -## Description + Syntax -获得当前的时间,以Datetime类型返回 +`DATETIME CURRENT_TIMESTAMP()` -## Examples +## example ``` mysql> select current_timestamp(); @@ -17,4 +16,5 @@ mysql> select current_timestamp(); +---------------------+ | 2019-05-27 15:59:33 | +---------------------+ -``` \ No newline at end of file +##keyword +CURRENT_TIMESTAMP,CURRENT,TIMESTAMP diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md index 1ae067295caeab..e86c1d7ede9179 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md @@ -1,10 +1,5 @@ # date_add - -## Syntax - -`INT DATE_ADD(DATETIME date,INTERVAL expr type)` - -## Description +## description 向日期添加指定的时间间隔。 @@ -14,7 +9,11 @@ expr 参数是您希望添加的时间间隔。 type 参数可以是下列值:YEAR, MONTH, DAY, HOUR, MINUTE, SECOND -## Examples + Syntax + +`INT DATE_ADD(DATETIME date,INTERVAL expr type)` + +## example ``` mysql> select date_add('2010-11-30 23:59:59', INTERVAL 2 DAY); @@ -23,4 +22,5 @@ mysql> select date_add('2010-11-30 23:59:59', INTERVAL 2 DAY); +-------------------------------------------------+ | 2010-12-02 23:59:59 | 
+-------------------------------------------------+ -``` \ No newline at end of file +##keyword +DATE_ADD,DATE,ADD diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md index 95be89645e3066..c90482e4421f05 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md @@ -1,10 +1,5 @@ # date_format - -## Syntax - -`VARCHAR DATE_FORMAT(DATETIME date, VARCHAR format)` - -## Description +## description 将日期类型按照format的类型转化位字符串, 当前支持最大128字节的字符串,如果返回值长度超过128,则返回NULL @@ -75,7 +70,11 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。 %y | 年,2 位 -## Examples + Syntax + +`VARCHAR DATE_FORMAT(DATETIME date, VARCHAR format)` + +## example ``` mysql> select date_format('2009-10-04 22:23:00', '%W %M %Y'); @@ -120,3 +119,5 @@ mysql> select date_format('2006-06-01', '%d'); | 01 | +------------------------------------------+ ``` +##keyword +DATE_FORMAT,DATE,FORMAT diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md index 20150ad9b2326c..0f3eb613206f6e 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md @@ -1,10 +1,5 @@ # date_sub - -## Syntax - -`INT DATE_SUB(DATETIME date,INTERVAL expr type)` - -## Description +## description 从日期减去指定的时间间隔 @@ -14,7 +9,11 @@ expr 参数是您希望添加的时间间隔。 type 参数可以是下列值:YEAR, MONTH, DAY, HOUR, MINUTE, SECOND -## Examples + Syntax + +`INT DATE_SUB(DATETIME date,INTERVAL expr type)` + +## example ``` mysql> select date_sub('2010-11-30 23:59:59', INTERVAL 2 DAY); @@ -23,4 +22,5 @@ mysql> select date_sub('2010-11-30 23:59:59', INTERVAL 2 DAY); +-------------------------------------------------+ | 2010-11-28 23:59:59 | +-------------------------------------------------+ -``` \ No newline at end of file +##keyword +DATE_SUB,DATE,SUB diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md index 085d1c8930f388..fb6e1a00398452 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md @@ -1,10 +1,5 @@ # datediff - -## Syntax - -`DATETIME DATEDIFF(DATETIME expr1,DATETIME expr2)` - -## Description +## description 计算expr1 - expr2,结果精确到天。 @@ -12,7 +7,11 @@ expr1 和 expr2 参数是合法的日期或日期/时间表达式。 注释:只有值的日期部分参与计算。 -## Examples + Syntax + +`DATETIME DATEDIFF(DATETIME expr1,DATETIME expr2)` + +## example ``` mysql> select datediff(CAST('2007-12-31 23:59:59' AS DATETIME), CAST('2007-12-30' AS DATETIME)); @@ -29,3 +28,5 @@ mysql> select datediff(CAST('2010-11-30 23:59:59' AS DATETIME), CAST('2010-12-31 | -31 | +-----------------------------------------------------------------------------------+ ``` +##keyword +DATEDIFF,DATEDIFF diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md index 25852adde340c2..a59c3ede489f04 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md +++ 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md @@ -1,16 +1,15 @@ # day - -## Syntax - -`INT DAY(DATETIME date)` - -## Description +## description 获得日期中的天信息,返回值范围从1-31。 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT DAY(DATETIME date)` + +## example ``` mysql> select day('1987-01-31'); @@ -19,4 +18,5 @@ mysql> select day('1987-01-31'); +----------------------------+ | 31 | +----------------------------+ -``` \ No newline at end of file +##keyword +DAY,DAY diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md index b6353f609192ab..450d417b798d42 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md @@ -1,16 +1,15 @@ # dayname - -## Syntax - -`VARCHAR DAYNAME(DATE)` - -## Description +## description 返回日期对应的日期名字 参数为Date或者Datetime类型 -## Examples + Syntax + +`VARCHAR DAYNAME(DATE)` + +## example ``` mysql> select dayname('2007-02-03 00:00:00'); @@ -19,4 +18,5 @@ mysql> select dayname('2007-02-03 00:00:00'); +--------------------------------+ | Saturday | +--------------------------------+ -``` \ No newline at end of file +##keyword +DAYNAME,DAYNAME diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md index 40fe41767c0844..a75bd2799e64a9 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md @@ -1,16 +1,15 @@ # dayofmonth - -## Syntax - -`INT DAYOFMONTH(DATETIME date)` - -## Description +## description 获得日期中的天信息,返回值范围从1-31。 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT DAYOFMONTH(DATETIME date)` + +## example ``` mysql> select dayofmonth('1987-01-31'); @@ -19,4 +18,5 @@ mysql> select dayofmonth('1987-01-31'); +-----------------------------------+ | 31 | +-----------------------------------+ -``` \ No newline at end of file +##keyword +DAYOFMONTH,DAYOFMONTH diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md index 43e6511600d016..6806d0cc55e5bf 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md @@ -1,16 +1,15 @@ # dayofweek - -## Syntax - -`INT dayofweek(DATETIME date)` - -## Description +## description DAYOFWEEK函数返回日期的工作日索引值,即星期日为1,星期一为2,星期六为7 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT dayofweek(DATETIME date)` + +## example ``` mysql> select dayofweek('2019-06-25'); @@ -19,4 +18,5 @@ mysql> select dayofweek('2019-06-25'); +----------------------------------+ | 3 | +----------------------------------+ -``` \ No newline at end of file +##keyword +DAYOFWEEK,DAYOFWEEK diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md index 6f91d92e3dd1e4..e060a7326c1ce6 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md @@ -1,16 +1,15 @@ # dayofyear - 
-## Syntax - -`INT DAYOFYEAR(DATETIME date)` - -## Description +## description 获得日期中对应当年中的哪一天。 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT DAYOFYEAR(DATETIME date)` + +## example ``` mysql> select dayofyear('2007-02-03 00:00:00'); @@ -19,4 +18,5 @@ mysql> select dayofyear('2007-02-03 00:00:00'); +----------------------------------+ | 34 | +----------------------------------+ -``` \ No newline at end of file +##keyword +DAYOFYEAR,DAYOFYEAR diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md index abfbdb7216f699..aea93d22683bc0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md @@ -1,14 +1,13 @@ # from_days +## description -## Syntax - -`DATE FROM_DAYS(INT N)` +通过距离0000-01-01日的天数计算出哪一天 -## Description + Syntax -通过距离0000-01-01日的天数计算出哪一天 +`DATE FROM_DAYS(INT N)` -## Examples +## example ``` mysql> select from_days(730669); @@ -17,4 +16,5 @@ mysql> select from_days(730669); +-------------------+ | 2000-07-03 | +-------------------+ -``` \ No newline at end of file +##keyword +FROM_DAYS,FROM,DAYS diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md index 401362d83578ae..5a9a208d428dd2 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md @@ -1,10 +1,5 @@ # from_unixtime - -## Syntax - -`DATETIME FROM_UNIXTIME(INT unix_timestamp[, VARCHAR string_format])` - -## Description +## description 将unix时间戳转化位对应的time格式,返回的格式由string_format指定 @@ -16,7 +11,11 @@ 其余string_format格式是非法的,返回NULL -## Examples + Syntax + +`DATETIME FROM_UNIXTIME(INT unix_timestamp[, VARCHAR string_format])` + +## example ``` mysql> select from_unixtime(1196440219); @@ -39,4 +38,5 @@ mysql> select from_unixtime(1196440219, 'yyyy-MM-dd HH:mm:ss'); +--------------------------------------------------+ | 2007-12-01 00:30:19 | +--------------------------------------------------+ -``` \ No newline at end of file +##keyword +FROM_UNIXTIME,FROM,UNIXTIME diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md index b85715b4784bbe..b03bcfeccbf711 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md @@ -1,16 +1,15 @@ # month - -## Syntax - -`INT MONTH(DATETIME date)` - -## Description +## description 返回时间类型中的月份信息,范围是1, 12 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT MONTH(DATETIME date)` + +## example ``` mysql> select month('1987-01-01'); @@ -19,4 +18,5 @@ mysql> select month('1987-01-01'); +-----------------------------+ | 1 | +-----------------------------+ -``` \ No newline at end of file +##keyword +MONTH,MONTH diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md index 6ba49bf6afb03c..54036e0b4b6339 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md +++ 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md @@ -1,16 +1,15 @@ # monthname - -## Syntax - -`VARCHAR MONTHNAME(DATE)` - -## Description +## description 返回日期对应的月份名字 参数为Date或者Datetime类型 -## Examples + Syntax + +`VARCHAR MONTHNAME(DATE)` + +## example ``` mysql> select monthname('2008-02-03 00:00:00'); @@ -19,4 +18,5 @@ mysql> select monthname('2008-02-03 00:00:00'); +----------------------------------+ | February | +----------------------------------+ -``` \ No newline at end of file +##keyword +MONTHNAME,MONTHNAME diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md index ff2a365a92bec7..0a358030b7b926 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md @@ -1,14 +1,13 @@ # now +## description -## Syntax - -`DATETIME NOW()` +获得当前的时间,以Datetime类型返回 -## Description + Syntax -获得当前的时间,以Datetime类型返回 +`DATETIME NOW()` -## Examples +## example ``` mysql> select now(); @@ -17,4 +16,5 @@ mysql> select now(); +---------------------+ | 2019-05-27 15:58:25 | +---------------------+ -``` \ No newline at end of file +##keyword +NOW,NOW diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md index b44fc84a46cb93..15acc87ac5ecce 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md @@ -1,16 +1,15 @@ # str_to_date - -## Syntax - -`DATETIME STR_TO_DATE(VARCHAR str, VARCHAR format)` - -## Description +## description 通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL 支持的format格式与date_format一致 -## Examples + Syntax + +`DATETIME STR_TO_DATE(VARCHAR str, VARCHAR format)` + +## example ``` mysql> select str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s'); @@ -26,4 +25,5 @@ mysql> select str_to_date('200442 Monday', '%X%V %W'); +-----------------------------------------+ | 2004-10-18 | +-----------------------------------------+ -``` \ No newline at end of file +##keyword +STR_TO_DATE,STR,TO,DATE diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md index d45e6b4ef34232..20d3efa76013e6 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md @@ -1,10 +1,5 @@ # timediff - -## Syntax - -`TIME TIMEDIFF(DATETIME expr1, DATETIME expr2)` - -## Description +## description TIMEDIFF返回两个DATETIME之间的差值 @@ -12,7 +7,11 @@ TIMEDIFF函数返回表示为时间值的expr1 - expr2的结果,返回值为TI 其结果被限制在从-838:59:59到838:59:59之间的TIME值范围内 -## Examples + Syntax + +`TIME TIMEDIFF(DATETIME expr1, DATETIME expr2)` + +## example ``` mysql> SELECT TIMEDIFF(now(),utc_timestamp()); @@ -43,3 +42,5 @@ mysql> SELECT TIMEDIFF('2019-01-01 00:00:00', NULL); | NULL | +---------------------------------------+ ``` +##keyword +TIMEDIFF,TIMEDIFF diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md index 2deb3cc5809684..e95c874914784a 100644 --- 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md @@ -1,16 +1,15 @@ # to_days - -## Syntax - -`INT TO_DAYS(DATETIME date)` - -## Description +## description 返回date距离0000-01-01的天数 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT TO_DAYS(DATETIME date)` + +## example ``` mysql> select to_days('2007-10-07'); @@ -19,4 +18,5 @@ mysql> select to_days('2007-10-07'); +-----------------------+ | 733321 | +-----------------------+ -``` \ No newline at end of file +##keyword +TO_DAYS,TO,DAYS diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md index dfdb0461103973..1bdc57a5490f93 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md @@ -1,10 +1,5 @@ # unix_timestamp - -## Syntax - -`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date)` - -## Description +## description 将Date或者Datetime类型转化为unix时间戳 @@ -12,7 +7,11 @@ 参数需要是Date或者Datetime类型 -## Examples + Syntax + +`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date)` + +## example ``` mysql> select unix_timestamp(); @@ -28,4 +27,5 @@ mysql> select unix_timestamp('2007-11-30 10:30:19'); +---------------------------------------+ | 1196389819 | +---------------------------------------+ -``` \ No newline at end of file +##keyword +UNIX_TIMESTAMP,UNIX,TIMESTAMP diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md index a93ed98a139c86..d2cdd5dcd41393 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md @@ -1,10 +1,5 @@ # utc_timestamp - -## Syntax - -`DATETIME UTC_TIMESTAMP()` - -## Description +## description 返回当前UTC日期和时间在 "YYYY-MM-DD HH:MM:SS" 或 @@ -12,7 +7,11 @@ 根据该函数是否用在字符串或数字语境中 -## Examples + Syntax + +`DATETIME UTC_TIMESTAMP()` + +## example ``` mysql> select utc_timestamp(),utc_timestamp() + 1; @@ -21,4 +20,5 @@ mysql> select utc_timestamp(),utc_timestamp() + 1; +---------------------+---------------------+ | 2019-07-10 12:31:18 | 20190710123119 | +---------------------+---------------------+ -``` \ No newline at end of file +##keyword +UTC_TIMESTAMP,UTC,TIMESTAMP diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md index e04f2a45ad84c3..bb904f63425432 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md @@ -1,17 +1,16 @@ # weekofyear - -## Syntax - -`INT WEEKOFYEAR(DATETIME date)` - -## Description +## description 获得一年中的第几周 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT WEEKOFYEAR(DATETIME date)` + +## example ``` mysql> select weekofyear('2008-02-20 00:00:00'); @@ -20,4 +19,5 @@ mysql> select weekofyear('2008-02-20 00:00:00'); +-----------------------------------+ | 8 | +-----------------------------------+ -``` \ No newline at end of file +##keyword +WEEKOFYEAR,WEEKOFYEAR diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md index e2511edf4d43cf..324c24cf57e1f7 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md @@ -1,16 +1,15 @@ # year - -## Syntax - -`INT YEAR(DATETIME date)` - -## Description +## description 返回date类型的year部分,范围从1000-9999 参数为Date或者Datetime类型 -## Examples + Syntax + +`INT YEAR(DATETIME date)` + +## example ``` mysql> select year('1987-01-01'); @@ -19,4 +18,5 @@ mysql> select year('1987-01-01'); +-----------------------------+ | 1987 | +-----------------------------+ -``` \ No newline at end of file +##keyword +YEAR,YEAR diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md index 4b37351235b6f0..a4dc5b6e37a571 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md @@ -1,14 +1,13 @@ # `ST_AsText`,`ST_AsWKT` +## description -## Syntax - -`VARCHAR ST_AsText(GEOMETRY geo)` +将一个几何图形转化为WKT(Well Known Text)的表示形式 -## Description + Syntax -将一个几何图形转化为WKT(Well Known Text)的表示形式 +`VARCHAR ST_AsText(GEOMETRY geo)` -## Examples +## example ``` mysql> SELECT ST_AsText(ST_Point(24.7, 56.7)); @@ -18,3 +17,5 @@ mysql> SELECT ST_AsText(ST_Point(24.7, 56.7)); | POINT (24.7 56.7) | +---------------------------------+ ``` +##keyword +ST_ASTEXT,ST_ASWKT,ST,ASTEXT,ST,ASWKT diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md index 073d49f6c80f86..0c02204403c06b 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md @@ -1,15 +1,14 @@ # `ST_Circle` - -## Syntax - -`GEOMETRY ST_Circle(DOUBLE center_lng, DOUBLE center_lat, DOUBLE radius)` - -## Description +## description 将一个WKT(Well Known Text)转化为地球球面上的一个圆。其中`center_lng`表示的圆心的经度, `center_lat`表示的是圆心的纬度,`radius`表示的是圆的半径,单位是米 -## Examples + Syntax + +`GEOMETRY ST_Circle(DOUBLE center_lng, DOUBLE center_lat, DOUBLE radius)` + +## example ``` mysql> SELECT ST_AsText(ST_Circle(111, 64, 10000)); @@ -19,3 +18,5 @@ mysql> SELECT ST_AsText(ST_Circle(111, 64, 10000)); | CIRCLE ((111 64), 10000) | +--------------------------------------------+ ``` +##keyword +ST_CIRCLE,ST,CIRCLE diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md index 47e939f4099609..5269281c0c6254 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md @@ -1,14 +1,13 @@ # `ST_Contains` +## description -## Syntax - -`BOOL ST_Contains(GEOMETRY shape1, GEOMETRY shape2)` +判断几何图形shape1是否完全能够包含几何图形shape2 -## Description + Syntax -判断几何图形shape1是否完全能够包含几何图形shape2 +`BOOL ST_Contains(GEOMETRY shape1, GEOMETRY shape2)` -## Examples +## example ``` mysql> SELECT ST_Contains(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"), ST_Point(5, 5)); @@ -25,3 +24,5 @@ mysql> SELECT ST_Contains(ST_Polygon("POLYGON 
((0 0, 10 0, 10 10, 0 10, 0 0))"), | 0 | +------------------------------------------------------------------------------------------+ ``` +##keyword +ST_CONTAINS,ST,CONTAINS diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md index 4b5b034f07e814..fc48a1ae2e3e64 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md @@ -1,14 +1,13 @@ # `ST_Distance_Sphere` +## description -## Syntax - -`DOUBLE ST_Distance_Sphere(DOUBLE x_lng, DOUBLE x_lat, DOUBLE y_lng, DOUBLE x_lat)` +计算地球两点之间的球面距离,单位为 米。传入的参数分别为X点的经度,X点的纬度,Y点的经度,Y点的纬度。 -## Description + Syntax -计算地球两点之间的球面距离,单位为 米。传入的参数分别为X点的经度,X点的纬度,Y点的经度,Y点的纬度。 +`DOUBLE ST_Distance_Sphere(DOUBLE x_lng, DOUBLE x_lat, DOUBLE y_lng, DOUBLE x_lat)` -## Examples +## example ``` mysql> select st_distance_sphere(116.35620117, 39.939093, 116.4274406433, 39.9020987219); @@ -18,3 +17,5 @@ mysql> select st_distance_sphere(116.35620117, 39.939093, 116.4274406433, 39.902 | 7336.9135549995917 | +----------------------------------------------------------------------------+ ``` +##keyword +ST_DISTANCE_SPHERE,ST,DISTANCE,SPHERE diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md index 577d1558a09ccb..bdd99cbdc759fb 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md @@ -1,14 +1,13 @@ # `ST_GeometryFromText`,`ST_GeomFromText` +## description -## Syntax - -`GEOMETRY ST_GeometryFromText(VARCHAR wkt)` +将一个WKT(Well Known Text)转化为对应的内存的几何形式 -## Description + Syntax -将一个WKT(Well Known Text)转化为对应的内存的几何形式 +`GEOMETRY ST_GeometryFromText(VARCHAR wkt)` -## Examples +## example ``` mysql> SELECT ST_AsText(ST_GeometryFromText("LINESTRING (1 1, 2 2)")); @@ -18,3 +17,5 @@ mysql> SELECT ST_AsText(ST_GeometryFromText("LINESTRING (1 1, 2 2)")); | LINESTRING (1 1, 2 2) | +---------------------------------------------------------+ ``` +##keyword +ST_GEOMETRYFROMTEXT,ST_GEOMFROMTEXT,ST,GEOMETRYFROMTEXT,ST,GEOMFROMTEXT diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md index 40201909932ad3..bad31994e470dd 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md @@ -1,14 +1,13 @@ # `ST_LineFromText`,`ST_LineStringFromText` +## description -## Syntax - -`GEOMETRY ST_LineFromText(VARCHAR wkt)` +将一个WKT(Well Known Text)转化为一个Line形式的内存表现形式 -## Description + Syntax -将一个WKT(Well Known Text)转化为一个Line形式的内存表现形式 +`GEOMETRY ST_LineFromText(VARCHAR wkt)` -## Examples +## example ``` mysql> SELECT ST_AsText(ST_LineFromText("LINESTRING (1 1, 2 2)")); @@ -18,3 +17,5 @@ mysql> SELECT ST_AsText(ST_LineFromText("LINESTRING (1 1, 2 2)")); | LINESTRING (1 1, 2 2) | +---------------------------------------------------------+ ``` +##keyword +ST_LINEFROMTEXT,ST_LINESTRINGFROMTEXT,ST,LINEFROMTEXT,ST,LINESTRINGFROMTEXT diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md index 9bf5e30f2178f8..19a43f9f1a0457 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md @@ -1,15 +1,14 @@ # `ST_Point` - -## Syntax - -`POINT ST_Point(DOUBLE x, DOUBLE y)` - -## Description +## description 通过给定的X坐标值,Y坐标值返回对应的Point。 当前这个值只是在球面集合上有意义,X/Y对应的是经度/纬度(longitude/latitude) -## Examples + Syntax + +`POINT ST_Point(DOUBLE x, DOUBLE y)` + +## example ``` mysql> SELECT ST_AsText(ST_Point(24.7, 56.7)); @@ -19,3 +18,5 @@ mysql> SELECT ST_AsText(ST_Point(24.7, 56.7)); | POINT (24.7 56.7) | +---------------------------------+ ``` +##keyword +ST_POINT,ST,POINT diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md index 6e5765e3e61918..74cd5a93da14ac 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md @@ -1,14 +1,13 @@ # `ST_Polygon`,`ST_PolyFromText`,`ST_PolygonFromText` +## description -## Syntax - -`GEOMETRY ST_Polygon(VARCHAR wkt)` +将一个WKT(Well Known Text)转化为对应的多边形内存形式 -## Description + Syntax -将一个WKT(Well Known Text)转化为对应的多边形内存形式 +`GEOMETRY ST_Polygon(VARCHAR wkt)` -## Examples +## example ``` mysql> SELECT ST_AsText(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))")); @@ -18,3 +17,5 @@ mysql> SELECT ST_AsText(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))")); | POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0)) | +------------------------------------------------------------------+ ``` +##keyword +ST_POLYGON,ST_POLYFROMTEXT,ST_POLYGONFROMTEXT,ST,POLYGON,ST,POLYFROMTEXT,ST,POLYGONFROMTEXT diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md index 644b9ee11dc1a6..523aecae93f270 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md @@ -1,14 +1,13 @@ # `ST_X` +## description -## Syntax - -`DOUBLE ST_X(POINT point)` +当point是一个合法的POINT类型时,返回对应的X坐标值 -## Description + Syntax -当point是一个合法的POINT类型时,返回对应的X坐标值 +`DOUBLE ST_X(POINT point)` -## Examples +## example ``` mysql> SELECT ST_X(ST_Point(24.7, 56.7)); @@ -18,3 +17,5 @@ mysql> SELECT ST_X(ST_Point(24.7, 56.7)); | 24.7 | +----------------------------+ ``` +##keyword +ST_X,ST,X diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md index fb02ddb102d4c0..6b23348b23a620 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md @@ -1,14 +1,13 @@ # `ST_Y` +## description -## Syntax - -`DOUBLE ST_Y(POINT point)` +当point是一个合法的POINT类型时,返回对应的Y坐标值 -## Description + Syntax -当point是一个合法的POINT类型时,返回对应的Y坐标值 +`DOUBLE ST_Y(POINT point)` -## Examples +## example ``` mysql> SELECT ST_Y(ST_Point(24.7, 56.7)); @@ -18,3 +17,5 @@ mysql> SELECT ST_Y(ST_Point(24.7, 56.7)); | 56.7 | +----------------------------+ ``` +##keyword +ST_Y,ST,Y diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md index 95debfd84a0a0d..832d791418ae10 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md @@ -1,14 +1,13 @@ # ascii +## description -## Syntax - -`INT ascii(VARCHAR str)` +返回字符串第一个字符对应的 ascii 码 -## Description + Syntax -返回字符串第一个字符对应的 ascii 码 +`INT ascii(VARCHAR str)` -## Examples +## example ``` mysql> select ascii('1'); @@ -25,3 +24,5 @@ mysql> select ascii('234'); | 50 | +--------------+ ``` +##keyword +ASCII,ASCII diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md index 92ba559f613bf5..59600c30a8973c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md @@ -1,14 +1,13 @@ # concat +## description -## Syntax - -`VARCHAR concat(VARCHAR,...)` +将多个字符串连接起来, 如果参数中任意一个值是 NULL,那么返回的结果就是 NULL -## Description + Syntax -将多个字符串连接起来, 如果参数中任意一个值是 NULL,那么返回的结果就是 NULL +`VARCHAR concat(VARCHAR,...)` -## Examples +## example ``` mysql> select concat("a", "b"); @@ -32,3 +31,5 @@ mysql> select concat("a", null, "c"); | NULL | +------------------------+ ``` +##keyword +CONCAT,CONCAT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md index d72b7fd7e6b816..83e38b7b4e2aaf 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md @@ -1,16 +1,15 @@ # concat_ws - -## Syntax - -`VARCHAR concat_ws(VARCHAR sep, VARCHAR str,...)` - -## Description +## description 使用第一个参数 sep 作为连接符,将第二个参数以及后续所有参数拼接成一个字符串. 
如果分隔符是 NULL,返回 NULL。 `concat_ws`函数不会跳过空字符串,会跳过 NULL 值 -## Examples + Syntax + +`VARCHAR concat_ws(VARCHAR sep, VARCHAR str,...)` + +## example ``` mysql> select concat_ws("or", "d", "is"); @@ -34,3 +33,5 @@ mysql> select concat_ws("or", "d", NULL,"is"); | doris | +---------------------------------+ ``` +##keyword +CONCAT_WS,CONCAT,WS diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md index 121a6672e297f1..15e992ea5642d4 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md @@ -1,14 +1,13 @@ # find_in_set +## description -## Syntax - -`INT find_in_set(VARCHAR str, VARCHAR strlist)` +返回 strlist 中第一次出现 str 的位置(从1开始计数)。strlist 是用逗号分隔的字符串。如果没有找到,返回0。任意参数为 NULL ,返回 NULL。 -## Description + Syntax -返回 strlist 中第一次出现 str 的位置(从1开始计数)。strlist 是用逗号分隔的字符串。如果没有找到,返回0。任意参数为 NULL ,返回 NULL。 +`INT find_in_set(VARCHAR str, VARCHAR strlist)` -## Examples +## example ``` mysql> select find_in_set("b", "a,b,c"); @@ -18,3 +17,5 @@ mysql> select find_in_set("b", "a,b,c"); | 2 | +---------------------------+ ``` +##keyword +FIND_IN_SET,FIND,IN,SET diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md index a7b1bd198c7bcd..7eff7cad1eb22f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md @@ -1,10 +1,5 @@ # get_json_double - -## Syntax - -`DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path) - -## Description +## description 解析并获取 json 字符串内指定路径的浮点型内容。 其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 @@ -12,7 +7,11 @@ path 的内容不能包含 ", [ 和 ]。 如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 -## Examples + Syntax + +`DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path) + +## example 1. 获取 key 为 "k1" 的 value @@ -45,3 +44,5 @@ mysql> SELECT get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]' | 1.1 | +---------------------------------------------------------------------+ ``` +##keyword +GET_JSON_DOUBLE,GET,JSON,DOUBLE diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md index 005fad597c9c48..5b6088223b350b 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md @@ -1,10 +1,5 @@ # get_json_int - -## Syntax - -`INT get_json_int(VARCHAR json_str, VARCHAR json_path) - -## Description +## description 解析并获取 json 字符串内指定路径的整型内容。 其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 @@ -12,7 +7,11 @@ path 的内容不能包含 ", [ 和 ]。 如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 -## Examples + Syntax + +`INT get_json_int(VARCHAR json_str, VARCHAR json_path) + +## example 1. 
获取 key 为 "k1" 的 value @@ -45,3 +44,5 @@ mysql> SELECT get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]'); | 1 | +--------------------------------------------------------------+ ``` +##keyword +GET_JSON_INT,GET,JSON,INT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md index ab576a8ac189d3..eb377a5bf30e39 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md @@ -1,10 +1,5 @@ # get_json_string - -## Syntax - -`VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path) - -## Description +## description 解析并获取 json 字符串内指定路径的字符串内容。 其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 @@ -12,7 +7,11 @@ path 的内容不能包含 ", [ 和 ]。 如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 -## Examples + Syntax + +`VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path) + +## example 1. 获取 key 为 "k1" 的 value @@ -55,3 +54,5 @@ mysql> SELECT get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4 | ["v1","v3","v4"] | +---------------------------------------------------------------------------------+ ``` +##keyword +GET_JSON_STRING,GET,JSON,STRING diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md index f1a3151516588c..6a6f58d2d862f9 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md @@ -1,14 +1,13 @@ # group_concat +## description -## Syntax - -`VARCHAR group_concat(VARCHAR str[, VARCHAR sep])` +该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略。该函数通常需要和 group by 语句一起使用。 -## Description + Syntax -该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略。该函数通常需要和 group by 语句一起使用。 +`VARCHAR group_concat(VARCHAR str[, VARCHAR sep])` -## Examples +## example ``` mysql> select value from test; @@ -34,3 +33,5 @@ mysql> select group_concat(value, " ") from test; | a b c | +----------------------------+ ``` +##keyword +GROUP_CONCAT,GROUP,CONCAT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md index 58276bf4c4c098..8da641329705e6 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md @@ -1,14 +1,13 @@ # instr +## description -## Syntax - -`INT instr(VARCHAR str, VARCHAR substr)` +返回 substr 在 str 中第一次出现的位置(从1开始计数)。如果 substr 不在 str 中出现,则返回0。 -## Description + Syntax -返回 substr 在 str 中第一次出现的位置(从1开始计数)。如果 substr 不在 str 中出现,则返回0。 +`INT instr(VARCHAR str, VARCHAR substr)` -## Examples +## example ``` mysql> select instr("abc", "b"); @@ -25,3 +24,5 @@ mysql> select instr("abc", "d"); | 0 | +-------------------+ ``` +##keyword +INSTR,INSTR diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md index be8464ba14c674..a56b914f568aee 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md +++ 
b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md @@ -1,10 +1,11 @@ # lcase +## description -## Syntax - -`INT lcase(VARCHAR str)` +与`lower`一致 -## Description + Syntax -与`lower`一致 +`INT lcase(VARCHAR str)` +##keyword +LCASE,LCASE diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md index b8376bfbca0e9e..6279e98aed8f96 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md @@ -1,14 +1,13 @@ # left +## description -## Syntax - -`VARCHAR left(VARCHAR str)` +它返回具有指定长度的字符串的左边部分 -## Description + Syntax -它返回具有指定长度的字符串的左边部分 +`VARCHAR left(VARCHAR str)` -## Examples +## example ``` mysql> select left("Hello doris",5); @@ -18,3 +17,5 @@ mysql> select left("Hello doris",5); | Hello | +------------------------+ ``` +##keyword +LEFT,LEFT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md index 03b3ce42bbf534..cc4bf04618c5df 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md @@ -1,14 +1,13 @@ # length +## description -## Syntax - -`INT length(VARCHAR str)` +返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。 -## Description + Syntax -返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。 +`INT length(VARCHAR str)` -## Examples +## example ``` mysql> select length("abc"); @@ -25,3 +24,5 @@ mysql> select length("中国"); | 6 | +------------------+ ``` +##keyword +LENGTH,LENGTH diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md index e6a7704381335f..b7b178e0fa409a 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md @@ -1,14 +1,13 @@ # locate +## description -## Syntax - -`INT locate(VARCHAR substr, VARCHAR str[, INT pos])` +返回 substr 在 str 中出现的位置(从1开始计数)。如果指定第3个参数 pos,则从 str 以 pos 下标开始的字符串处开始查找 substr 出现的位置。如果没有找到,返回0 -## Description + Syntax -返回 substr 在 str 中出现的位置(从1开始计数)。如果指定第3个参数 pos,则从 str 以 pos 下标开始的字符串处开始查找 substr 出现的位置。如果没有找到,返回0 +`INT locate(VARCHAR substr, VARCHAR str[, INT pos])` -## Examples +## example ``` mysql> SELECT LOCATE('bar', 'foobarbar'); @@ -32,3 +31,5 @@ mysql> SELECT LOCATE('bar', 'foobarbar', 5); | 7 | +-------------------------------+ ``` +##keyword +LOCATE,LOCATE diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md index 776d559c199f84..fe7c3e4aa606ac 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md @@ -1,14 +1,13 @@ # lower +## description -## Syntax - -`INT lower(VARCHAR str)` +将参数中所有的字符串都转换成小写 -## Description + Syntax -将参数中所有的字符串都转换成小写 +`INT lower(VARCHAR str)` -## Examples +## example ``` mysql> SELECT lower("AbC123"); @@ -18,3 +17,5 @@ mysql> SELECT lower("AbC123"); | abc123 | +-----------------+ ``` +##keyword +LOWER,LOWER diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md 
b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md index 8ba1176ea5533c..178aa0eebcc954 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md @@ -1,14 +1,13 @@ # lpad +## description -## Syntax - -`VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)` +返回 str 中长度为 len(从首字母开始算起)的字符串。如果 len 大于 str 的长度,则在 str 的前面不断补充 pad 字符,直到该字符串的长度达到 len 为止。如果 len 小于 str 的长度,该函数相当于截断 str 字符串,只返回长度为 len 的字符串。 -## Description + Syntax -返回 str 中长度为 len(从首字母开始算起)的字符串。如果 len 大于 str 的长度,则在 str 的前面不断补充 pad 字符,直到该字符串的长度达到 len 为止。如果 len 小于 str 的长度,该函数相当于截断 str 字符串,只返回长度为 len 的字符串。 +`VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)` -## Examples +## example ``` mysql> SELECT lpad("hi", 5, "xy"); @@ -25,3 +24,5 @@ mysql> SELECT lpad("hi", 1, "xy"); | h | +---------------------+ ``` +##keyword +LPAD,LPAD diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md index 4a265d0868f9f5..e7454e6f8e5aed 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md @@ -1,14 +1,13 @@ # ltrim +## description -## Syntax - -`VARCHAR ltrim(VARCHAR str)` +将参数 str 中从开始部分连续出现的空格去掉 -## Description + Syntax -将参数 str 中从开始部分连续出现的空格去掉 +`VARCHAR ltrim(VARCHAR str)` -## Examples +## example ``` mysql> SELECT ltrim(' ab d'); @@ -18,3 +17,5 @@ mysql> SELECT ltrim(' ab d'); | ab d | +------------------+ ``` +##keyword +LTRIM,LTRIM diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md index 98da921327d4dd..11824e3fee3d1b 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md @@ -1,14 +1,13 @@ # money_format +## description -## Syntax - -VARCHAR money_format(Number) +将数字按照货币格式输出,整数部分每隔3位用逗号分隔,小数部分保留2位 -## Description + Syntax -将数字按照货币格式输出,整数部分每隔3位用逗号分隔,小数部分保留2位 +VARCHAR money_format(Number) -## Examples +## example ``` mysql> select money_format(17014116); @@ -32,3 +31,5 @@ mysql> select money_format(1123.4); | 1,123.40 | +----------------------+ ``` +##keyword +MONEY_FORMAT,MONEY,FORMAT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md index 8cbf690eea9cb5..7ca0602a547d9d 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md @@ -1,14 +1,13 @@ # regexp_extract +## description -## Syntax - -`VARCHAR regexp_extract(VARCHAR str, VARCHAR pattern, int pos)` +对字符串 str 进行正则匹配,抽取符合 pattern 的第 pos 个匹配部分。需要 pattern 完全匹配 str 中的某部分,这样才能返回 pattern 部分中需匹配部分。如果没有匹配,返回空字符串。 -## Description + Syntax -对字符串 str 进行正则匹配,抽取符合 pattern 的第 pos 个匹配部分。需要 pattern 完全匹配 str 中的某部分,这样才能返回 pattern 部分中需匹配部分。如果没有匹配,返回空字符串。 +`VARCHAR regexp_extract(VARCHAR str, VARCHAR pattern, int pos)` -## Examples +## example ``` mysql> SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 1); @@ -25,3 +24,5 @@ mysql> SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 2); | d | 
+-------------------------------------------------------------+ ``` +##keyword +REGEXP_EXTRACT,REGEXP,EXTRACT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md index c131996cdc4760..f4fc3b70f4e6c1 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md @@ -1,14 +1,13 @@ # regexp_replace +## description -## Syntax - -`VARCHAR regexp_replace(VARCHAR str, VARCHAR pattern, VARCHAR repl) +对字符串 str 进行正则匹配, 将命中 pattern 的部分使用 repl 来进行替换 -## Description + Syntax -对字符串 str 进行正则匹配, 将命中 pattern 的部分使用 repl 来进行替换 +`VARCHAR regexp_replace(VARCHAR str, VARCHAR pattern, VARCHAR repl) -## Examples +## example ``` mysql> SELECT regexp_replace('a b c', " ", "-"); @@ -25,3 +24,5 @@ mysql> SELECT regexp_replace('a b c','(b)','<\\1>'); | a c | +----------------------------------------+ ``` +##keyword +REGEXP_REPLACE,REGEXP,REPLACE diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md index 4ef1eeb2f70acb..14a9a59d785434 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md @@ -1,14 +1,13 @@ # repeat +## description -## Syntax - -`VARCHAR repeat(VARCHAR str, INT count) +将字符串 str 重复 count 次输出,count 小于1时返回空串,str,count 任一为NULL时,返回 NULL -## Description + Syntax -将字符串 str 重复 count 次输出,count 小于1时返回空串,str,count 任一为NULL时,返回 NULL +`VARCHAR repeat(VARCHAR str, INT count) -## Examples +## example ``` mysql> SELECT repeat("a", 3); @@ -25,3 +24,5 @@ mysql> SELECT repeat("a", -1); | | +-----------------+ ``` +##keyword +REPEAT,REPEAT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md index 832a124619d9f2..4c01a89bd05c79 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md @@ -1,14 +1,13 @@ # right +## description -## Syntax - -`VARCHAR right(VARCHAR str)` +它返回具有指定长度的字符串的右边部分 -## Description + Syntax -它返回具有指定长度的字符串的右边部分 +`VARCHAR right(VARCHAR str)` -## Examples +## example ``` mysql> select right("Hello doris",5); @@ -18,3 +17,5 @@ mysql> select right("Hello doris",5); | doris | +-------------------------+ ``` +##keyword +RIGHT,RIGHT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md index 142a567c4104f8..c2ef720da13f0c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md @@ -1,14 +1,13 @@ # split_part +## description -## Syntax - -`VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)` +根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。 -## Description + Syntax -根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。 +`VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)` -## Examples +## example ``` mysql> select split_part("hello world", " ", 1); @@ -40,3 +39,5 @@ mysql> select split_part("abca", "a", 1); | | +----------------------------+ ``` +##keyword 
+SPLIT_PART,SPLIT,PART diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md index 2f7f62027e581a..95cf82da514dbd 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md @@ -1,14 +1,13 @@ # strleft +## description -## Syntax - -`VARCHAR strleft(VARCHAR str)` +它返回具有指定长度的字符串的左边部分 -## Description + Syntax -它返回具有指定长度的字符串的左边部分 +`VARCHAR strleft(VARCHAR str)` -## Examples +## example ``` mysql> select strleft("Hello doris",5); @@ -18,3 +17,5 @@ mysql> select strleft("Hello doris",5); | Hello | +------------------------+ ``` +##keyword +STRLEFT,STRLEFT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md index aeea95ee3ada3d..56ebc34020153f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md @@ -1,14 +1,13 @@ # strright +## description -## Syntax - -`VARCHAR strright(VARCHAR str)` +它返回具有指定长度的字符串的右边部分 -## Description + Syntax -它返回具有指定长度的字符串的右边部分 +`VARCHAR strright(VARCHAR str)` -## Examples +## example ``` mysql> select strright("Hello doris",5); @@ -18,3 +17,5 @@ mysql> select strright("Hello doris",5); | doris | +-------------------------+ ``` +##keyword +STRRIGHT,STRRIGHT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE ROLE.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE ROLE.md new file mode 100644 index 00000000000000..5af07a5342ba86 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE ROLE.md @@ -0,0 +1,19 @@ +# CREATE ROLE +## description + 该语句用户创建一个角色 + + 语法: + CREATE ROLE role1; + + 该语句创建一个无权限的角色,可以后续通过 GRANT 命令赋予该角色权限。 + +## example + + 1. 创建一个角色 + + CREATE ROLE role1; + +## keyword + CREATE, ROLE + + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE USER.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE USER.md new file mode 100644 index 00000000000000..d3f891c4be4e55 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/CREATE USER.md @@ -0,0 +1,49 @@ +# CREATE USER +## description + +Syntax: + + CREATE USER user_identity [IDENTIFIED BY 'password'] [DEFAULT ROLE 'role_name'] + + user_identity: + 'user_name'@'host' + +CREATE USER 命令用于创建一个 Doris 用户。在 Doris 中,一个 user_identity 唯一标识一个用户。user_identity 由两部分组成,user_name 和 host,其中 username 为用户名。host 标识用户端连接所在的主机地址。host 部分可以使用 % 进行模糊匹配。如果不指定 host,默认为 '%',即表示该用户可以从任意 host 连接到 Doris。 + +host 部分也可指定为 domain,语法为:'user_name'@['domain'],即使用中括号包围,则 Doris 会认为这个是一个 domain,并尝试解析其 ip 地址。目前仅支持百度内部的 BNS 解析。 + +如果指定了角色(ROLE),则会自动将该角色所拥有的权限赋予新创建的这个用户。如果不指定,则该用户默认没有任何权限。指定的 ROLE 必须已经存在。 + +## example + +1. 创建一个无密码用户(不指定 host,则等价于 jack@'%') + + CREATE USER 'jack'; + +2. 创建一个有密码用户,允许从 '172.10.1.10' 登陆 + + CREATE USER jack@'172.10.1.10' IDENTIFIED BY '123456'; + +3. 为了避免传递明文,用例2也可以使用下面的方式来创建 + + CREATE USER jack@'172.10.1.10' IDENTIFIED BY PASSWORD '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9'; + + 后面加密的内容可以通过PASSWORD()获得到,例如: + + SELECT PASSWORD('123456'); + +4. 
创建一个允许从 '192.168' 子网登陆的用户,同时指定其角色为 example_role + + CREATE USER 'jack'@'192.168.%' DEFAULT ROLE 'example_role'; + +5. 创建一个允许从域名 'example_domain' 登陆的用户 + + CREATE USER 'jack'@['example_domain'] IDENTIFIED BY '12345'; + +6. 创建一个用户,并指定一个角色 + + CREATE USER 'jack'@'%' IDENTIFIED BY '12345' DEFAULT ROLE 'my_role'; + +## keyword + CREATE, USER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP ROLE.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP ROLE.md new file mode 100644 index 00000000000000..b7d7e248b96843 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP ROLE.md @@ -0,0 +1,18 @@ +# DROP ROLE +## description + 该语句用于删除一个角色 + + 语法: + DROP ROLE role1; + + 删除一个角色,不会影响之前属于该角色的用户的权限。仅相当于将该角色与用户解耦。用户已经从该角色中获取到的权限,不会改变。 + +## example + + 1. 删除一个角色 + + DROP ROLE role1; + +## keyword + DROP, ROLE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP USER.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP USER.md new file mode 100644 index 00000000000000..3711947d5ce9cf --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/DROP USER.md @@ -0,0 +1,18 @@ +# DROP USER +## description + +Syntax: + + DROP USER 'user_name' + + DROP USER 命令会删除一个 palo 用户。这里 Doris 不支持删除指定的 user_identity。当删除一个指定用户后,该用户所对应的所有 user_identity 都会被删除。比如之前通过 CREATE USER 语句创建了 jack@'192.%' 以及 jack@['domain'] 两个用户,则在执行 DROP USER 'jack' 后,jack@'192.%' 以及 jack@['domain'] 都将被删除。 + +## example + +1. 删除用户 jack + + DROP USER 'jack' + +## keyword + DROP, USER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/GRANT.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/GRANT.md new file mode 100644 index 00000000000000..725ee855335a03 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/GRANT.md @@ -0,0 +1,55 @@ +# GRANT +## description + +GRANT 命令用于赋予指定用户或角色指定的权限。 + +Syntax: + + GRANT privilege_list ON db_name[.tbl_name] TO user_identity [ROLE role_name] + + +privilege_list 是需要赋予的权限列表,以逗号分隔。当前 Doris 支持如下权限: + + NODE_PRIV:集群节点操作权限,包括节点上下线等操作,只有 root 用户有该权限,不可赋予其他用户。 + ADMIN_PRIV:除 NODE_PRIV 以外的所有权限。 + GRANT_PRIV: 操作权限的权限。包括创建删除用户、角色,授权和撤权,设置密码等。 + SELECT_PRIV:对指定的库或表的读取权限 + LOAD_PRIV:对指定的库或表的导入权限 + ALTER_PRIV:对指定的库或表的schema变更权限 + CREATE_PRIV:对指定的库或表的创建权限 + DROP_PRIV:对指定的库或表的删除权限 + + 旧版权限中的 ALL 和 READ_WRITE 会被转换成:SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV; + READ_ONLY 会被转换为 SELECT_PRIV。 + +db_name[.tbl_name] 支持以下三种形式: + + 1. *.* 权限可以应用于所有库及其中所有表 + 2. db.* 权限可以应用于指定库下的所有表 + 3. db.tbl 权限可以应用于指定库下的指定表 + + 这里指定的库或表可以是不存在的库和表。 + +user_identity: + + 这里的 user_identity 语法同 CREATE USER。且必须为使用 CREATE USER 创建过的 user_identity。user_identity 中的host可以是域名,如果是域名的话,权限的生效时间可能会有1分钟左右的延迟。 + + 也可以将权限赋予指定的 ROLE,如果指定的 ROLE 不存在,则会自动创建。 + +## example + + 1. 授予所有库和表的权限给用户 + + GRANT SELECT_PRIV ON *.* TO 'jack'@'%'; + + 2. 授予指定库表的权限给用户 + + GRANT SELECT_PRIV,ALTER_PRIV,LOAD_PRIV ON db1.tbl1 TO 'jack'@'192.8.%'; + + 3. 
授予指定库表的权限给角色 + + GRANT LOAD_PRIV ON db1.* TO ROLE 'my_role'; + +## keyword + GRANT + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/REVOKE.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/REVOKE.md new file mode 100644 index 00000000000000..bb377051d69829 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/REVOKE.md @@ -0,0 +1,23 @@ +# REVOKE +## description + + REVOKE 命令用于撤销指定用户或角色指定的权限。 + Syntax: + REVOKE privilege_list ON db_name[.tbl_name] FROM user_identity [ROLE role_name] + + user_identity: + + 这里的 user_identity 语法同 CREATE USER。且必须为使用 CREATE USER 创建过的 user_identity。user_identity 中的host可以是域名,如果是域名的话,权限的撤销时间可能会有1分钟左右的延迟。 + + 也可以撤销指定的 ROLE 的权限,执行的 ROLE 必须存在。 + +## example + + 1. 撤销用户 jack 数据库 testDb 的权限 + + REVOKE SELECT_PRIV ON db1.* FROM 'jack'@'192.%'; + +## keyword + + REVOKE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PASSWORD.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PASSWORD.md new file mode 100644 index 00000000000000..27eac65537f586 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PASSWORD.md @@ -0,0 +1,30 @@ +# SET PASSWORD +## description + +Syntax: + + SET PASSWORD [FOR user_identity] = + [PASSWORD('plain password')]|['hashed password'] + + SET PASSWORD 命令可以用于修改一个用户的登录密码。如果 [FOR user_identity] 字段不存在,那么修改当前用户的密码。 + + 注意这里的 user_identity 必须完全匹配在使用 CREATE USER 创建用户时指定的 user_identity,否则会报错用户不存在。如果不指定 user_identity,则当前用户为 'username'@'ip',这个当前用户,可能无法匹配任何 user_identity。可以通过 SHOW GRANTS 查看当前用户。 + + PASSWORD() 方式输入的是明文密码; 而直接使用字符串,需要传递的是已加密的密码。 + 如果修改其他用户的密码,需要具有管理员权限。 + +## example + +1. 修改当前用户的密码 + + SET PASSWORD = PASSWORD('123456') + SET PASSWORD = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' + +2. 修改指定用户密码 + + SET PASSWORD FOR 'jack'@'192.%' = PASSWORD('123456') + SET PASSWORD FOR 'jack'@['domain'] = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' + +## keyword + SET, PASSWORD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PROPERTY.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PROPERTY.md new file mode 100644 index 00000000000000..f324ffaacfa786 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SET PROPERTY.md @@ -0,0 +1,56 @@ +# SET PROPERTY +## description + + Syntax: + + SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value'] + + 设置用户的属性,包括分配给用户的资源、导入cluster等。这里设置的用户属性,是针对 user 的,而不是 user_identity。即假设通过 CREATE USER 语句创建了两个用户 'jack'@'%' 和 'jack'@'192.%',则使用 SET PROPERTY 语句,只能针对 jack 这个用户,而不是 'jack'@'%' 或 'jack'@'192.%' + + 导入 cluster 仅适用于百度内部用户。 + + key: + + 超级用户权限: + max_user_connections: 最大连接数。 + resource.cpu_share: cpu资源分配。 + load_cluster.{cluster_name}.priority: 为指定的cluster分配优先级,可以为 HIGH 或 NORMAL + + 普通用户权限: + quota.normal: normal级别的资源分配。 + quota.high: high级别的资源分配。 + quota.low: low级别的资源分配。 + + load_cluster.{cluster_name}.hadoop_palo_path: palo使用的hadoop目录,需要存放etl程序及etl生成的中间数据供palo导入。导入完成后会自动清理中间数据,etl程序自动保留下次使用。 + load_cluster.{cluster_name}.hadoop_configs: hadoop的配置,其中fs.default.name、mapred.job.tracker、hadoop.job.ugi必须填写。 + load_cluster.{cluster_name}.hadoop_http_port: hadoop hdfs name node http端口。其中 hdfs 默认为8070,afs 默认 8010。 + default_load_cluster: 默认的导入cluster。 + +## example + + 1. 修改用户 jack 最大连接数为1000 + SET PROPERTY FOR 'jack' 'max_user_connections' = '1000'; + + 2. 
修改用户 jack 的cpu_share为1000 + SET PROPERTY FOR 'jack' 'resource.cpu_share' = '1000'; + + 3. 修改 jack 用户的normal组的权重 + SET PROPERTY FOR 'jack' 'quota.normal' = '400'; + + 4. 为用户 jack 添加导入cluster + SET PROPERTY FOR 'jack' + 'load_cluster.{cluster_name}.hadoop_palo_path' = '/user/palo/palo_path', + 'load_cluster.{cluster_name}.hadoop_configs' = 'fs.default.name=hdfs://dpp.cluster.com:port;mapred.job.tracker=dpp.cluster.com:port;hadoop.job.ugi=user,password;mapred.job.queue.name=job_queue_name_in_hadoop;mapred.job.priority=HIGH;'; + + 5. 删除用户 jack 下的导入cluster。 + SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}' = ''; + + 6. 修改用户 jack 默认的导入cluster + SET PROPERTY FOR 'jack' 'default_load_cluster' = '{cluster_name}'; + + 7. 修改用户 jack 的集群优先级为 HIGH + SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}.priority' = 'HIGH'; + +## keyword + SET, PROPERTY + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW GRANTS.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW GRANTS.md new file mode 100644 index 00000000000000..16d65357292ce1 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW GRANTS.md @@ -0,0 +1,30 @@ +# SHOW GRANTS +## description + + 该语句用于查看用户权限。 + + 语法: + SHOW [ALL] GRANTS [FOR user_identity]; + + 说明: + 1. SHOW ALL GRANTS 可以查看所有用户的权限。 + 2. 如果指定 user_identity,则查看该指定用户的权限。且该 user_identity 必须为通过 CREATE USER 命令创建的。 + 3. 如果不指定 user_identity,则查看当前用户的权限。 + + +## example + + 1. 查看所有用户权限信息 + + SHOW ALL GRANTS; + + 2. 查看指定 user 的权限 + + SHOW GRANTS FOR jack@'%'; + + 3. 查看当前用户的权限 + + SHOW GRANTS; + +## keyword + SHOW, GRANTS diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md new file mode 100644 index 00000000000000..1b51f48b0799bd --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md @@ -0,0 +1,33 @@ +# SHOW ROLES +## description + 该语句用于展示所有已创建的角色信息,包括角色名称,包含的用户以及权限。 + + 语法: + SHOW ROLES; + +## description + 该语句用户删除一个角色 + + 语法: + DROP ROLE role1; + + 删除一个角色,不会影响之前属于该角色的用户的权限。仅相当于将该角色与用户解耦。用户已经从该角色中获取到的权限,不会改变。 + +## example + + 1. 查看已创建的角色: + + SHOW ROLES; + +## keyword + SHOW,ROLES + +## example + + 1. 删除一个角色 + + DROP ROLE role1; + +## keyword + DROP, ROLE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md deleted file mode 100644 index c3b81a6ee40d6f..00000000000000 --- a/docs/documentation/cn/sql-reference/sql-statements/Account Management/help.md +++ /dev/null @@ -1,335 +0,0 @@ -# CREATE USER -## description - -Syntax: - - CREATE USER user_identity [IDENTIFIED BY 'password'] [DEFAULT ROLE 'role_name'] - - user_identity: - 'user_name'@'host' - -CREATE USER 命令用于创建一个 Doris 用户。在 Doris 中,一个 user_identity 唯一标识一个用户。user_identity 由两部分组成,user_name 和 host,其中 username 为用户名。host 标识用户端连接所在的主机地址。host 部分可以使用 % 进行模糊匹配。如果不指定 host,默认为 '%',即表示该用户可以从任意 host 连接到 Doris。 - -host 部分也可指定为 domain,语法为:'user_name'@['domain'],即使用中括号包围,则 Doris 会认为这个是一个 domain,并尝试解析其 ip 地址。目前仅支持百度内部的 BNS 解析。 - -如果指定了角色(ROLE),则会自动将该角色所拥有的权限赋予新创建的这个用户。如果不指定,则该用户默认没有任何权限。指定的 ROLE 必须已经存在。 - -## example - -1. 创建一个无密码用户(不指定 host,则等价于 jack@'%') - - CREATE USER 'jack'; - -2. 创建一个有密码用户,允许从 '172.10.1.10' 登陆 - - CREATE USER jack@'172.10.1.10' IDENTIFIED BY '123456'; - -3. 
为了避免传递明文,用例2也可以使用下面的方式来创建 - - CREATE USER jack@'172.10.1.10' IDENTIFIED BY PASSWORD '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9'; - - 后面加密的内容可以通过PASSWORD()获得到,例如: - - SELECT PASSWORD('123456'); - -4. 创建一个允许从 '192.168' 子网登陆的用户,同时指定其角色为 example_role - - CREATE USER 'jack'@'192.168.%' DEFAULT ROLE 'example_role'; - -5. 创建一个允许从域名 'example_domain' 登陆的用户 - - CREATE USER 'jack'@['example_domain'] IDENTIFIED BY '12345'; - -6. 创建一个用户,并指定一个角色 - - CREATE USER 'jack'@'%' IDENTIFIED BY '12345' DEFAULT ROLE 'my_role'; - -## keyword - CREATE, USER - -# DROP USER -## description - -Syntax: - - DROP USER 'user_name' - - DROP USER 命令会删除一个 palo 用户。这里 Doris 不支持删除指定的 user_identity。当删除一个指定用户后,该用户所对应的所有 user_identity 都会被删除。比如之前通过 CREATE USER 语句创建了 jack@'192.%' 以及 jack@['domain'] 两个用户,则在执行 DROP USER 'jack' 后,jack@'192.%' 以及 jack@['domain'] 都将被删除。 - -## example - -1. 删除用户 jack - - DROP USER 'jack' - -## keyword - DROP, USER - -# SET PASSWORD -## description - -Syntax: - - SET PASSWORD [FOR user_identity] = - [PASSWORD('plain password')]|['hashed password'] - - SET PASSWORD 命令可以用于修改一个用户的登录密码。如果 [FOR user_identity] 字段不存在,那么修改当前用户的密码。 - - 注意这里的 user_identity 必须完全匹配在使用 CREATE USER 创建用户时指定的 user_identity,否则会报错用户不存在。如果不指定 user_identity,则当前用户为 'username'@'ip',这个当前用户,可能无法匹配任何 user_identity。可以通过 SHOW GRANTS 查看当前用户。 - - PASSWORD() 方式输入的是明文密码; 而直接使用字符串,需要传递的是已加密的密码。 - 如果修改其他用户的密码,需要具有管理员权限。 - -## example - -1. 修改当前用户的密码 - - SET PASSWORD = PASSWORD('123456') - SET PASSWORD = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' - -2. 修改指定用户密码 - - SET PASSWORD FOR 'jack'@'192.%' = PASSWORD('123456') - SET PASSWORD FOR 'jack'@['domain'] = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' - -## keyword - SET, PASSWORD - -# GRANT -## description - -GRANT 命令用于赋予指定用户或角色指定的权限。 - -Syntax: - - GRANT privilege_list ON db_name[.tbl_name] TO user_identity [ROLE role_name] - - -privilege_list 是需要赋予的权限列表,以逗号分隔。当前 Doris 支持如下权限: - - NODE_PRIV:集群节点操作权限,包括节点上下线等操作,只有 root 用户有该权限,不可赋予其他用户。 - ADMIN_PRIV:除 NODE_PRIV 以外的所有权限。 - GRANT_PRIV: 操作权限的权限。包括创建删除用户、角色,授权和撤权,设置密码等。 - SELECT_PRIV:对指定的库或表的读取权限 - LOAD_PRIV:对指定的库或表的导入权限 - ALTER_PRIV:对指定的库或表的schema变更权限 - CREATE_PRIV:对指定的库或表的创建权限 - DROP_PRIV:对指定的库或表的删除权限 - - 旧版权限中的 ALL 和 READ_WRITE 会被转换成:SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV; - READ_ONLY 会被转换为 SELECT_PRIV。 - -db_name[.tbl_name] 支持以下三种形式: - - 1. *.* 权限可以应用于所有库及其中所有表 - 2. db.* 权限可以应用于指定库下的所有表 - 3. db.tbl 权限可以应用于指定库下的指定表 - - 这里指定的库或表可以是不存在的库和表。 - -user_identity: - - 这里的 user_identity 语法同 CREATE USER。且必须为使用 CREATE USER 创建过的 user_identity。user_identity 中的host可以是域名,如果是域名的话,权限的生效时间可能会有1分钟左右的延迟。 - - 也可以将权限赋予指定的 ROLE,如果指定的 ROLE 不存在,则会自动创建。 - -## example - - 1. 授予所有库和表的权限给用户 - - GRANT SELECT_PRIV ON *.* TO 'jack'@'%'; - - 2. 授予指定库表的权限给用户 - - GRANT SELECT_PRIV,ALTER_PRIVS,LOAD_PRIV ON db1.tbl1 TO 'jack'@'192.8.%'; - - 3. 授予指定库表的权限给角色 - - GRANT LOAD_PRIV ON db1.* TO ROLE 'my_role'; - -## keyword - GRANT - -# REVOKE -## description - - REVOKE 命令用于撤销指定用户或角色指定的权限。 - Syntax: - REVOKE privilege_list ON db_name[.tbl_name] FROM user_identity [ROLE role_name] - - user_identity: - - 这里的 user_identity 语法同 CREATE USER。且必须为使用 CREATE USER 创建过的 user_identity。user_identity 中的host可以是域名,如果是域名的话,权限的撤销时间可能会有1分钟左右的延迟。 - - 也可以撤销指定的 ROLE 的权限,执行的 ROLE 必须存在。 - -## example - - 1. 
撤销用户 jack 数据库 testDb 的权限 - - REVOKE SELECT_PRIV ON db1.* FROM 'jack'@'192.%'; - -## keyword - - REVOKE - -# SET PROPERTY -## description - - Syntax: - - SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value'] - - 设置用户的属性,包括分配给用户的资源、导入cluster等。这里设置的用户属性,是针对 user 的,而不是 user_identity。即假设通过 CREATE USER 语句创建了两个用户 'jack'@'%' 和 'jack'@'192.%',则使用 SET PROPERTY 语句,只能针对 jack 这个用户,而不是 'jack'@'%' 或 'jack'@'192.%' - - 导入 cluster 仅适用于百度内部用户。 - - key: - - 超级用户权限: - max_user_connections: 最大连接数。 - resource.cpu_share: cpu资源分配。 - load_cluster.{cluster_name}.priority: 为指定的cluster分配优先级,可以为 HIGH 或 NORMAL - - 普通用户权限: - quota.normal: normal级别的资源分配。 - quota.high: high级别的资源分配。 - quota.low: low级别的资源分配。 - - load_cluster.{cluster_name}.hadoop_palo_path: palo使用的hadoop目录,需要存放etl程序及etl生成的中间数据供palo导入。导入完成后会自动清理中间数据,etl程序自动保留下次使用。 - load_cluster.{cluster_name}.hadoop_configs: hadoop的配置,其中fs.default.name、mapred.job.tracker、hadoop.job.ugi必须填写。 - load_cluster.{cluster_name}.hadoop_http_port: hadoop hdfs name node http端口。其中 hdfs 默认为8070,afs 默认 8010。 - default_load_cluster: 默认的导入cluster。 - -## example - - 1. 修改用户 jack 最大连接数为1000 - SET PROPERTY FOR 'jack' 'max_user_connections' = '1000'; - - 2. 修改用户 jack 的cpu_share为1000 - SET PROPERTY FOR 'jack' 'resource.cpu_share' = '1000'; - - 3. 修改 jack 用户的normal组的权重 - SET PROPERTY FOR 'jack' 'quota.normal' = '400'; - - 4. 为用户 jack 添加导入cluster - SET PROPERTY FOR 'jack' - 'load_cluster.{cluster_name}.hadoop_palo_path' = '/user/palo/palo_path', - 'load_cluster.{cluster_name}.hadoop_configs' = 'fs.default.name=hdfs://dpp.cluster.com:port;mapred.job.tracker=dpp.cluster.com:port;hadoop.job.ugi=user,password;mapred.job.queue.name=job_queue_name_in_hadoop;mapred.job.priority=HIGH;'; - - 5. 删除用户 jack 下的导入cluster。 - SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}' = ''; - - 6. 修改用户 jack 默认的导入cluster - SET PROPERTY FOR 'jack' 'default_load_cluster' = '{cluster_name}'; - - 7. 修改用户 jack 的集群优先级为 HIGH - SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}.priority' = 'HIGH'; - -## keyword - SET, PROPERTY - -# CREATE ROLE - -## description - 该语句用户创建一个角色 - - 语法: - CREATE ROLE role1; - - 该语句创建一个无权限的角色,可以后续通过 GRANT 命令赋予该角色权限。 - -## example - - 1. 创建一个角色 - - CREATE ROLE role1; - -## keyword - CREATE, ROLE - - -# DROP ROLE - -## description - 该语句用户删除一个角色 - - 语法: - DROP ROLE role1; - - 删除一个角色,不会影响之前属于该角色的用户的权限。仅相当于将该角色与用户解耦。用户已经从该角色中获取到的权限,不会改变。 - -## example - - 1. 删除一个角色 - - DROP ROLE role1; - -## keyword - DROP, ROLE - -# SHOW ROLES - -## description - 该语句用于展示所有已创建的角色信息,包括角色名称,包含的用户以及权限。 - - 语法: - SHOW ROLES; - -## example - - 1. 查看已创建的角色: - - SHOW ROLES; - -## keyword - SHOW,ROLES - -## description - 该语句用户删除一个角色 - - 语法: - DROP ROLE role1; - - 删除一个角色,不会影响之前属于该角色的用户的权限。仅相当于将该角色与用户解耦。用户已经从该角色中获取到的权限,不会改变。 - -## example - - 1. 删除一个角色 - - DROP ROLE role1; - -## keyword - DROP, ROLE - -# SHOW GRANTS - -## description - - 该语句用于查看用户权限。 - - 语法: - SHOW [ALL] GRANTS [FOR user_identity]; - - 说明: - 1. SHOW ALL GRANTS 可以查看所有用户的权限。 - 2. 如果指定 user_identity,则查看该指定用户的权限。且该 user_identity 必须为通过 CREATE USER 命令创建的。 - 3. 如果不指定 user_identity,则查看当前用户的权限。 - - -## example - - 1. 查看所有用户权限信息 - - SHOW ALL GRANTS; - - 2. 查看指定 user 的权限 - - SHOW GRANTS FOR jack@'%'; - - 3. 
查看当前用户的权限 - - SHOW GRANTS; - -## keyword - SHOW, GRANTS diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md new file mode 100644 index 00000000000000..52e521b254979e --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md @@ -0,0 +1,22 @@ +# ADMIN CANCEL REPAIR +## description + + 该语句用于取消以高优先级修复指定表或分区 + + 语法: + + ADMIN CANCEL REPAIR TABLE table_name[ PARTITION (p1,...)]; + + 说明: + + 1. 该语句仅表示系统不再以高优先级修复指定表或分区的分片副本。系统仍会以默认调度方式修复副本。 + +## example + + 1. 取消高优先级修复 + + ADMIN CANCEL REPAIR TABLE tbl PARTITION(p1); + +## keyword + ADMIN,CANCEL,REPAIR + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_repair_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN REPAIR.md similarity index 60% rename from docs/documentation/cn/sql-reference/sql-statements/Administration/admin_repair_stmt.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN REPAIR.md index 1549c65b539bdd..2ae44aed4a7412 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_repair_stmt.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN REPAIR.md @@ -25,25 +25,3 @@ ## keyword ADMIN,REPAIR -# ADMIN CANCEL REPAIR -## description - - 该语句用于取消以高优先级修复指定表或分区 - - 语法: - - ADMIN CANCEL REPAIR TABLE table_name[ PARTITION (p1,...)]; - - 说明: - - 1. 该语句仅表示系统不再以高优先级修复指定表或分区的分片副本。系统仍会以默认调度方式修复副本。 - -## example - - 1. 取消高优先级修复 - - ADMIN CANCEL REPAIR TABLE tbl PARTITION(p1); - -## keyword - ADMIN,CANCEL,REPAIR - diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md new file mode 100644 index 00000000000000..b6e277dcaba36b --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md @@ -0,0 +1,18 @@ +# ADMIN SET CONFIG +## description + + 该语句用于设置集群的配置项(当前仅支持设置FE的配置项)。 + 可设置的配置项,可以通过 ADMIN SHOW FRONTEND CONFIG; 命令查看。 + + 语法: + + ADMIN SET FRONTEND CONFIG ("key" = "value"); + +## example + + 1. 设置 'disable_balance' 为 true + + ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); + +## keyword + ADMIN,SET,CONFIG diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md new file mode 100644 index 00000000000000..c8ee9e6672dc95 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md @@ -0,0 +1,27 @@ +# ADMIN SHOW CONFIG +## description + + 该语句用于展示当前集群的配置(当前仅支持展示 FE 的配置项) + + 语法: + + ADMIN SHOW FRONTEND CONFIG; + + 说明: + + 结果中的各列含义如下: + 1. Key: 配置项名称 + 2. Value: 配置项值 + 3. Type: 配置项类型 + 4. IsMutable: 是否可以通过 ADMIN SET CONFIG 命令设置 + 5. MasterOnly: 是否仅适用于 Master FE + 6. Comment: 配置项说明 + +## example + + 1. 
查看当前FE节点的配置 + + ADMIN SHOW FRONTEND CONFIG; + +## keyword + ADMIN,SHOW,CONFIG diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md new file mode 100644 index 00000000000000..c948e0cf2f238c --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md @@ -0,0 +1,26 @@ +# ADMIN SHOW REPLICA DISTRIBUTION +## description + + 该语句用于展示一个表或分区副本分布状态 + + 语法: + + ADMIN SHOW REPLICA DISTRIBUTION FROM [db_name.]tbl_name [PARTITION (p1, ...)]; + + 说明: + + 结果中的 Graph 列以图形的形式展示副本分布比例 + +## example + + 1. 查看表的副本分布 + + ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1; + + 2. 查看表的分区的副本分布 + + ADMIN SHOW REPLICA DISTRIBUTION FROM db1.tbl1 PARTITION(p1, p2); + +## keyword + ADMIN,SHOW,REPLICA,DISTRIBUTION + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md new file mode 100644 index 00000000000000..93e00335d37a96 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md @@ -0,0 +1,39 @@ +# ADMIN SHOW REPLICA STATUS +## description + + 该语句用于展示一个表或分区的副本状态信息 + + 语法: + + ADMIN SHOW REPLICA STATUS FROM [db_name.]tbl_name [PARTITION (p1, ...)] + [where_clause]; + + where_clause: + WHERE STATUS [!]= "replica_status" + + replica_status: + OK: replica 处于健康状态 + DEAD: replica 所在 Backend 不可用 + VERSION_ERROR: replica 数据版本有缺失 + SCHEMA_ERROR: replica 的 schema hash 不正确 + MISSING: replica 不存在 + +## example + + 1. 查看表全部的副本状态 + + ADMIN SHOW REPLICA STATUS FROM db1.tbl1; + + 2. 查看表某个分区状态为 VERSION_ERROR 的副本 + + ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) + WHERE STATUS = "VERSION_ERROR"; + + 3. 查看表所有状态不健康的副本 + + ADMIN SHOW REPLICA STATUS FROM tbl1 + WHERE STATUS != "OK"; + +## keyword + ADMIN,SHOW,REPLICA,STATUS + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER CLUSTER.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER CLUSTER.md new file mode 100644 index 00000000000000..b9de2e3a683b9f --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER CLUSTER.md @@ -0,0 +1,28 @@ +# ALTER CLUSTER +## description + + 该语句用于更新逻辑集群。需要有管理员权限 + + 语法 + + ALTER CLUSTER cluster_name PROPERTIES ("key"="value", ...); + + 1. 缩容,扩容 (根据集群现有的be数目,大则为扩容,小则为缩容), 扩容为同步操作,缩容为异步操作,通过backend的状态可以得知是否缩容完成 + + PROPERTIES ("instance_num" = "3") + + instance_num 逻辑集群节点数 + +## example + + 1. 缩容,减少含有3个be的逻辑集群test_cluster的be数为2 + + ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="2"); + + 2. 扩容,增加含有3个be的逻辑集群test_cluster的be数为4 + + ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="4"); + +## keyword + ALTER,CLUSTER + + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER SYSTEM.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER SYSTEM.md new file mode 100644 index 00000000000000..d6a35691951061 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ALTER SYSTEM.md @@ -0,0 +1,94 @@ +# ALTER SYSTEM +## description + + 该语句用于操作一个系统内的节点。(仅管理员使用!) 
+ 语法: + 1) 增加节点(不使用多租户功能则按照此方法添加) + ALTER SYSTEM ADD BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; + 2) 增加空闲节点(即添加不属于任何cluster的BACKEND) + ALTER SYSTEM ADD FREE BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; + 3) 增加节点到某个cluster + ALTER SYSTEM ADD BACKEND TO cluster_name "host:heartbeat_port"[,"host:heartbeat_port"...]; + 4) 删除节点 + ALTER SYSTEM DROP BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; + 5) 节点下线 + ALTER SYSTEM DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; + 6) 增加Broker + ALTER SYSTEM ADD BROKER broker_name "host:port"[,"host:port"...]; + 7) 减少Broker + ALTER SYSTEM DROP BROKER broker_name "host:port"[,"host:port"...]; + 8) 删除所有Broker + ALTER SYSTEM DROP ALL BROKER broker_name + 9) 设置一个 Load error hub,用于集中展示导入时的错误信息 + ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES ("key" = "value"[, ...]); + + 说明: + 1) host 可以是主机名或者ip地址 + 2) heartbeat_port 为该节点的心跳端口 + 3) 增加和删除节点为同步操作。这两种操作不考虑节点上已有的数据,节点直接从元数据中删除,请谨慎使用。 + 4) 节点下线操作用于安全下线节点。该操作为异步操作。如果成功,节点最终会从元数据中删除。如果失败,则不会完成下线。 + 5) 可以手动取消节点下线操作。详见 CANCEL DECOMMISSION + 6) Load error hub: + 当前支持两种类型的 Hub:Mysql 和 Broker。需在 PROPERTIES 中指定 "type" = "mysql" 或 "type" = "broker"。 + 如果需要删除当前的 load error hub,可以将 type 设为 null。 + 1) 当使用 Mysql 类型时,导入时产生的错误信息将会插入到指定的 mysql 库表中,之后可以通过 show load warnings 语句直接查看错误信息。 + + Mysql 类型的 Hub 需指定以下参数: + host:mysql host + port:mysql port + user:mysql user + password:mysql password + database:mysql database + table:mysql table + + 2) 当使用 Broker 类型时,导入时产生的错误信息会形成一个文件,通过 broker,写入到指定的远端存储系统中。须确保已经部署对应的 broker + Broker 类型的 Hub 需指定以下参数: + broker: broker 的名称 + path: 远端存储路径 + other properties: 其他访问远端存储所必须的信息,比如认证信息等。 + +## example + + 1. 增加一个节点 + ALTER SYSTEM ADD BACKEND "host:port"; + + 2. 增加一个空闲节点 + ALTER SYSTEM ADD FREE BACKEND "host:port"; + + 3. 删除两个节点 + ALTER SYSTEM DROP BACKEND "host1:port", "host2:port"; + + 4. 下线两个节点 + ALTER SYSTEM DECOMMISSION BACKEND "host1:port", "host2:port"; + + 5. 增加两个Hdfs Broker + ALTER SYSTEM ADD BROKER hdfs "host1:port", "host2:port"; + + 6. 添加一个 Mysql 类型的 load error hub + ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES + ("type"= "mysql", + "host" = "192.168.1.17" + "port" = "3306", + "user" = "my_name", + "password" = "my_passwd", + "database" = "doris_load", + "table" = "load_errors" + ); + + 7. 添加一个 Broker 类型的 load error hub + ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES + ("type"= "broker", + "name" = "bos", + "path" = "bos://backup-cmy/logs", + "bos_endpoint" = "http://gz.bcebos.com", + "bos_accesskey" = "069fc278xxxxxx24ddb522", + "bos_secret_accesskey"="700adb0c6xxxxxx74d59eaa980a" + ); + + 8. 删除当前的 load error hub + ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES + ("type"= "null"); + +## keyword + ALTER,SYSTEM,BACKEND,BROKER,FREE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md new file mode 100644 index 00000000000000..54d76fa7292ba2 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md @@ -0,0 +1,15 @@ +# CANCEL DECOMMISSION +## description + + 该语句用于撤销一个节点下线操作。(仅管理员使用!) + 语法: + CANCEL DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; + +## example + + 1. 
取消两个节点的下线操作: + CANCEL DECOMMISSION BACKEND "host1:port", "host2:port"; + +## keyword + CANCEL,DECOMMISSION,BACKEND + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/CREATE CLUSTER.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/CREATE CLUSTER.md new file mode 100644 index 00000000000000..6ff866f6c78df7 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/CREATE CLUSTER.md @@ -0,0 +1,36 @@ +# CREATE CLUSTER +## description + + 该语句用于新建逻辑集群 (cluster), 需要管理员权限。如果不使用多租户,直接创建一个名称为default_cluster的cluster。否则创建一个自定义名称的cluster。 + + 语法 + + CREATE CLUSTER [IF NOT EXISTS] cluster_name + + PROPERTIES ("key"="value", ...) + + IDENTIFIED BY 'password' + + 1. PROPERTIES + + 指定逻辑集群的属性 + + PROPERTIES ("instance_num" = "3") + + instance_num 逻辑集群节点树 + + 2. identified by ‘password' 每个逻辑集群含有一个superuser,创建逻辑集群时必须指定其密码 + +## example + + 1. 新建一个含有3个be节点逻辑集群 test_cluster, 并指定其superuser用户密码 + + CREATE CLUSTER test_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; + + 2. 新建一个含有3个be节点逻辑集群 default_cluster(不使用多租户), 并指定其superuser用户密码 + + CREATE CLUSTER default_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; + +## keyword + CREATE,CLUSTER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/small_files.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/CREATE FILE.md similarity index 63% rename from docs/documentation/cn/sql-reference/sql-statements/Administration/small_files.md rename to docs/documentation/cn/sql-reference/sql-statements/Administration/CREATE FILE.md index 631c125d75769a..bc3bc395e4e81b 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Administration/small_files.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/CREATE FILE.md @@ -49,55 +49,3 @@ ## keyword CREATE,FILE -# SHOW FILE -## description - - 该语句用于展示一个 database 内创建的文件 - - 语法: - - SHOW FILE [FROM database]; - - 说明: - - FileId: 文件ID,全局唯一 - DbName: 所属数据库名称 - Catalog: 自定义分类 - FileName: 文件名 - FileSize: 文件大小,单位字节 - MD5: 文件的 MD5 - -## example - - 1. 查看数据库 my_database 中已上传的文件 - - SHOW FILE FROM my_database; - -## keyword - SHOW,FILE - -# DROP FILE -## description - - 该语句用于删除一个已上传的文件。 - - 语法: - - DROP FILE "file_name" [FROM database] - [properties] - - 说明: - file_name: 文件名。 - database: 文件归属的某一个 db,如果没有指定,则使用当前 session 的 db。 - properties 支持以下参数: - - catalog: 必须。文件所属分类。 - -## example - - 1. 
删除文件 ca.pem - - DROP FILE "ca.pem" properties("catalog" = "kafka"); - -## keyword - DROP,FILE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/DROP CLUSTER.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/DROP CLUSTER.md new file mode 100644 index 00000000000000..d755037b5e7537 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/DROP CLUSTER.md @@ -0,0 +1,18 @@ +# DROP CLUSTER +## description + + 该语句用于删除逻辑集群,成功删除逻辑集群需要首先删除集群内的db,需要管理员权限 + + 语法 + + DROP CLUSTER [IF EXISTS] cluster_name + +## example + + 删除逻辑集群 test_cluster + + DROP CLUSTER test_cluster; + +## keyword + DROP,CLUSTER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/DROP FILE.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/DROP FILE.md new file mode 100644 index 00000000000000..205f4a53167f35 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/DROP FILE.md @@ -0,0 +1,25 @@ +# DROP FILE +## description + + 该语句用于删除一个已上传的文件。 + + 语法: + + DROP FILE "file_name" [FROM database] + [properties] + + 说明: + file_name: 文件名。 + database: 文件归属的某一个 db,如果没有指定,则使用当前 session 的 db。 + properties 支持以下参数: + + catalog: 必须。文件所属分类。 + +## example + + 1. 删除文件 ca.pem + + DROP FILE "ca.pem" properties("catalog" = "kafka"); + +## keyword + DROP,FILE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/ENTER.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/ENTER.md new file mode 100644 index 00000000000000..cf21a7e041ad55 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/ENTER.md @@ -0,0 +1,18 @@ +# ENTER +## description + + 该语句用于进入一个逻辑集群, 所有创建用户、创建数据库都需要在一个逻辑集群内执行,创建后并且隶属于这个逻 + + 辑集群,需要管理员权限 + + ENTER cluster_name + +## example + + 1. 进入逻辑集群test_cluster + + ENTER test_cluster; + +## keyword + ENTER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/LINK DATABASE.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/LINK DATABASE.md new file mode 100644 index 00000000000000..580d0a952dd884 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/LINK DATABASE.md @@ -0,0 +1,24 @@ +# LINK DATABASE +## description + + 该语句用户链接一个逻辑集群的数据库到另外一个逻辑集群, 一个数据库只允许同时被链接一次,删除链接的数据库 + + 并不会删除数据,并且被链接的数据库不能被删除, 需要管理员权限 + + 语法 + + LINK DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name + +## example + + 1. 链接test_clusterA中的test_db到test_clusterB,并命名为link_test_db + + LINK DATABASE test_clusterA.test_db test_clusterB.link_test_db; + + 2. 删除链接的数据库link_test_db + + DROP DATABASE link_test_db; + +## keyword + LINK,DATABASE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md new file mode 100644 index 00000000000000..fce717142514c9 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md @@ -0,0 +1,20 @@ +# MIGRATE DATABASE +## description + + 该语句用于迁移一个逻辑集群的数据库到另外一个逻辑集群,执行此操作前数据库必须已经处于链接状态, 需要管理 + + 员权限 + + 语法 + + MIGRATE DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name + +## example + + 1. 
迁移test_clusterA中的test_db到test_clusterB + + MIGRATE DATABASE test_clusterA.test_db test_clusterB.link_test_db; + +## keyword + MIGRATE,DATABASE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW BACKENDS.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW BACKENDS.md new file mode 100644 index 00000000000000..ae96e8d46e7163 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW BACKENDS.md @@ -0,0 +1,22 @@ +# SHOW BACKENDS +## description + 该语句用于查看 cluster 内的 BE 节点 + 语法: + SHOW BACKENDS; + + 说明: + 1. LastStartTime 表示最近一次 BE 启动时间。 + 2. LastHeartbeat 表示最近一次心跳。 + 3. Alive 表示节点是否存活。 + 4. SystemDecommissioned 为 true 表示节点正在安全下线中。 + 5. ClusterDecommissioned 为 true 表示节点正在冲当前cluster中下线。 + 6. TabletNum 表示该节点上分片数量。 + 7. DataUsedCapacity 表示实际用户数据所占用的空间。 + 8. AvailCapacity 表示磁盘的可使用空间。 + 9. TotalCapacity 表示总磁盘空间。TotalCapacity = AvailCapacity + DataUsedCapacity + 其他非用户数据文件占用空间。 + 10. UsedPct 表示磁盘已使用量百分比。 + 11. ErrMsg 用于显示心跳失败时的错误信息。 + +## keyword + SHOW, BACKENDS + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW BROKER.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW BROKER.md new file mode 100644 index 00000000000000..82bf99bca13f79 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW BROKER.md @@ -0,0 +1,15 @@ +# SHOW BROKER +## description + 该语句用于查看当前存在的 broker + 语法: + SHOW BROKER; + + 说明: + 1. LastStartTime 表示最近一次 BE 启动时间。 + 2. LastHeartbeat 表示最近一次心跳。 + 3. Alive 表示节点是否存活。 + 4. ErrMsg 用于显示心跳失败时的错误信息。 + +## keyword + SHOW, BROKER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FILE.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FILE.md new file mode 100644 index 00000000000000..b25b72690da94a --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FILE.md @@ -0,0 +1,27 @@ +# SHOW FILE +## description + + 该语句用于展示一个 database 内创建的文件 + + 语法: + + SHOW FILE [FROM database]; + + 说明: + + FileId: 文件ID,全局唯一 + DbName: 所属数据库名称 + Catalog: 自定义分类 + FileName: 文件名 + FileSize: 文件大小,单位字节 + MD5: 文件的 MD5 + +## example + + 1. 查看数据库 my_database 中已上传的文件 + + SHOW FILE FROM my_database; + +## keyword + SHOW,FILE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md new file mode 100644 index 00000000000000..1b96368b6325e5 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md @@ -0,0 +1,18 @@ +# SHOW FRONTENDS +## description + 该语句用于查看 FE 节点 + 语法: + SHOW FRONTENDS; + + 说明: + 1. name 表示该 FE 节点在 bdbje 中的名称。 + 2. Join 为 true 表示该节点曾经加入过集群。但不代表当前还在集群内(可能已失联) + 3. Alive 表示节点是否存活。 + 4. ReplayedJournalId 表示该节点当前已经回放的最大元数据日志id。 + 5. LastHeartbeat 是最近一次心跳。 + 6. IsHelper 表示该节点是否是 bdbje 中的 helper 节点。 + 7. 
ErrMsg 用于显示心跳失败时的错误信息。 + +## keyword + SHOW, FRONTENDS + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md new file mode 100644 index 00000000000000..87cb4dc8302f93 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md @@ -0,0 +1,12 @@ +# SHOW MIGRATIONS +## description + + 该语句用于查看数据库迁移的进度 + + 语法 + + SHOW MIGRATIONS + +## keyword + SHOW,MIGRATIONS + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md deleted file mode 100644 index 1ee64b1e5b6808..00000000000000 --- a/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_show_stmt.md +++ /dev/null @@ -1,92 +0,0 @@ -# ADMIN SHOW REPLICA STATUS -## description - - 该语句用于展示一个表或分区的副本状态信息 - - 语法: - - ADMIN SHOW REPLICA STATUS FROM [db_name.]tbl_name [PARTITION (p1, ...)] - [where_clause]; - - where_clause: - WHERE STATUS [!]= "replica_status" - - replica_status: - OK: replica 处于健康状态 - DEAD: replica 所在 Backend 不可用 - VERSION_ERROR: replica 数据版本有缺失 - SCHEMA_ERROR: replica 的 schema hash 不正确 - MISSING: replica 不存在 - -## example - - 1. 查看表全部的副本状态 - - ADMIN SHOW REPLICA STATUS FROM db1.tbl1; - - 2. 查看表某个分区状态为 VERSION_ERROR 的副本 - - ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) - WHERE STATUS = "VERSION_ERROR"; - - 3. 查看表所有状态不健康的副本 - - ADMIN SHOW REPLICA STATUS FROM tbl1 - WHERE STATUS != "OK"; - -## keyword - ADMIN,SHOW,REPLICA,STATUS - -# ADMIN SHOW REPLICA DISTRIBUTION -## description - - 该语句用于展示一个表或分区副本分布状态 - - 语法: - - ADMIN SHOW REPLICA DISTRIBUTION FROM [db_name.]tbl_name [PARTITION (p1, ...)]; - - 说明: - - 结果中的 Graph 列以图形的形式展示副本分布比例 - -## example - - 1. 查看表的副本分布 - - ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1; - - 2. 查看表的分区的副本分布 - - ADMIN SHOW REPLICA DISTRIBUTION FROM db1.tbl1 PARTITION(p1, p2); - -## keyword - ADMIN,SHOW,REPLICA,DISTRIBUTION - -# ADMIN SHOW CONFIG -## description - - 该语句用于展示当前集群的配置(当前仅支持展示 FE 的配置项) - - 语法: - - ADMIN SHOW FRONTEND CONFIG; - - 说明: - - 结果中的各列含义如下: - 1. Key: 配置项名称 - 2. Value: 配置项值 - 3. Type: 配置项类型 - 4. IsMutable: 是否可以通过 ADMIN SET CONFIG 命令设置 - 5. MasterOnly: 是否仅适用于 Master FE - 6. Comment: 配置项说明 - -## example - - 1. 查看当前FE节点的配置 - - ADMIN SHOW FRONTEND CONFIG; - -## keyword - ADMIN,SHOW,CONFIG diff --git a/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md deleted file mode 100644 index 97ae32fdd5ea75..00000000000000 --- a/docs/documentation/cn/sql-reference/sql-statements/Administration/admin_stmt.md +++ /dev/null @@ -1,338 +0,0 @@ -# ALTER SYSTEM -## description - - 该语句用于操作一个系统内的节点。(仅管理员使用!) 
- 语法: - 1) 增加节点(不使用多租户功能则按照此方法添加) - ALTER SYSTEM ADD BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 2) 增加空闲节点(即添加不属于任何cluster的BACKEND) - ALTER SYSTEM ADD FREE BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 3) 增加节点到某个cluster - ALTER SYSTEM ADD BACKEND TO cluster_name "host:heartbeat_port"[,"host:heartbeat_port"...]; - 4) 删除节点 - ALTER SYSTEM DROP BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 5) 节点下线 - ALTER SYSTEM DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 6) 增加Broker - ALTER SYSTEM ADD BROKER broker_name "host:port"[,"host:port"...]; - 7) 减少Broker - ALTER SYSTEM DROP BROKER broker_name "host:port"[,"host:port"...]; - 8) 删除所有Broker - ALTER SYSTEM DROP ALL BROKER broker_name - 9) 设置一个 Load error hub,用于集中展示导入时的错误信息 - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES ("key" = "value"[, ...]); - - 说明: - 1) host 可以是主机名或者ip地址 - 2) heartbeat_port 为该节点的心跳端口 - 3) 增加和删除节点为同步操作。这两种操作不考虑节点上已有的数据,节点直接从元数据中删除,请谨慎使用。 - 4) 节点下线操作用于安全下线节点。该操作为异步操作。如果成功,节点最终会从元数据中删除。如果失败,则不会完成下线。 - 5) 可以手动取消节点下线操作。详见 CANCEL DECOMMISSION - 6) Load error hub: - 当前支持两种类型的 Hub:Mysql 和 Broker。需在 PROPERTIES 中指定 "type" = "mysql" 或 "type" = "broker"。 - 如果需要删除当前的 load error hub,可以将 type 设为 null。 - 1) 当使用 Mysql 类型时,导入时产生的错误信息将会插入到指定的 mysql 库表中,之后可以通过 show load warnings 语句直接查看错误信息。 - - Mysql 类型的 Hub 需指定以下参数: - host:mysql host - port:mysql port - user:mysql user - password:mysql password - database:mysql database - table:mysql table - - 2) 当使用 Broker 类型时,导入时产生的错误信息会形成一个文件,通过 broker,写入到指定的远端存储系统中。须确保已经部署对应的 broker - Broker 类型的 Hub 需指定以下参数: - broker: broker 的名称 - path: 远端存储路径 - other properties: 其他访问远端存储所必须的信息,比如认证信息等。 - -## example - - 1. 增加一个节点 - ALTER SYSTEM ADD BACKEND "host:port"; - - 2. 增加一个空闲节点 - ALTER SYSTEM ADD FREE BACKEND "host:port"; - - 3. 删除两个节点 - ALTER SYSTEM DROP BACKEND "host1:port", "host2:port"; - - 4. 下线两个节点 - ALTER SYSTEM DECOMMISSION BACKEND "host1:port", "host2:port"; - - 5. 增加两个Hdfs Broker - ALTER SYSTEM ADD BROKER hdfs "host1:port", "host2:port"; - - 6. 添加一个 Mysql 类型的 load error hub - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "mysql", - "host" = "192.168.1.17" - "port" = "3306", - "user" = "my_name", - "password" = "my_passwd", - "database" = "doris_load", - "table" = "load_errors" - ); - - 7. 添加一个 Broker 类型的 load error hub - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "broker", - "name" = "bos", - "path" = "bos://backup-cmy/logs", - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "069fc278xxxxxx24ddb522", - "bos_secret_accesskey"="700adb0c6xxxxxx74d59eaa980a" - ); - - 8. 删除当前的 load error hub - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "null"); - -## keyword - ALTER,SYSTEM,BACKEND,BROKER,FREE - -# CANCEL DECOMMISSION -## description - - 该语句用于撤销一个节点下线操作。(仅管理员使用!) - 语法: - CANCEL DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - -## example - - 1. 取消两个节点的下线操作: - CANCEL DECOMMISSION BACKEND "host1:port", "host2:port"; - -## keyword - CANCEL,DECOMMISSION,BACKEND - -# CREATE CLUSTER -## description - - 该语句用于新建逻辑集群 (cluster), 需要管理员权限。如果不使用多租户,直接创建一个名称为default_cluster的cluster。否则创建一个自定义名称的cluster。 - - 语法 - - CREATE CLUSTER [IF NOT EXISTS] cluster_name - - PROPERTIES ("key"="value", ...) - - IDENTIFIED BY 'password' - - 1. PROPERTIES - - 指定逻辑集群的属性 - - PROPERTIES ("instance_num" = "3") - - instance_num 逻辑集群节点树 - - 2. identified by ‘password' 每个逻辑集群含有一个superuser,创建逻辑集群时必须指定其密码 - -## example - - 1. 
新建一个含有3个be节点逻辑集群 test_cluster, 并指定其superuser用户密码 - - CREATE CLUSTER test_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; - - 2. 新建一个含有3个be节点逻辑集群 default_cluster(不使用多租户), 并指定其superuser用户密码 - - CREATE CLUSTER default_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; - -## keyword - CREATE,CLUSTER - -# ALTER CLUSTER -## description - - 该语句用于更新逻辑集群。需要有管理员权限 - - 语法 - - ALTER CLUSTER cluster_name PROPERTIES ("key"="value", ...); - - 1. 缩容,扩容 (根据集群现有的be数目,大则为扩容,小则为缩容), 扩容为同步操作,缩容为异步操作,通过backend的状态可以得知是否缩容完成 - - PROERTIES ("instance_num" = "3") - - instance_num 逻辑集群节点树 - -## example - - 1. 缩容,减少含有3个be的逻辑集群test_cluster的be数为2 - - ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="2"); - - 2. 扩容,增加含有3个be的逻辑集群test_cluster的be数为4 - - ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="4"); - -## keyword - ALTER,CLUSTER - -# DROP CLUSTER -## description - - 该语句用于删除逻辑集群,成功删除逻辑集群需要首先删除集群内的db,需要管理员权限 - - 语法 - - DROP CLUSTER [IF EXISTS] cluster_name - -## example - - 删除逻辑集群 test_cluster - - DROP CLUSTER test_cluster; - -## keyword - DROP,CLUSTER - -# LINK DATABASE -## description - - 该语句用户链接一个逻辑集群的数据库到另外一个逻辑集群, 一个数据库只允许同时被链接一次,删除链接的数据库 - - 并不会删除数据,并且被链接的数据库不能被删除, 需要管理员权限 - - 语法 - - LINK DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name - -## example - - 1. 链接test_clusterA中的test_db到test_clusterB,并命名为link_test_db - - LINK DATABASE test_clusterA.test_db test_clusterB.link_test_db; - - 2. 删除链接的数据库link_test_db - - DROP DATABASE link_test_db; - -## keyword - LINK,DATABASE - -# MIGRATE DATABASE -## description - - 该语句用于迁移一个逻辑集群的数据库到另外一个逻辑集群,执行此操作前数据库必须已经处于链接状态, 需要管理 - - 员权限 - - 语法 - - MIGRATE DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name - -## example - - 1. 迁移test_clusterA中的test_db到test_clusterB - - MIGRATE DATABASE test_clusterA.test_db test_clusterB.link_test_db; - -## keyword - MIGRATE,DATABASE - -# SHOW MIGRATIONS -## description - - 该语句用于查看数据库迁移的进度 - - 语法 - - SHOW MIGRATIONS - -## keyword - SHOW,MIGRATIONS - -# ENTER -## description - - 该语句用于进入一个逻辑集群, 所有创建用户、创建数据库都需要在一个逻辑集群内执行,创建后并且隶属于这个逻 - - 辑集群,需要管理员权限 - - ENTER cluster_name - -## example - - 1. 进入逻辑集群test_cluster - - ENTER test_cluster; - -## keyword - ENTER - -# SHOW BACKENDS -## description - 该语句用于查看 cluster 内的 BE 节点 - 语法: - SHOW BACKENDS; - - 说明: - 1. LastStartTime 表示最近一次 BE 启动时间。 - 2. LastHeartbeat 表示最近一次心跳。 - 3. Alive 表示节点是否存活。 - 4. SystemDecommissioned 为 true 表示节点正在安全下线中。 - 5. ClusterDecommissioned 为 true 表示节点正在冲当前cluster中下线。 - 6. TabletNum 表示该节点上分片数量。 - 7. DataUsedCapacity 表示实际用户数据所占用的空间。 - 8. AvailCapacity 表示磁盘的可使用空间。 - 9. TotalCapacity 表示总磁盘空间。TotalCapacity = AvailCapacity + DataUsedCapacity + 其他非用户数据文件占用空间。 - 10. UsedPct 表示磁盘已使用量百分比。 - 11. ErrMsg 用于显示心跳失败时的错误信息。 - -## keyword - SHOW, BACKENDS - -# SHOW FRONTENDS -## description - 该语句用于查看 FE 节点 - 语法: - SHOW FRONTENDS; - - 说明: - 1. name 表示该 FE 节点在 bdbje 中的名称。 - 2. Join 为 true 表示该节点曾经加入过集群。但不代表当前还在集群内(可能已失联) - 3. Alive 表示节点是否存活。 - 4. ReplayedJournalId 表示该节点当前已经回放的最大元数据日志id。 - 5. LastHeartbeat 是最近一次心跳。 - 6. IsHelper 表示该节点是否是 bdbje 中的 helper 节点。 - 7. ErrMsg 用于显示心跳失败时的错误信息。 - -## keyword - SHOW, FRONTENDS - -# SHOW BROKER -## description - 该语句用于查看当前存在的 broker - 语法: - SHOW BROKER; - - 说明: - 1. LastStartTime 表示最近一次 BE 启动时间。 - 2. LastHeartbeat 表示最近一次心跳。 - 3. Alive 表示节点是否存活。 - 4. ErrMsg 用于显示心跳失败时的错误信息。 - -## keyword - SHOW, BROKER - -# ADMIN SET CONFIG -## description - - 该语句用于设置集群的配置项(当前仅支持设置FE的配置项)。 - 可设置的配置项,可以通过 AMDIN SHOW FRONTEND CONFIG; 命令查看。 - - 语法: - - ADMIN SET FRONTEND CONFIG ("key" = "value"); - -## example - - 1. 
设置 'disable_balance' 为 true - - ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); - -## keyword - ADMIN,SET,CONFIG diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md new file mode 100644 index 00000000000000..a020eceff735f3 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md @@ -0,0 +1,29 @@ +# ALTER DATABASE +## description + 该语句用于设置指定数据库的属性。(仅管理员使用) + 语法: + 1) 设置数据库数据量配额,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB + ALTER DATABASE db_name SET DATA QUOTA quota; + + 2) 重命名数据库 + ALTER DATABASE db_name RENAME new_db_name; + + 说明: + 重命名数据库后,如需要,请使用 REVOKE 和 GRANT 命令修改相应的用户权限。 + +## example + 1. 设置指定数据库数据量配额 + ALTER DATABASE example_db SET DATA QUOTA 10995116277760; + 上述单位为字节,等价于 + ALTER DATABASE example_db SET DATA QUOTA 10T; + + ALTER DATABASE example_db SET DATA QUOTA 100G; + + ALTER DATABASE example_db SET DATA QUOTA 200M; + + 2. 将数据库 example_db 重命名为 example_db2 + ALTER DATABASE example_db RENAME example_db2; + +## keyword + ALTER,DATABASE,RENAME + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER TABLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER TABLE.md new file mode 100644 index 00000000000000..e6a8099b18cef0 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ALTER TABLE.md @@ -0,0 +1,240 @@ +# ALTER TABLE +## description + 该语句用于对已有的 table 进行修改。如果没有指定 rollup index,默认操作 base index。 + 该语句分为三种操作类型: schema change 、rollup 、partition + 这三种操作类型不能同时出现在一条 ALTER TABLE 语句中。 + 其中 schema change 和 rollup 是异步操作,任务提交成功则返回。之后可使用 SHOW ALTER 命令查看进度。 + partition 是同步操作,命令返回表示执行完毕。 + + 语法: + ALTER TABLE [database.]table + alter_clause1[, alter_clause2, ...]; + + alter_clause 分为 partition 、rollup、schema change 和 rename 四种。 + + partition 支持如下几种修改方式 + 1. 增加分区 + 语法: + ADD PARTITION [IF NOT EXISTS] partition_name VALUES LESS THAN [MAXVALUE|("value1")] ["key"="value"] + [DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]] + 注意: + 1) 分区为左闭右开区间,用户指定右边界,系统自动确定左边界 + 2) 如果没有指定分桶方式,则自动使用建表使用的分桶方式 + 3) 如指定分桶方式,只能修改分桶数,不可修改分桶方式或分桶列 + 4) ["key"="value"] 部分可以设置分区的一些属性,具体说明见 CREATE TABLE + + 2. 删除分区 + 语法: + DROP PARTITION [IF EXISTS] partition_name + 注意: + 1) 使用分区方式的表至少要保留一个分区。 + 2) 执行 DROP PARTITION 一段时间内,可以通过 RECOVER 语句恢复被删除的 partition。详见 RECOVER 语句 + + 3. 修改分区属性 + 语法: + MODIFY PARTITION partition_name SET ("key" = "value", ...) + 说明: + 1) 当前支持修改分区的 storage_medium、storage_cooldown_time 和 replication_num 三个属性。 + 2) 对于单分区表,partition_name 同表名。 + + rollup 支持如下几种创建方式: + 1. 创建 rollup index + 语法: + ADD ROLLUP rollup_name (column_name1, column_name2, ...) + [FROM from_index_name] + [PROPERTIES ("key"="value", ...)] + 注意: + 1) 如果没有指定 from_index_name,则默认从 base index 创建 + 2) rollup 表中的列必须是 from_index 中已有的列 + 3) 在 properties 中,可以指定存储格式。具体请参阅 CREATE TABLE + + 2. 删除 rollup index + 语法: + DROP ROLLUP rollup_name + [PROPERTIES ("key"="value", ...)] + 注意: + 1) 不能删除 base index + 2) 执行 DROP ROLLUP 一段时间内,可以通过 RECOVER 语句恢复被删除的 rollup index。详见 RECOVER 语句 + + + schema change 支持如下几种修改方式: + 1. 向指定 index 的指定位置添加一列 + 语法: + ADD COLUMN column_name column_type [KEY | agg_type] [DEFAULT "default_value"] + [AFTER column_name|FIRST] + [TO rollup_index_name] + [PROPERTIES ("key"="value", ...)] + 注意: + 1) 聚合模型如果增加 value 列,需要指定 agg_type + 2) 非聚合模型如果增加key列,需要指定KEY关键字 + 3) 不能在 rollup index 中增加 base index 中已经存在的列 + 如有需要,可以重新创建一个 rollup index) + + 2. 
向指定 index 添加多列 + 语法: + ADD COLUMN (column_name1 column_type [KEY | agg_type] DEFAULT "default_value", ...) + [TO rollup_index_name] + [PROPERTIES ("key"="value", ...)] + 注意: + 1) 聚合模型如果增加 value 列,需要指定agg_type + 2) 非聚合模型如果增加key列,需要指定KEY关键字 + 3) 不能在 rollup index 中增加 base index 中已经存在的列 + (如有需要,可以重新创建一个 rollup index) + + 3. 从指定 index 中删除一列 + 语法: + DROP COLUMN column_name + [FROM rollup_index_name] + 注意: + 1) 不能删除分区列 + 2) 如果是从 base index 中删除列,则如果 rollup index 中包含该列,也会被删除 + + 4. 修改指定 index 的列类型以及列位置 + 语法: + MODIFY COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] + [AFTER column_name|FIRST] + [FROM rollup_index_name] + [PROPERTIES ("key"="value", ...)] + 注意: + 1) 聚合模型如果修改 value 列,需要指定 agg_type + 2) 非聚合类型如果修改key列,需要指定KEY关键字 + 3) 只能修改列的类型,列的其他属性维持原样(即其他属性需在语句中按照原属性显式的写出,参见 example 8) + 4) 分区列不能做任何修改 + 5) 目前支持以下类型的转换(精度损失由用户保证) + TINYINT/SMALLINT/INT/BIGINT 转换成 TINYINT/SMALLINT/INT/BIGINT/DOUBLE。 + LARGEINT 转换成 DOUBLE + VARCHAR 支持修改最大长度 + 6) 不支持从NULL转为NOT NULL + + 5. 对指定 index 的列进行重新排序 + 语法: + ORDER BY (column_name1, column_name2, ...) + [FROM rollup_index_name] + [PROPERTIES ("key"="value", ...)] + 注意: + 1) index 中的所有列都要写出来 + 2) value 列在 key 列之后 + + 6. 修改table的属性,目前支持修改bloom filter列和colocate_with 属性 + 语法: + PROPERTIES ("key"="value") + 注意: + 也可以合并到上面的schema change操作中来修改,见下面例子 + + + rename 支持对以下名称进行修改: + 1. 修改表名 + 语法: + RENAME new_table_name; + + 2. 修改 rollup index 名称 + 语法: + RENAME ROLLUP old_rollup_name new_rollup_name; + + 3. 修改 partition 名称 + 语法: + RENAME PARTITION old_partition_name new_partition_name; + +## example + [partition] + 1. 增加分区, 现有分区 [MIN, 2013-01-01),增加分区 [2013-01-01, 2014-01-01),使用默认分桶方式 + ALTER TABLE example_db.my_table + ADD PARTITION p1 VALUES LESS THAN ("2014-01-01"); + + 2. 增加分区,使用新的分桶数 + ALTER TABLE example_db.my_table + ADD PARTITION p1 VALUES LESS THAN ("2015-01-01") + DISTRIBUTED BY HASH(k1) BUCKETS 20; + + 3. 删除分区 + ALTER TABLE example_db.my_table + DROP PARTITION p1; + + [rollup] + 1. 创建 index: example_rollup_index,基于 base index(k1,k2,k3,v1,v2)。列式存储。 + ALTER TABLE example_db.my_table + ADD ROLLUP example_rollup_index(k1, k3, v1, v2) + PROPERTIES("storage_type"="column"); + + 2. 创建 index: example_rollup_index2,基于 example_rollup_index(k1,k3,v1,v2) + ALTER TABLE example_db.my_table + ADD ROLLUP example_rollup_index2 (k1, v1) + FROM example_rollup_index; + + 3. 删除 index: example_rollup_index2 + ALTER TABLE example_db.my_table + DROP ROLLUP example_rollup_index2; + + [schema change] + 1. 向 example_rollup_index 的 col1 后添加一个key列 new_col(非聚合模型) + ALTER TABLE example_db.my_table + ADD COLUMN new_col INT KEY DEFAULT "0" AFTER col1 + TO example_rollup_index; + + 2. 向example_rollup_index的col1后添加一个value列new_col(非聚合模型) + ALTER TABLE example_db.my_table + ADD COLUMN new_col INT DEFAULT "0" AFTER col1 + TO example_rollup_index; + + 3. 向example_rollup_index的col1后添加一个key列new_col(聚合模型) + ALTER TABLE example_db.my_table + ADD COLUMN new_col INT DEFAULT "0" AFTER col1 + TO example_rollup_index; + + 4. 向example_rollup_index的col1后添加一个value列new_col SUM聚合类型(聚合模型) + ALTER TABLE example_db.my_table + ADD COLUMN new_col INT SUM DEFAULT "0" AFTER col1 + TO example_rollup_index; + + 5. 向 example_rollup_index 添加多列(聚合模型) + ALTER TABLE example_db.my_table + ADD COLUMN (col1 INT DEFAULT "1", col2 FLOAT SUM DEFAULT "2.3") + TO example_rollup_index; + + 6. 从 example_rollup_index 删除一列 + ALTER TABLE example_db.my_table + DROP COLUMN col2 + FROM example_rollup_index; + + 7. 
修改 base index 的 col1 列的类型为 BIGINT,并移动到 col2 列后面 + ALTER TABLE example_db.my_table + MODIFY COLUMN col1 BIGINT DEFAULT "1" AFTER col2; + + 8. 修改 base index 的 val1 列最大长度。原 val1 为 (val1 VARCHAR(32) REPLACE DEFAULT "abc") + ALTER TABLE example_db.my_table + MODIFY COLUMN val1 VARCHAR(64) REPLACE DEFAULT "abc"; + + 9. 重新排序 example_rollup_index 中的列(设原列顺序为:k1,k2,k3,v1,v2) + ALTER TABLE example_db.my_table + ORDER BY (k3,k1,k2,v2,v1) + FROM example_rollup_index; + + 10. 同时执行两种操作 + ALTER TABLE example_db.my_table + ADD COLUMN v2 INT MAX DEFAULT "0" AFTER k2 TO example_rollup_index, + ORDER BY (k3,k1,k2,v2,v1) FROM example_rollup_index; + + 11. 修改表的 bloom filter 列 + ALTER TABLE example_db.my_table SET ("bloom_filter_columns"="k1,k2,k3"); + + 也可以合并到上面的 schema change 操作中(注意多子句的语法有少许区别) + ALTER TABLE example_db.my_table + DROP COLUMN col2 + PROPERTIES ("bloom_filter_columns"="k1,k2,k3"); + + 12. 修改表的Colocate 属性 + ALTER TABLE example_db.my_table set ("colocate_with"="t1"); + + [rename] + 1. 将名为 table1 的表修改为 table2 + ALTER TABLE table1 RENAME table2; + + 2. 将表 example_table 中名为 rollup1 的 rollup index 修改为 rollup2 + ALTER TABLE example_table RENAME ROLLUP rollup1 rollup2; + + 3. 将表 example_table 中名为 p1 的 partition 修改为 p2 + ALTER TABLE example_table RENAME PARTITION p1 p2; + +## keyword + ALTER,TABLE,ROLLUP,COLUMN,PARTITION,RENAME + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/BACKUP.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/BACKUP.md new file mode 100644 index 00000000000000..19c5137ce1b42e --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/BACKUP.md @@ -0,0 +1,39 @@ +# BACKUP +## description + 该语句用于备份指定数据库下的数据。该命令为异步操作。提交成功后,需通过 SHOW BACKUP 命令查看进度。仅支持备份 OLAP 类型的表。 + 语法: + BACKUP SNAPSHOT [db_name].{snapshot_name} + TO `repository_name` + ON ( + `table_name` [PARTITION (`p1`, ...)], + ... + ) + PROPERTIES ("key"="value", ...); + + 说明: + 1. 同一数据库下只能有一个正在执行的 BACKUP 或 RESTORE 任务。 + 2. ON 子句中标识需要备份的表和分区。如果不指定分区,则默认备份该表的所有分区。 + 3. PROPERTIES 目前支持以下属性: + "type" = "full":表示这是一次全量更新(默认)。 + "timeout" = "3600":任务超时时间,默认为一天。单位秒。 + +## example + + 1. 全量备份 example_db 下的表 example_tbl 到仓库 example_repo 中: + BACKUP SNAPSHOT example_db.snapshot_label1 + TO example_repo + ON (example_tbl) + PROPERTIES ("type" = "full"); + + 2. 全量备份 example_db 下,表 example_tbl 的 p1, p2 分区,以及表 example_tbl2 到仓库 example_repo 中: + BACKUP SNAPSHOT example_db.snapshot_label2 + TO example_repo + ON + ( + example_tbl PARTITION (p1,p2), + example_tbl2 + ); + +## keyword + BACKUP + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md new file mode 100644 index 00000000000000..93c17f921a3379 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md @@ -0,0 +1,32 @@ +# CANCEL ALTER +## description + 该语句用于撤销一个 ALTER 操作。 + 1. 撤销 ALTER TABLE COLUMN 操作 + 语法: + CANCEL ALTER TABLE COLUMN + FROM db_name.table_name + + 2. 撤销 ALTER TABLE ROLLUP 操作 + 语法: + CANCEL ALTER TABLE ROLLUP + FROM db_name.table_name + + 2. 撤销 ALTER CLUSTER 操作 + 语法: + (待实现...) + + +## example + [CANCEL ALTER TABLE COLUMN] + 1. 撤销针对 my_table 的 ALTER COLUMN 操作。 + CANCEL ALTER TABLE COLUMN + FROM example_db.my_table; + + [CANCEL ALTER TABLE ROLLUP] + 1. 
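The ALTER TABLE page above notes that schema change and rollup jobs are asynchronous and are monitored with the SHOW ALTER command, while CANCEL ALTER withdraws a job that is still running. A hedged sketch of that life cycle, reusing example_db.my_table from the examples above; the exact SHOW ALTER form is documented on its own help page and is only referenced in a comment here.

```
-- Submit an asynchronous schema change; the statement returns once the job is accepted.
ALTER TABLE example_db.my_table
ADD COLUMN new_col INT KEY DEFAULT "0" AFTER col1;

-- Progress can be checked with the SHOW ALTER command mentioned in the ALTER TABLE
-- description. If the job should not proceed, withdraw it while it is still running:
CANCEL ALTER TABLE COLUMN
FROM example_db.my_table;
```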
撤销 my_table 下的 ADD ROLLUP 操作。 + CANCEL ALTER TABLE ROLLUP + FROM example_db.my_table; + +## keyword + CANCEL,ALTER,TABLE,COLUMN,ROLLUP + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md new file mode 100644 index 00000000000000..b6451b99fe7931 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md @@ -0,0 +1,13 @@ +# CANCEL BACKUP +## description + 该语句用于取消一个正在进行的 BACKUP 任务。 + 语法: + CANCEL BACKUP FROM db_name; + +## example + 1. 取消 example_db 下的 BACKUP 任务。 + CANCEL BACKUP FROM example_db; + +## keyword + CANCEL, BACKUP + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md new file mode 100644 index 00000000000000..b2b52bedbc3c7b --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md @@ -0,0 +1,16 @@ +# CANCEL RESTORE +## description + 该语句用于取消一个正在进行的 RESTORE 任务。 + 语法: + CANCEL RESTORE FROM db_name; + + 注意: + 当取消处于 COMMIT 或之后阶段的恢复左右时,可能导致被恢复的表无法访问。此时只能通过再次执行恢复作业进行数据恢复。 + +## example + 1. 取消 example_db 下的 RESTORE 任务。 + CANCEL RESTORE FROM example_db; + +## keyword + CANCEL, RESTORE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md new file mode 100644 index 00000000000000..00938857f6ce70 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md @@ -0,0 +1,13 @@ +# CREATE DATABASE +## description + 该语句用于新建数据库(database) + 语法: + CREATE DATABASE [IF NOT EXISTS] db_name; + +## example + 1. 新建数据库 db_test + CREATE DATABASE db_test; + +## keyword + CREATE,DATABASE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md new file mode 100644 index 00000000000000..68ca39141597d6 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md @@ -0,0 +1,50 @@ +# CREATE REPOSITORY +## description + 该语句用于创建仓库。仓库用于属于备份或恢复。仅 root 或 superuser 用户可以创建仓库。 + 语法: + CREATE [READ ONLY] REPOSITORY `repo_name` + WITH BROKER `broker_name` + ON LOCATION `repo_location` + PROPERTIES ("key"="value", ...); + + 说明: + 1. 仓库的创建,依赖于已存在的 broker + 2. 如果是只读仓库,则只能在仓库上进行恢复。如果不是,则可以进行备份和恢复操作。 + 3. 根据 broker 的不同类型,PROPERTIES 有所不同,具体见示例。 + +## example + 1. 创建名为 bos_repo 的仓库,依赖 BOS broker "bos_broker",数据根目录为:bos://palo_backup + CREATE REPOSITORY `bos_repo` + WITH BROKER `bos_broker` + ON LOCATION "bos://palo_backup" + PROPERTIES + ( + "bos_endpoint" = "http://gz.bcebos.com", + "bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22", + "bos_secret_accesskey"="70999999999999de274d59eaa980a" + ); + + 2. 创建和示例 1 相同的仓库,但属性为只读: + CREATE READ ONLY REPOSITORY `bos_repo` + WITH BROKER `bos_broker` + ON LOCATION "bos://palo_backup" + PROPERTIES + ( + "bos_endpoint" = "http://gz.bcebos.com", + "bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22", + "bos_secret_accesskey"="70999999999999de274d59eaa980a" + ); + + 3. 
创建名为 hdfs_repo 的仓库,依赖 Baidu hdfs broker "hdfs_broker",数据根目录为:hdfs://hadoop-name-node:54310/path/to/repo/ + CREATE REPOSITORY `hdfs_repo` + WITH BROKER `hdfs_broker` + ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/" + PROPERTIES + ( + "username" = "user", + "password" = "password" + ); + +## keyword + CREATE REPOSITORY + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md new file mode 100644 index 00000000000000..1af44e7ce4d756 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE TABLE.md @@ -0,0 +1,334 @@ +# CREATE TABLE +## description + 该语句用于创建 table。 + 语法: + CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name + (column_definition1[, column_definition2, ...]) + [ENGINE = [olap|mysql|broker]] + [key_desc] + [partition_desc] + [distribution_desc] + [PROPERTIES ("key"="value", ...)]; + [BROKER PROPERTIES ("key"="value", ...)]; + + 1. column_definition + 语法: + col_name col_type [agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] + + 说明: + col_name:列名称 + col_type:列类型 + TINYINT(1字节) + 范围:-2^7 + 1 ~ 2^7 - 1 + SMALLINT(2字节) + 范围:-2^15 + 1 ~ 2^15 - 1 + INT(4字节) + 范围:-2^31 + 1 ~ 2^31 - 1 + BIGINT(8字节) + 范围:-2^63 + 1 ~ 2^63 - 1 + LARGEINT(16字节) + 范围:0 ~ 2^127 - 1 + FLOAT(4字节) + 支持科学计数法 + DOUBLE(12字节) + 支持科学计数法 + DECIMAL[(precision, scale)] (40字节) + 保证精度的小数类型。默认是 DECIMAL(10, 0) + precision: 1 ~ 27 + scale: 0 ~ 9 + 其中整数部分为 1 ~ 18 + 不支持科学计数法 + DATE(3字节) + 范围:1900-01-01 ~ 9999-12-31 + DATETIME(8字节) + 范围:1900-01-01 00:00:00 ~ 9999-12-31 23:59:59 + CHAR[(length)] + 定长字符串。长度范围:1 ~ 255。默认为1 + VARCHAR[(length)] + 变长字符串。长度范围:1 ~ 65533 + HLL (1~16385个字节) + hll列类型,不需要指定长度和默认值、长度根据数据的聚合 + 程度系统内控制,并且HLL列只能通过配套的hll_union_agg、Hll_cardinality、hll_hash进行查询或使用 + + agg_type:聚合类型,如果不指定,则该列为 key 列。否则,该列为 value 列 + SUM、MAX、MIN、REPLACE、HLL_UNION(仅用于HLL列,为HLL独有的聚合方式) + 该类型只对聚合模型(key_desc的type为AGGREGATE KEY)有用,其它模型不需要指定这个。 + + 是否允许为NULL: 默认不允许为 NULL。NULL 值在导入数据中用 \N 来表示 + + 2. ENGINE 类型 + 默认为 olap。可选 mysql, broker + 1) 如果是 mysql,则需要在 properties 提供以下信息: + + PROPERTIES ( + "host" = "mysql_server_host", + "port" = "mysql_server_port", + "user" = "your_user_name", + "password" = "your_password", + "database" = "database_name", + "table" = "table_name" + ) + + 注意: + "table" 条目中的 "table_name" 是 mysql 中的真实表名。 + 而 CREATE TABLE 语句中的 table_name 是该 mysql 表在 Palo 中的名字,可以不同。 + + 在 Palo 创建 mysql 表的目的是可以通过 Palo 访问 mysql 数据库。 + 而 Palo 本身并不维护、存储任何 mysql 数据。 + 2) 如果是 broker,表示表的访问需要通过指定的broker, 需要在 properties 提供以下信息: + PROPERTIES ( + "broker_name" = "broker_name", + "paths" = "file_path1[,file_path2]", + "column_separator" = "value_separator" + "line_delimiter" = "value_delimiter" + ) + 另外还需要提供Broker需要的Property信息,通过BROKER PROPERTIES来传递,例如HDFS需要传入 + BROKER PROPERTIES( + "username" = "name", + "password" = "password" + ) + 这个根据不同的Broker类型,需要传入的内容也不相同 + 注意: + "paths" 中如果有多个文件,用逗号[,]分割。如果文件名中包含逗号,那么使用 %2c 来替代。如果文件名中包含 %,使用 %25 代替 + 现在文件内容格式支持CSV,支持GZ,BZ2,LZ4,LZO(LZOP) 压缩格式。 + + 3. key_desc + 语法: + key_type(k1[,k2 ...]) + 说明: + 数据按照指定的key列进行排序,且根据不同的key_type具有不同特性。 + key_type支持一下类型: + AGGREGATE KEY:key列相同的记录,value列按照指定的聚合类型进行聚合, + 适合报表、多维分析等业务场景。 + UNIQUE KEY:key列相同的记录,value列按导入顺序进行覆盖, + 适合按key列进行增删改查的点查询业务。 + DUPLICATE KEY:key列相同的记录,同时存在于Palo中, + 适合存储明细数据或者数据无聚合特性的业务场景。 + 注意: + 除AGGREGATE KEY外,其他key_type在建表时,value列不需要指定聚合类型。 + + 4. partition_desc + 1) Range 分区 + 语法: + PARTITION BY RANGE (k1, k2, ...) 
+ ( + PARTITION partition_name VALUES LESS THAN MAXVALUE|("value1", "value2", ...) + PARTITION partition_name VALUES LESS THAN MAXVALUE|("value1", "value2", ...) + ... + ) + 说明: + 使用指定的 key 列和指定的数值范围进行分区。 + 1) 分区名称仅支持字母开头,字母、数字和下划线组成 + 2) 目前仅支持以下类型的列作为 Range 分区列,且只能指定一个分区列 + TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME + 3) 分区为左闭右开区间,首个分区的左边界为做最小值 + 4) NULL 值只会存放在包含最小值的分区中。当包含最小值的分区被删除后,NULL 值将无法导入。 + 5) 可以指定一列或多列作为分区列。如果分区值缺省,则会默认填充最小值。 + + 注意: + 1) 分区一般用于时间维度的数据管理 + 2) 有数据回溯需求的,可以考虑首个分区为空分区,以便后续增加分区 + + 5. distribution_desc + 1) Hash 分桶 + 语法: + DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num] + 说明: + 使用指定的 key 列进行哈希分桶。默认分区数为10 + + 建议:建议使用Hash分桶方式 + + 6. PROPERTIES + 1) 如果 ENGINE 类型为 olap,则可以在 properties 中指定列存(目前我们仅支持列存) + + PROPERTIES ( + "storage_type" = "[column]", + ) + + 2) 如果 ENGINE 类型为 olap + 可以在 properties 设置该表数据的初始存储介质、存储到期时间和副本数。 + + PROPERTIES ( + "storage_medium" = "[SSD|HDD]", + ["storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"], + ["replication_num" = "3"] + ) + + storage_medium: 用于指定该分区的初始存储介质,可选择 SSD 或 HDD。默认为 HDD。 + storage_cooldown_time: 当设置存储介质为 SSD 时,指定该分区在 SSD 上的存储到期时间。 + 默认存放 7 天。 + 格式为:"yyyy-MM-dd HH:mm:ss" + replication_num: 指定分区的副本数。默认为 3 + + 当表为单分区表时,这些属性为表的属性。 + 当表为两级分区时,这些属性为附属于每一个分区。 + 如果希望不同分区有不同属性。可以通过 ADD PARTITION 或 MODIFY PARTITION 进行操作 + + 3) 如果 Engine 类型为 olap, 并且 storage_type 为 column, 可以指定某列使用 bloom filter 索引 + bloom filter 索引仅适用于查询条件为 in 和 equal 的情况,该列的值越分散效果越好 + 目前只支持以下情况的列:除了 TINYINT FLOAT DOUBLE 类型以外的 key 列及聚合方法为 REPLACE 的 value 列 + + PROPERTIES ( + "bloom_filter_columns"="k1,k2,k3" + ) + 4) 如果希望使用Colocate Join 特性,需要在 properties 中指定 + + PROPERTIES ( + "colocate_with"="table1" + ) + +## example + 1. 创建一个 olap 表,使用 HASH 分桶,使用列存,相同key的记录进行聚合 + CREATE TABLE example_db.table_hash + ( + k1 TINYINT, + k2 DECIMAL(10, 2) DEFAULT "10.5", + v1 CHAR(10) REPLACE, + v2 INT SUM + ) + ENGINE=olap + AGGREGATE KEY(k1, k2) + DISTRIBUTED BY HASH(k1) BUCKETS 32 + PROPERTIES ("storage_type"="column"); + + 2. 创建一个 olap 表,使用 Hash 分桶,使用列存,相同key的记录进行覆盖, + 设置初始存储介质和冷却时间 + CREATE TABLE example_db.table_hash + ( + k1 BIGINT, + k2 LARGEINT, + v1 VARCHAR(2048) REPLACE, + v2 SMALLINT SUM DEFAULT "10" + ) + ENGINE=olap + UNIQUE KEY(k1, k2) + DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 + PROPERTIES( + "storage_type"="column", + "storage_medium" = "SSD", + "storage_cooldown_time" = "2015-06-04 00:00:00" + ); + + 3. 创建一个 olap 表,使用 Key Range 分区,使用Hash分桶,默认使用列存, + 相同key的记录同时存在,设置初始存储介质和冷却时间 + CREATE TABLE example_db.table_range + ( + k1 DATE, + k2 INT, + k3 SMALLINT, + v1 VARCHAR(2048), + v2 DATETIME DEFAULT "2014-02-04 15:36:00" + ) + ENGINE=olap + DUPLICATE KEY(k1, k2, k3) + PARTITION BY RANGE (k1) + ( + PARTITION p1 VALUES LESS THAN ("2014-01-01"), + PARTITION p2 VALUES LESS THAN ("2014-06-01"), + PARTITION p3 VALUES LESS THAN ("2014-12-01") + ) + DISTRIBUTED BY HASH(k2) BUCKETS 32 + PROPERTIES( + "storage_medium" = "SSD", "storage_cooldown_time" = "2015-06-04 00:00:00" + ); + + 说明: + 这个语句会将数据划分成如下3个分区: + ( { MIN }, {"2014-01-01"} ) + [ {"2014-01-01"}, {"2014-06-01"} ) + [ {"2014-06-01"}, {"2014-12-01"} ) + + 不在这些分区范围内的数据将视为非法数据被过滤 + + 4. 创建一个 mysql 表 + CREATE TABLE example_db.table_mysql + ( + k1 DATE, + k2 INT, + k3 SMALLINT, + k4 VARCHAR(2048), + k5 DATETIME + ) + ENGINE=mysql + PROPERTIES + ( + "host" = "127.0.0.1", + "port" = "8239", + "user" = "mysql_user", + "password" = "mysql_passwd", + "database" = "mysql_db_test", + "table" = "mysql_table_test" + ) + + 5. 
创建一个数据文件存储在HDFS上的 broker 外部表, 数据使用 "|" 分割,"\n" 换行 + CREATE EXTERNAL TABLE example_db.table_broker ( + k1 DATE, + k2 INT, + k3 SMALLINT, + k4 VARCHAR(2048), + k5 DATETIME + ) + ENGINE=broker + PROPERTIES ( + "broker_name" = "hdfs", + "path" = "hdfs://hdfs_host:hdfs_port/data1,hdfs://hdfs_host:hdfs_port/data2,hdfs://hdfs_host:hdfs_port/data3%2c4", + "column_separator" = "|", + "line_delimiter" = "\n" + ) + BROKER PROPERTIES ( + "username" = "hdfs_user", + "password" = "hdfs_password" + ) + + 6. 创建一张含有HLL列的表 + CREATE TABLE example_db.example_table + ( + k1 TINYINT, + k2 DECIMAL(10, 2) DEFAULT "10.5", + v1 HLL HLL_UNION, + v2 HLL HLL_UNION + ) + ENGINE=olap + AGGREGATE KEY(k1, k2) + DISTRIBUTED BY HASH(k1) BUCKETS 32 + PROPERTIES ("storage_type"="column"); + + 7. 创建两张支持Colocat Join的表t1 和t2 + CREATE TABLE `t1` ( + `id` int(11) COMMENT "", + `value` varchar(8) COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 10 + PROPERTIES ( + "colocate_with" = "t1" + ); + + CREATE TABLE `t2` ( + `id` int(11) COMMENT "", + `value` varchar(8) COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 10 + PROPERTIES ( + "colocate_with" = "t1" + ); + + 8. 创建一个数据文件存储在BOS上的 broker 外部表 + CREATE EXTERNAL TABLE example_db.table_broker ( + k1 DATE + ) + ENGINE=broker + PROPERTIES ( + "broker_name" = "bos", + "path" = "bos://my_bucket/input/file", + ) + BROKER PROPERTIES ( + "bos_endpoint" = "http://bj.bcebos.com", + "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", + "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy" + ) + +## keyword + CREATE,TABLE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE VIEW.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE VIEW.md new file mode 100644 index 00000000000000..a64e611f0b5c25 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/CREATE VIEW.md @@ -0,0 +1,22 @@ +# CREATE VIEW +## description + 该语句用于创建一个逻辑视图 + 语法: + CREATE VIEW [IF NOT EXISTS] + [db_name.]view_name (column1[, column2, ...]) + AS query_stmt + + 说明: + 1. 视图为逻辑视图,没有物理存储。所有在视图上的查询相当于在视图对应的子查询上进行。 + 2. query_stmt 为任意支持的 SQL + +## example + 1. 在 example_db 上创建视图 example_view + CREATE VIEW example_db.example_view (k1, k2, k3, v1) + AS + SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table + WHERE k1 = 20160112 GROUP BY k1,k2,k3; + +## keyword + CREATE,VIEW + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/Colocate Join.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/Colocate Join.md new file mode 100644 index 00000000000000..9a8529ffb0ed76 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/Colocate Join.md @@ -0,0 +1,72 @@ +# Colocate Join +## description + Colocate/Local Join 就是指多个节点Join时没有数据移动和网络传输,每个节点只在本地进行Join, + 能够本地进行Join的前提是相同Join Key的数据导入时按照相同规则导入到固定的节点。 + + 1 How To Use: + + 只需要在建表时增加 colocate_with 这个属性即可,colocate_with的值 可以设置成同一组colocate 表中的任意一个, + 不过需要保证colocate_with属性中的表要先建立。 + + 假如需要对table t1 和t2 进行Colocate Join,可以按以下语句建表: + + CREATE TABLE `t1` ( + `id` int(11) COMMENT "", + `value` varchar(8) COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 10 + PROPERTIES ( + "colocate_with" = "t1" + ); + + CREATE TABLE `t2` ( + `id` int(11) COMMENT "", + `value` varchar(8) COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 10 + PROPERTIES ( + "colocate_with" = "t1" + ); + + 2 Colocate Join 目前的限制: + + 1. 
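Building on the t1 and t2 tables created just above, and on the FAQ further below (which points to EXPLAIN output and the disable_colocate_join session variable), here is a small sketch for checking whether a join is actually executed as a Colocate Join.

```
-- If the Hash Join's child in the plan is an OlapScanNode with no Exchange Node,
-- the join is colocated (see the FAQ below).
EXPLAIN SELECT t1.id, t1.value, t2.value
FROM t1 JOIN t2 ON t1.id = t2.id;

-- Temporarily fall back to Shuffle/Broadcast Join for comparison.
SET disable_colocate_join = true;
EXPLAIN SELECT t1.id, t1.value, t2.value
FROM t1 JOIN t2 ON t1.id = t2.id;
```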
Colcoate Table 必须是OLAP类型的表 + 2. 相同colocate_with 属性的表的 BUCKET 数必须一样 + 3. 相同colocate_with 属性的表的 副本数必须一样 + 4. 相同colocate_with 属性的表的 DISTRIBUTED Columns的数据类型必须一样 + + 3 Colocate Join的适用场景: + + Colocate Join 十分适合几张表按照相同字段分桶,并高频根据相同字段Join的场景。 + + 4 FAQ: + + Q: 支持多张表进行Colocate Join 吗? + + A: 支持 + + Q: 支持Colocate 表和正常表 Join 吗? + + A: 支持 + + Q: Colocate 表支持用非分桶的Key进行Join吗? + + A: 支持:不符合Colocate Join条件的Join会使用Shuffle Join或Broadcast Join + + Q: 如何确定Join 是按照Colocate Join 执行的? + + A: explain的结果中Hash Join的孩子节点如果直接是OlapScanNode, 没有Exchange Node,就说明是Colocate Join + + Q: 如何修改colocate_with 属性? + + A: ALTER TABLE example_db.my_table set ("colocate_with"="target_table"); + + Q: 如何禁用colcoate join? + + A: set disable_colocate_join = true; 就可以禁用Colocate Join,查询时就会使用Shuffle Join 和Broadcast Join + +## keyword + + COLOCATE, JOIN, CREATE TABLE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP DATABASE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP DATABASE.md new file mode 100644 index 00000000000000..7ad5ba06bdddb2 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP DATABASE.md @@ -0,0 +1,16 @@ +# DROP DATABASE +## description + 该语句用于删除数据库(database) + 语法: + DROP DATABASE [IF EXISTS] db_name; + + 说明: + 执行 DROP DATABASE 一段时间内,可以通过 RECOVER 语句恢复被删除的 database。详见 RECOVER 语句 + +## example + 1. 删除数据库 db_test + DROP DATABASE db_test; + +## keyword + DROP,DATABASE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md new file mode 100644 index 00000000000000..1ecdc6a30a65ce --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md @@ -0,0 +1,16 @@ +# DROP REPOSITORY +## description + 该语句用于删除一个已创建的仓库。仅 root 或 superuser 用户可以删除仓库。 + 语法: + DROP REPOSITORY `repo_name`; + + 说明: + 1. 删除仓库,仅仅是删除该仓库在 Palo 中的映射,不会删除实际的仓库数据。删除后,可以再次通过指定相同的 broker 和 LOCATION 映射到该仓库。 + +## example + 1. 删除名为 bos_repo 的仓库: + DROP REPOSITORY `bos_repo`; + +## keyword + DROP REPOSITORY + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP TABLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP TABLE.md new file mode 100644 index 00000000000000..9f1473b9a2fef5 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP TABLE.md @@ -0,0 +1,19 @@ +# DROP TABLE +## description + 该语句用于删除 table 。 + 语法: + DROP TABLE [IF EXISTS] [db_name.]table_name; + + 说明: + 执行 DROP TABLE 一段时间内,可以通过 RECOVER 语句恢复被删除的 table。详见 RECOVER 语句 + +## example + 1. 删除一个 table + DROP TABLE my_table; + + 2. 如果存在,删除指定 database 的 table + DROP TABLE IF EXISTS example_db.my_table; + +## keyword + DROP,TABLE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP VIEW.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP VIEW.md new file mode 100644 index 00000000000000..e2feb735b4c5ca --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/DROP VIEW.md @@ -0,0 +1,14 @@ +# DROP VIEW +## description + 该语句用于删除一个逻辑视图 VIEW + 语法: + DROP VIEW [IF EXISTS] + [db_name.]view_name; + +## example + 1. 
如果存在,删除 example_db 上的视图 example_view + DROP VIEW IF EXISTS example_db.example_view; + +## keyword + DROP,VIEW + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md new file mode 100644 index 00000000000000..16b40d7fbb444e --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/HLL.md @@ -0,0 +1,80 @@ +# HLL +## description + HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过程的中间结果,它只能作为表的value列类型 + 通过聚合来不断的减少数据量,以此来实现加快查询的目的,基于它到的是一个估算结果,误差大概在1%左右 + hll列是通过其它列或者导入数据里面的数据生成的,导入的时候通过hll_hash函数来指定数据中哪一列用于生成hll列 + 它常用于替代count distinct,通过结合rollup在业务上用于快速计算uv等 + + 相关函数: + + HLL_UNION_AGG(hll) + 此函数为聚合函数,用于计算满足条件的所有数据的基数估算。此函数还可用于分析函数,只支持默认窗口,不支持window从句。 + + HLL_RAW_AGG(hll) + 此函数为聚合函数,用于聚合hll类型字段,并且返回的还是hll类型。 + + HLL_CARDINALITY(hll) + 此函数用于计算单条hll列的基数估算 + + HLL_HASH(column_name) + 生成HLL列类型,用于insert或导入的时候,导入的使用见相关说明 + +## example + 1. 首先创建一张含有hll列的表 + create table test( + dt date, + id int, + name char(10), + province char(10), + os char(1), + set1 hll hll_union, + set2 hll hll_union) + distributed by hash(id) buckets 32; + + 2. 导入数据,导入的方式见相关help curl + + a. 使用表中的列生成hll列 + curl --location-trusted -uname:password -T data http://host/api/test_db/test/_load?label=load_1\&hll=set1,id:set2,name + + b. 使用数据中的某一列生成hll列 + curl --location-trusted -uname:password -T data http://host/api/test_db/test/_load?label=load_1\&hll=set1,cuid:set2,os + \&columns=dt,id,name,province,sex,cuid,os + + 3. 聚合数据,常用方式3种:(如果不聚合直接对base表查询,速度可能跟直接使用ndv速度差不多) + + a. 创建一个rollup,让hll列产生聚合, + alter table test add rollup test_rollup(dt, set1); + + b. 创建另外一张专门计算uv的表,然后insert数据) + + create table test_uv( + dt date, + uv_set hll hll_union) + distributed by hash(id) buckets 32; + + insert into test_uv select dt, set1 from test; + + c. 创建另外一张专门计算uv的表,然后insert并通过hll_hash根据test其它非hll列生成hll列 + + create table test_uv( + dt date, + id_set hll hll_union) + distributed by hash(id) buckets 32; + + insert into test_uv select dt, hll_hash(id) from test; + + 4. 查询,hll列不允许直接查询它的原始值,可以通过配套的函数进行查询 + + a. 求总uv + select HLL_UNION_AGG(uv_set) from test_uv; + + b. 求每一天的uv + select dt, HLL_CARDINALITY(uv_set) from test_uv; + + c. 求test表中set1的聚合值 + select dt, HLL_CARDINALITY(uv) from (select dt, HLL_RAW_AGG(set1) as uv from test group by dt) tmp; + select dt, HLL_UNION_AGG(set1) as uv from test group by dt; + +## keyword + HLL + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/RECOVER.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/RECOVER.md new file mode 100644 index 00000000000000..73b52ae2942516 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/RECOVER.md @@ -0,0 +1,28 @@ +# RECOVER +## description + 该语句用于恢复之前删除的 database、table 或者 partition + 语法: + 1) 恢复 database + RECOVER DATABASE db_name; + 2) 恢复 table + RECOVER TABLE [db_name.]table_name; + 3) 恢复 partition + RECOVER PARTITION partition_name FROM [db_name.]table_name; + + 说明: + 1. 该操作仅能恢复之前一段时间内删除的元信息。默认为 3600 秒。 + 2. 如果删除元信息后新建立了同名同类型的元信息,则之前删除的元信息不能被恢复 + +## example + 1. 恢复名为 example_db 的 database + RECOVER DATABASE example_db; + + 2. 恢复名为 example_tbl 的 table + RECOVER TABLE example_db.example_tbl; + + 3. 
恢复表 example_tbl 中名为 p1 的 partition + RECOVER PARTITION p1 FROM example_tbl; + +## keyword + RECOVER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/RESTORE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/RESTORE.md new file mode 100644 index 00000000000000..0f6f71e3a52319 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/RESTORE.md @@ -0,0 +1,52 @@ +# RESTORE +## description + 1. RESTORE + 该语句用于将之前通过 BACKUP 命令备份的数据,恢复到指定数据库下。该命令为异步操作。提交成功后,需通过 SHOW RESTORE 命令查看进度。仅支持恢复 OLAP 类型的表。 + 语法: + RESTORE SNAPSHOT [db_name].{snapshot_name} + FROM `repository_name` + ON ( + `table_name` [PARTITION (`p1`, ...)] [AS `tbl_alias`], + ... + ) + PROPERTIES ("key"="value", ...); + + 说明: + 1. 同一数据库下只能有一个正在执行的 BACKUP 或 RESTORE 任务。 + 2. ON 子句中标识需要恢复的表和分区。如果不指定分区,则默认恢复该表的所有分区。所指定的表和分区必须已存在于仓库备份中。 + 3. 可以通过 AS 语句将仓库中备份的表名恢复为新的表。但新表名不能已存在于数据库中。分区名称不能修改。 + 4. 可以将仓库中备份的表恢复替换数据库中已有的同名表,但须保证两张表的表结构完全一致。表结构包括:表名、列、分区、Rollup等等。 + 5. 可以指定恢复表的部分分区,系统会检查分区 Range 是否能够匹配。 + 6. PROPERTIES 目前支持以下属性: + "backup_timestamp" = "2018-05-04-16-45-08":指定了恢复对应备份的哪个时间版本,必填。该信息可以通过 `SHOW SNAPSHOT ON repo;` 语句获得。 + "replication_num" = "3":指定恢复的表或分区的副本数。默认为3。若恢复已存在的表或分区,则副本数必须和已存在表或分区的副本数相同。同时,必须有足够的 host 容纳多个副本。 + "timeout" = "3600":任务超时时间,默认为一天。单位秒。 + "meta_version" = 40:使用指定的 meta_version 来读取之前备份的元数据。注意,该参数作为临时方案,仅用于恢复老版本 Doris 备份的数据。最新版本的备份数据中已经包含 meta version,无需再指定。 + +## example + 1. 从 example_repo 中恢复备份 snapshot_1 中的表 backup_tbl 到数据库 example_db1,时间版本为 "2018-05-04-16-45-08"。恢复为 1 个副本: + RESTORE SNAPSHOT example_db1.`snapshot_1` + FROM `example_repo` + ON ( `backup_tbl` ) + PROPERTIES + ( + "backup_timestamp"="2018-05-04-16-45-08", + "replication_num" = "1" + ); + + 2. 从 example_repo 中恢复备份 snapshot_2 中的表 backup_tbl 的分区 p1,p2,以及表 backup_tbl2 到数据库 example_db1,并重命名为 new_tbl,时间版本为 "2018-05-04-17-11-01"。默认恢复为 3 个副本: + RESTORE SNAPSHOT example_db1.`snapshot_2` + FROM `example_repo` + ON + ( + `backup_tbl` PARTITION (`p1`, `p2`), + `backup_tbl2` AS `new_tbl` + ) + PROPERTIES + ( + "backup_timestamp"="2018-05-04-17-11-01" + ); + +## keyword + RESTORE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md new file mode 100644 index 00000000000000..28c68c5bc24675 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md @@ -0,0 +1,27 @@ +# TRUNCATE TABLE +## description + 该语句用于清空指定表和分区的数据 + 语法: + + TRUNCATE TABLE [db.]tbl[ PARTITION(p1, p2, ...)]; + + 说明: + 1. 该语句清空数据,但保留表或分区。 + 2. 不同于 DELETE,该语句只能整体清空指定的表或分区,不能添加过滤条件。 + 3. 不同于 DELETE,使用该方式清空数据不会对查询性能造成影响。 + 4. 该操作删除的数据不可恢复。 + 5. 使用该命令时,表状态需为 NORMAL,即不允许正在进行 SCHEMA CHANGE 等操作。 + +## example + + 1. 清空 example_db 下的表 tbl + + TRUNCATE TABLE example_db.tbl; + + 2. 
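The BACKUP and RESTORE pages above each show one side of the workflow; the sketch below only strings them together. It assumes the repository example_repo already exists (see CREATE REPOSITORY) and reuses the snapshot label, table and timestamp placeholders from the examples on those pages.

```
-- 1. Back up a table (asynchronous; monitor with SHOW BACKUP).
BACKUP SNAPSHOT example_db.snapshot_label1
TO example_repo
ON (example_tbl)
PROPERTIES ("type" = "full");

-- 2. Once finished, look up the snapshot's backup_timestamp.
SHOW SNAPSHOT ON example_repo;

-- 3. Restore it (asynchronous; monitor with SHOW RESTORE), using the timestamp
--    returned above (the value here is the placeholder from the RESTORE example).
RESTORE SNAPSHOT example_db.snapshot_label1
FROM example_repo
ON (example_tbl)
PROPERTIES ("backup_timestamp" = "2018-05-04-16-45-08");
```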
清空表 tbl 的 p1 和 p2 分区 + + TRUNCATE TABLE tbl PARTITION(p1, p2); + +## keyword + TRUNCATE,TABLE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md index b2dbd9954f4b71..f2bf7953928c04 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md @@ -1,6 +1,11 @@ # CREATE FUNCTION +## description -## Syntax +此语句创建一个自定义函数。执行此命令需要用户拥有 `ADMIN` 权限。 + +如果 `function_name` 中包含了数据库名字,那么这个自定义函数会创建在对应的数据库中,否则这个函数将会创建在当前会话所在的数据库。新函数的名字与参数不能够与当前命名空间中已存在的函数相同,否则会创建失败。但是只有名字相同,参数不同是能够创建成功的。 + + Syntax ``` CREATE [AGGREGATE] FUNCTION function_name @@ -10,13 +15,7 @@ CREATE [AGGREGATE] FUNCTION function_name [PROPERTIES ("key" = "value" [, ...]) ] ``` -## Description - -此语句创建一个自定义函数。执行此命令需要用户拥有 `ADMIN` 权限。 - -如果 `function_name` 中包含了数据库名字,那么这个自定义函数会创建在对应的数据库中,否则这个函数将会创建在当前会话所在的数据库。新函数的名字与参数不能够与当前命名空间中已存在的函数相同,否则会创建失败。但是只有名字相同,参数不同是能够创建成功的。 - -## Parameters + Parameters > `AGGREGATE`: 如果有此项,表示的是创建的函数是一个聚合函数,否则创建的是一个标量函数。 > @@ -46,7 +45,7 @@ CREATE [AGGREGATE] FUNCTION function_name > > "md5": 函数动态链接库的MD5值,用于校验下载的内容是否正确。此选项是可选项 -## Examples +## example 1. 创建一个自定义标量函数 @@ -68,3 +67,5 @@ CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES ( "object_file"="http://host:port/libudasample.so" ); ``` +##keyword +CREATE,FUNCTION,CREATE,FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md deleted file mode 100644 index b0bb79ecd0d7cb..00000000000000 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/ddl_stmt.md +++ /dev/null @@ -1,1112 +0,0 @@ -# CREATE DATABASE -## description - 该语句用于新建数据库(database) - 语法: - CREATE DATABASE [IF NOT EXISTS] db_name; - -## example - 1. 新建数据库 db_test - CREATE DATABASE db_test; - -## keyword - CREATE,DATABASE - -# DROP DATABASE -## description - 该语句用于删除数据库(database) - 语法: - DROP DATABASE [IF EXISTS] db_name; - - 说明: - 执行 DROP DATABASE 一段时间内,可以通过 RECOVER 语句恢复被删除的 database。详见 RECOVER 语句 - -## example - 1. 删除数据库 db_test - DROP DATABASE db_test; - -## keyword - DROP,DATABASE - -# CREATE TABLE -## description - 该语句用于创建 table。 - 语法: - CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name - (column_definition1[, column_definition2, ...]) - [ENGINE = [olap|mysql|broker]] - [key_desc] - [partition_desc] - [distribution_desc] - [PROPERTIES ("key"="value", ...)]; - [BROKER PROPERTIES ("key"="value", ...)]; - - 1. 
column_definition - 语法: - col_name col_type [agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] - - 说明: - col_name:列名称 - col_type:列类型 - TINYINT(1字节) - 范围:-2^7 + 1 ~ 2^7 - 1 - SMALLINT(2字节) - 范围:-2^15 + 1 ~ 2^15 - 1 - INT(4字节) - 范围:-2^31 + 1 ~ 2^31 - 1 - BIGINT(8字节) - 范围:-2^63 + 1 ~ 2^63 - 1 - LARGEINT(16字节) - 范围:0 ~ 2^127 - 1 - FLOAT(4字节) - 支持科学计数法 - DOUBLE(12字节) - 支持科学计数法 - DECIMAL[(precision, scale)] (40字节) - 保证精度的小数类型。默认是 DECIMAL(10, 0) - precision: 1 ~ 27 - scale: 0 ~ 9 - 其中整数部分为 1 ~ 18 - 不支持科学计数法 - DATE(3字节) - 范围:1900-01-01 ~ 9999-12-31 - DATETIME(8字节) - 范围:1900-01-01 00:00:00 ~ 9999-12-31 23:59:59 - CHAR[(length)] - 定长字符串。长度范围:1 ~ 255。默认为1 - VARCHAR[(length)] - 变长字符串。长度范围:1 ~ 65533 - HLL (1~16385个字节) - hll列类型,不需要指定长度和默认值、长度根据数据的聚合 - 程度系统内控制,并且HLL列只能通过配套的hll_union_agg、Hll_cardinality、hll_hash进行查询或使用 - - agg_type:聚合类型,如果不指定,则该列为 key 列。否则,该列为 value 列 - SUM、MAX、MIN、REPLACE、HLL_UNION(仅用于HLL列,为HLL独有的聚合方式) - 该类型只对聚合模型(key_desc的type为AGGREGATE KEY)有用,其它模型不需要指定这个。 - - 是否允许为NULL: 默认不允许为 NULL。NULL 值在导入数据中用 \N 来表示 - - 2. ENGINE 类型 - 默认为 olap。可选 mysql, broker - 1) 如果是 mysql,则需要在 properties 提供以下信息: - - PROPERTIES ( - "host" = "mysql_server_host", - "port" = "mysql_server_port", - "user" = "your_user_name", - "password" = "your_password", - "database" = "database_name", - "table" = "table_name" - ) - - 注意: - "table" 条目中的 "table_name" 是 mysql 中的真实表名。 - 而 CREATE TABLE 语句中的 table_name 是该 mysql 表在 Palo 中的名字,可以不同。 - - 在 Palo 创建 mysql 表的目的是可以通过 Palo 访问 mysql 数据库。 - 而 Palo 本身并不维护、存储任何 mysql 数据。 - 2) 如果是 broker,表示表的访问需要通过指定的broker, 需要在 properties 提供以下信息: - PROPERTIES ( - "broker_name" = "broker_name", - "paths" = "file_path1[,file_path2]", - "column_separator" = "value_separator" - "line_delimiter" = "value_delimiter" - ) - 另外还需要提供Broker需要的Property信息,通过BROKER PROPERTIES来传递,例如HDFS需要传入 - BROKER PROPERTIES( - "username" = "name", - "password" = "password" - ) - 这个根据不同的Broker类型,需要传入的内容也不相同 - 注意: - "paths" 中如果有多个文件,用逗号[,]分割。如果文件名中包含逗号,那么使用 %2c 来替代。如果文件名中包含 %,使用 %25 代替 - 现在文件内容格式支持CSV,支持GZ,BZ2,LZ4,LZO(LZOP) 压缩格式。 - - 3. key_desc - 语法: - key_type(k1[,k2 ...]) - 说明: - 数据按照指定的key列进行排序,且根据不同的key_type具有不同特性。 - key_type支持一下类型: - AGGREGATE KEY:key列相同的记录,value列按照指定的聚合类型进行聚合, - 适合报表、多维分析等业务场景。 - UNIQUE KEY:key列相同的记录,value列按导入顺序进行覆盖, - 适合按key列进行增删改查的点查询业务。 - DUPLICATE KEY:key列相同的记录,同时存在于Palo中, - 适合存储明细数据或者数据无聚合特性的业务场景。 - 注意: - 除AGGREGATE KEY外,其他key_type在建表时,value列不需要指定聚合类型。 - - 4. partition_desc - 1) Range 分区 - 语法: - PARTITION BY RANGE (k1, k2, ...) - ( - PARTITION partition_name VALUES LESS THAN MAXVALUE|("value1", "value2", ...) - PARTITION partition_name VALUES LESS THAN MAXVALUE|("value1", "value2", ...) - ... - ) - 说明: - 使用指定的 key 列和指定的数值范围进行分区。 - 1) 分区名称仅支持字母开头,字母、数字和下划线组成 - 2) 目前仅支持以下类型的列作为 Range 分区列,且只能指定一个分区列 - TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME - 3) 分区为左闭右开区间,首个分区的左边界为做最小值 - 4) NULL 值只会存放在包含最小值的分区中。当包含最小值的分区被删除后,NULL 值将无法导入。 - 5) 可以指定一列或多列作为分区列。如果分区值缺省,则会默认填充最小值。 - - 注意: - 1) 分区一般用于时间维度的数据管理 - 2) 有数据回溯需求的,可以考虑首个分区为空分区,以便后续增加分区 - - 5. distribution_desc - 1) Hash 分桶 - 语法: - DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num] - 说明: - 使用指定的 key 列进行哈希分桶。默认分区数为10 - - 建议:建议使用Hash分桶方式 - - 6. 
PROPERTIES - 1) 如果 ENGINE 类型为 olap,则可以在 properties 中指定列存(目前我们仅支持列存) - - PROPERTIES ( - "storage_type" = "[column]", - ) - - 2) 如果 ENGINE 类型为 olap - 可以在 properties 设置该表数据的初始存储介质、存储到期时间和副本数。 - - PROPERTIES ( - "storage_medium" = "[SSD|HDD]", - ["storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"], - ["replication_num" = "3"] - ) - - storage_medium: 用于指定该分区的初始存储介质,可选择 SSD 或 HDD。默认为 HDD。 - storage_cooldown_time: 当设置存储介质为 SSD 时,指定该分区在 SSD 上的存储到期时间。 - 默认存放 7 天。 - 格式为:"yyyy-MM-dd HH:mm:ss" - replication_num: 指定分区的副本数。默认为 3 - - 当表为单分区表时,这些属性为表的属性。 - 当表为两级分区时,这些属性为附属于每一个分区。 - 如果希望不同分区有不同属性。可以通过 ADD PARTITION 或 MODIFY PARTITION 进行操作 - - 3) 如果 Engine 类型为 olap, 并且 storage_type 为 column, 可以指定某列使用 bloom filter 索引 - bloom filter 索引仅适用于查询条件为 in 和 equal 的情况,该列的值越分散效果越好 - 目前只支持以下情况的列:除了 TINYINT FLOAT DOUBLE 类型以外的 key 列及聚合方法为 REPLACE 的 value 列 - - PROPERTIES ( - "bloom_filter_columns"="k1,k2,k3" - ) - 4) 如果希望使用Colocate Join 特性,需要在 properties 中指定 - - PROPERTIES ( - "colocate_with"="table1" - ) - -## example - 1. 创建一个 olap 表,使用 HASH 分桶,使用列存,相同key的记录进行聚合 - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ("storage_type"="column"); - - 2. 创建一个 olap 表,使用 Hash 分桶,使用列存,相同key的记录进行覆盖, - 设置初始存储介质和冷却时间 - CREATE TABLE example_db.table_hash - ( - k1 BIGINT, - k2 LARGEINT, - v1 VARCHAR(2048) REPLACE, - v2 SMALLINT SUM DEFAULT "10" - ) - ENGINE=olap - UNIQUE KEY(k1, k2) - DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 - PROPERTIES( - "storage_type"="column", - "storage_medium" = "SSD", - "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - - 3. 创建一个 olap 表,使用 Key Range 分区,使用Hash分桶,默认使用列存, - 相同key的记录同时存在,设置初始存储介质和冷却时间 - CREATE TABLE example_db.table_range - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1) - ( - PARTITION p1 VALUES LESS THAN ("2014-01-01"), - PARTITION p2 VALUES LESS THAN ("2014-06-01"), - PARTITION p3 VALUES LESS THAN ("2014-12-01") - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - - 说明: - 这个语句会将数据划分成如下3个分区: - ( { MIN }, {"2014-01-01"} ) - [ {"2014-01-01"}, {"2014-06-01"} ) - [ {"2014-06-01"}, {"2014-12-01"} ) - - 不在这些分区范围内的数据将视为非法数据被过滤 - - 4. 创建一个 mysql 表 - CREATE TABLE example_db.table_mysql - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=mysql - PROPERTIES - ( - "host" = "127.0.0.1", - "port" = "8239", - "user" = "mysql_user", - "password" = "mysql_passwd", - "database" = "mysql_db_test", - "table" = "mysql_table_test" - ) - - 5. 创建一个数据文件存储在HDFS上的 broker 外部表, 数据使用 "|" 分割,"\n" 换行 - CREATE EXTERNAL TABLE example_db.table_broker ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=broker - PROPERTIES ( - "broker_name" = "hdfs", - "path" = "hdfs://hdfs_host:hdfs_port/data1,hdfs://hdfs_host:hdfs_port/data2,hdfs://hdfs_host:hdfs_port/data3%2c4", - "column_separator" = "|", - "line_delimiter" = "\n" - ) - BROKER PROPERTIES ( - "username" = "hdfs_user", - "password" = "hdfs_password" - ) - - 6. 创建一张含有HLL列的表 - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 HLL HLL_UNION, - v2 HLL HLL_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ("storage_type"="column"); - - 7. 
创建两张支持Colocat Join的表t1 和t2 - CREATE TABLE `t1` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "t1" - ); - - CREATE TABLE `t2` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "t1" - ); - - 8. 创建一个数据文件存储在BOS上的 broker 外部表 - CREATE EXTERNAL TABLE example_db.table_broker ( - k1 DATE - ) - ENGINE=broker - PROPERTIES ( - "broker_name" = "bos", - "path" = "bos://my_bucket/input/file", - ) - BROKER PROPERTIES ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy" - ) - -## keyword - CREATE,TABLE - -# DROP TABLE -## description - 该语句用于删除 table 。 - 语法: - DROP TABLE [IF EXISTS] [db_name.]table_name; - - 说明: - 执行 DROP TABLE 一段时间内,可以通过 RECOVER 语句恢复被删除的 table。详见 RECOVER 语句 - -## example - 1. 删除一个 table - DROP TABLE my_table; - - 2. 如果存在,删除指定 database 的 table - DROP TABLE IF EXISTS example_db.my_table; - -## keyword - DROP,TABLE - -# ALTER TABLE -## description - 该语句用于对已有的 table 进行修改。如果没有指定 rollup index,默认操作 base index。 - 该语句分为三种操作类型: schema change 、rollup 、partition - 这三种操作类型不能同时出现在一条 ALTER TABLE 语句中。 - 其中 schema change 和 rollup 是异步操作,任务提交成功则返回。之后可使用 SHOW ALTER 命令查看进度。 - partition 是同步操作,命令返回表示执行完毕。 - - 语法: - ALTER TABLE [database.]table - alter_clause1[, alter_clause2, ...]; - - alter_clause 分为 partition 、rollup、schema change 和 rename 四种。 - - partition 支持如下几种修改方式 - 1. 增加分区 - 语法: - ADD PARTITION [IF NOT EXISTS] partition_name VALUES LESS THAN [MAXVALUE|("value1")] ["key"="value"] - [DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]] - 注意: - 1) 分区为左闭右开区间,用户指定右边界,系统自动确定左边界 - 2) 如果没有指定分桶方式,则自动使用建表使用的分桶方式 - 3) 如指定分桶方式,只能修改分桶数,不可修改分桶方式或分桶列 - 4) ["key"="value"] 部分可以设置分区的一些属性,具体说明见 CREATE TABLE - - 2. 删除分区 - 语法: - DROP PARTITION [IF EXISTS] partition_name - 注意: - 1) 使用分区方式的表至少要保留一个分区。 - 2) 执行 DROP PARTITION 一段时间内,可以通过 RECOVER 语句恢复被删除的 partition。详见 RECOVER 语句 - - 3. 修改分区属性 - 语法: - MODIFY PARTITION partition_name SET ("key" = "value", ...) - 说明: - 1) 当前支持修改分区的 storage_medium、storage_cooldown_time 和 replication_num 三个属性。 - 2) 对于单分区表,partition_name 同表名。 - - rollup 支持如下几种创建方式: - 1. 创建 rollup index - 语法: - ADD ROLLUP rollup_name (column_name1, column_name2, ...) - [FROM from_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 如果没有指定 from_index_name,则默认从 base index 创建 - 2) rollup 表中的列必须是 from_index 中已有的列 - 3) 在 properties 中,可以指定存储格式。具体请参阅 CREATE TABLE - - 2. 删除 rollup index - 语法: - DROP ROLLUP rollup_name - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 不能删除 base index - 2) 执行 DROP ROLLUP 一段时间内,可以通过 RECOVER 语句恢复被删除的 rollup index。详见 RECOVER 语句 - - - schema change 支持如下几种修改方式: - 1. 向指定 index 的指定位置添加一列 - 语法: - ADD COLUMN column_name column_type [KEY | agg_type] [DEFAULT "default_value"] - [AFTER column_name|FIRST] - [TO rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 聚合模型如果增加 value 列,需要指定 agg_type - 2) 非聚合模型如果增加key列,需要指定KEY关键字 - 3) 不能在 rollup index 中增加 base index 中已经存在的列 - 如有需要,可以重新创建一个 rollup index) - - 2. 向指定 index 添加多列 - 语法: - ADD COLUMN (column_name1 column_type [KEY | agg_type] DEFAULT "default_value", ...) - [TO rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 聚合模型如果增加 value 列,需要指定agg_type - 2) 非聚合模型如果增加key列,需要指定KEY关键字 - 3) 不能在 rollup index 中增加 base index 中已经存在的列 - (如有需要,可以重新创建一个 rollup index) - - 3. 
从指定 index 中删除一列 - 语法: - DROP COLUMN column_name - [FROM rollup_index_name] - 注意: - 1) 不能删除分区列 - 2) 如果是从 base index 中删除列,则如果 rollup index 中包含该列,也会被删除 - - 4. 修改指定 index 的列类型以及列位置 - 语法: - MODIFY COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] - [AFTER column_name|FIRST] - [FROM rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 聚合模型如果修改 value 列,需要指定 agg_type - 2) 非聚合类型如果修改key列,需要指定KEY关键字 - 3) 只能修改列的类型,列的其他属性维持原样(即其他属性需在语句中按照原属性显式的写出,参见 example 8) - 4) 分区列不能做任何修改 - 5) 目前支持以下类型的转换(精度损失由用户保证) - TINYINT/SMALLINT/INT/BIGINT 转换成 TINYINT/SMALLINT/INT/BIGINT/DOUBLE。 - LARGEINT 转换成 DOUBLE - VARCHAR 支持修改最大长度 - 6) 不支持从NULL转为NOT NULL - - 5. 对指定 index 的列进行重新排序 - 语法: - ORDER BY (column_name1, column_name2, ...) - [FROM rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) index 中的所有列都要写出来 - 2) value 列在 key 列之后 - - 6. 修改table的属性,目前支持修改bloom filter列和colocate_with 属性 - 语法: - PROPERTIES ("key"="value") - 注意: - 也可以合并到上面的schema change操作中来修改,见下面例子 - - - rename 支持对以下名称进行修改: - 1. 修改表名 - 语法: - RENAME new_table_name; - - 2. 修改 rollup index 名称 - 语法: - RENAME ROLLUP old_rollup_name new_rollup_name; - - 3. 修改 partition 名称 - 语法: - RENAME PARTITION old_partition_name new_partition_name; - -## example - [partition] - 1. 增加分区, 现有分区 [MIN, 2013-01-01),增加分区 [2013-01-01, 2014-01-01),使用默认分桶方式 - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2014-01-01"); - - 2. 增加分区,使用新的分桶数 - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2015-01-01") - DISTRIBUTED BY HASH(k1) BUCKETS 20; - - 3. 删除分区 - ALTER TABLE example_db.my_table - DROP PARTITION p1; - - [rollup] - 1. 创建 index: example_rollup_index,基于 base index(k1,k2,k3,v1,v2)。列式存储。 - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index(k1, k3, v1, v2) - PROPERTIES("storage_type"="column"); - - 2. 创建 index: example_rollup_index2,基于 example_rollup_index(k1,k3,v1,v2) - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index2 (k1, v1) - FROM example_rollup_index; - - 3. 删除 index: example_rollup_index2 - ALTER TABLE example_db.my_table - DROP ROLLUP example_rollup_index2; - - [schema change] - 1. 向 example_rollup_index 的 col1 后添加一个key列 new_col(非聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT KEY DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 2. 向example_rollup_index的col1后添加一个value列new_col(非聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 3. 向example_rollup_index的col1后添加一个key列new_col(聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 4. 向example_rollup_index的col1后添加一个value列new_col SUM聚合类型(聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT SUM DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 5. 向 example_rollup_index 添加多列(聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN (col1 INT DEFAULT "1", col2 FLOAT SUM DEFAULT "2.3") - TO example_rollup_index; - - 6. 从 example_rollup_index 删除一列 - ALTER TABLE example_db.my_table - DROP COLUMN col2 - FROM example_rollup_index; - - 7. 修改 base index 的 col1 列的类型为 BIGINT,并移动到 col2 列后面 - ALTER TABLE example_db.my_table - MODIFY COLUMN col1 BIGINT DEFAULT "1" AFTER col2; - - 8. 修改 base index 的 val1 列最大长度。原 val1 为 (val1 VARCHAR(32) REPLACE DEFAULT "abc") - ALTER TABLE example_db.my_table - MODIFY COLUMN val1 VARCHAR(64) REPLACE DEFAULT "abc"; - - 9. 
重新排序 example_rollup_index 中的列(设原列顺序为:k1,k2,k3,v1,v2) - ALTER TABLE example_db.my_table - ORDER BY (k3,k1,k2,v2,v1) - FROM example_rollup_index; - - 10. 同时执行两种操作 - ALTER TABLE example_db.my_table - ADD COLUMN v2 INT MAX DEFAULT "0" AFTER k2 TO example_rollup_index, - ORDER BY (k3,k1,k2,v2,v1) FROM example_rollup_index; - - 11. 修改表的 bloom filter 列 - ALTER TABLE example_db.my_table SET ("bloom_filter_columns"="k1,k2,k3"); - - 也可以合并到上面的 schema change 操作中(注意多子句的语法有少许区别) - ALTER TABLE example_db.my_table - DROP COLUMN col2 - PROPERTIES ("bloom_filter_columns"="k1,k2,k3"); - - 12. 修改表的Colocate 属性 - ALTER TABLE example_db.my_table set ("colocate_with"="t1"); - - [rename] - 1. 将名为 table1 的表修改为 table2 - ALTER TABLE table1 RENAME table2; - - 2. 将表 example_table 中名为 rollup1 的 rollup index 修改为 rollup2 - ALTER TABLE example_table RENAME ROLLUP rollup1 rollup2; - - 3. 将表 example_table 中名为 p1 的 partition 修改为 p2 - ALTER TABLE example_table RENAME PARTITION p1 p2; - -## keyword - ALTER,TABLE,ROLLUP,COLUMN,PARTITION,RENAME - -# CANCEL ALTER -## description - 该语句用于撤销一个 ALTER 操作。 - 1. 撤销 ALTER TABLE COLUMN 操作 - 语法: - CANCEL ALTER TABLE COLUMN - FROM db_name.table_name - - 2. 撤销 ALTER TABLE ROLLUP 操作 - 语法: - CANCEL ALTER TABLE ROLLUP - FROM db_name.table_name - - 2. 撤销 ALTER CLUSTER 操作 - 语法: - (待实现...) - - -## example - [CANCEL ALTER TABLE COLUMN] - 1. 撤销针对 my_table 的 ALTER COLUMN 操作。 - CANCEL ALTER TABLE COLUMN - FROM example_db.my_table; - - [CANCEL ALTER TABLE ROLLUP] - 1. 撤销 my_table 下的 ADD ROLLUP 操作。 - CANCEL ALTER TABLE ROLLUP - FROM example_db.my_table; - -## keyword - CANCEL,ALTER,TABLE,COLUMN,ROLLUP - -# CREATE VIEW -## description - 该语句用于创建一个逻辑视图 - 语法: - CREATE VIEW [IF NOT EXISTS] - [db_name.]view_name (column1[, column2, ...]) - AS query_stmt - - 说明: - 1. 视图为逻辑视图,没有物理存储。所有在视图上的查询相当于在视图对应的子查询上进行。 - 2. query_stmt 为任意支持的 SQL - -## example - 1. 在 example_db 上创建视图 example_view - CREATE VIEW example_db.example_view (k1, k2, k3, v1) - AS - SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table - WHERE k1 = 20160112 GROUP BY k1,k2,k3; - -## keyword - CREATE,VIEW - -# DROP VIEW -## description - 该语句用于删除一个逻辑视图 VIEW - 语法: - DROP VIEW [IF EXISTS] - [db_name.]view_name; - -## example - 1. 如果存在,删除 example_db 上的视图 example_view - DROP VIEW IF EXISTS example_db.example_view; - -## keyword - DROP,VIEW - -# RECOVER -## description - 该语句用于恢复之前删除的 database、table 或者 partition - 语法: - 1) 恢复 database - RECOVER DATABASE db_name; - 2) 恢复 table - RECOVER TABLE [db_name.]table_name; - 3) 恢复 partition - RECOVER PARTITION partition_name FROM [db_name.]table_name; - - 说明: - 1. 该操作仅能恢复之前一段时间内删除的元信息。默认为 3600 秒。 - 2. 如果删除元信息后新建立了同名同类型的元信息,则之前删除的元信息不能被恢复 - -## example - 1. 恢复名为 example_db 的 database - RECOVER DATABASE example_db; - - 2. 恢复名为 example_tbl 的 table - RECOVER TABLE example_db.example_tbl; - - 3. 恢复表 example_tbl 中名为 p1 的 partition - RECOVER PARTITION p1 FROM example_tbl; - -## keyword - RECOVER - -# ALTER DATABASE -## description - 该语句用于设置指定数据库的属性。(仅管理员使用) - 语法: - 1) 设置数据库数据量配额,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB - ALTER DATABASE db_name SET DATA QUOTA quota; - - 2) 重命名数据库 - ALTER DATABASE db_name RENAME new_db_name; - - 说明: - 重命名数据库后,如需要,请使用 REVOKE 和 GRANT 命令修改相应的用户权限。 - -## example - 1. 设置指定数据库数据量配额 - ALTER DATABASE example_db SET DATA QUOTA 10995116277760; - 上述单位为字节,等价于 - ALTER DATABASE example_db SET DATA QUOTA 10T; - - ALTER DATABASE example_db SET DATA QUOTA 100G; - - ALTER DATABASE example_db SET DATA QUOTA 200M; - - 2. 
将数据库 example_db 重命名为 example_db2 - ALTER DATABASE example_db RENAME example_db2; - -## keyword - ALTER,DATABASE,RENAME - -# CREATE REPOSITORY -## description - 该语句用于创建仓库。仓库用于属于备份或恢复。仅 root 或 superuser 用户可以创建仓库。 - 语法: - CREATE [READ ONLY] REPOSITORY `repo_name` - WITH BROKER `broker_name` - ON LOCATION `repo_location` - PROPERTIES ("key"="value", ...); - - 说明: - 1. 仓库的创建,依赖于已存在的 broker - 2. 如果是只读仓库,则只能在仓库上进行恢复。如果不是,则可以进行备份和恢复操作。 - 3. 根据 broker 的不同类型,PROPERTIES 有所不同,具体见示例。 - -## example - 1. 创建名为 bos_repo 的仓库,依赖 BOS broker "bos_broker",数据根目录为:bos://palo_backup - CREATE REPOSITORY `bos_repo` - WITH BROKER `bos_broker` - ON LOCATION "bos://palo_backup" - PROPERTIES - ( - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22", - "bos_secret_accesskey"="70999999999999de274d59eaa980a" - ); - - 2. 创建和示例 1 相同的仓库,但属性为只读: - CREATE READ ONLY REPOSITORY `bos_repo` - WITH BROKER `bos_broker` - ON LOCATION "bos://palo_backup" - PROPERTIES - ( - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22", - "bos_secret_accesskey"="70999999999999de274d59eaa980a" - ); - - 3. 创建名为 hdfs_repo 的仓库,依赖 Baidu hdfs broker "hdfs_broker",数据根目录为:hdfs://hadoop-name-node:54310/path/to/repo/ - CREATE REPOSITORY `hdfs_repo` - WITH BROKER `hdfs_broker` - ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/" - PROPERTIES - ( - "username" = "user", - "password" = "password" - ); - -## keyword - CREATE REPOSITORY - -# DROP REPOSITORY -## description - 该语句用于删除一个已创建的仓库。仅 root 或 superuser 用户可以删除仓库。 - 语法: - DROP REPOSITORY `repo_name`; - - 说明: - 1. 删除仓库,仅仅是删除该仓库在 Palo 中的映射,不会删除实际的仓库数据。删除后,可以再次通过指定相同的 broker 和 LOCATION 映射到该仓库。 - -## example - 1. 删除名为 bos_repo 的仓库: - DROP REPOSITORY `bos_repo`; - -## keyword - DROP REPOSITORY - -# BACKUP -## description - 该语句用于备份指定数据库下的数据。该命令为异步操作。提交成功后,需通过 SHOW BACKUP 命令查看进度。仅支持备份 OLAP 类型的表。 - 语法: - BACKUP SNAPSHOT [db_name].{snapshot_name} - TO `repository_name` - ON ( - `table_name` [PARTITION (`p1`, ...)], - ... - ) - PROPERTIES ("key"="value", ...); - - 说明: - 1. 同一数据库下只能有一个正在执行的 BACKUP 或 RESTORE 任务。 - 2. ON 子句中标识需要备份的表和分区。如果不指定分区,则默认备份该表的所有分区。 - 3. PROPERTIES 目前支持以下属性: - "type" = "full":表示这是一次全量更新(默认)。 - "timeout" = "3600":任务超时时间,默认为一天。单位秒。 - -## example - - 1. 全量备份 example_db 下的表 example_tbl 到仓库 example_repo 中: - BACKUP SNAPSHOT example_db.snapshot_label1 - TO example_repo - ON (example_tbl) - PROPERTIES ("type" = "full"); - - 2. 全量备份 example_db 下,表 example_tbl 的 p1, p2 分区,以及表 example_tbl2 到仓库 example_repo 中: - BACKUP SNAPSHOT example_db.snapshot_label2 - TO example_repo - ON - ( - example_tbl PARTITION (p1,p2), - example_tbl2 - ); - -## keyword - BACKUP - -# RESTORE -## description - 1. RESTORE - 该语句用于将之前通过 BACKUP 命令备份的数据,恢复到指定数据库下。该命令为异步操作。提交成功后,需通过 SHOW RESTORE 命令查看进度。仅支持恢复 OLAP 类型的表。 - 语法: - RESTORE SNAPSHOT [db_name].{snapshot_name} - FROM `repository_name` - ON ( - `table_name` [PARTITION (`p1`, ...)] [AS `tbl_alias`], - ... - ) - PROPERTIES ("key"="value", ...); - - 说明: - 1. 同一数据库下只能有一个正在执行的 BACKUP 或 RESTORE 任务。 - 2. ON 子句中标识需要恢复的表和分区。如果不指定分区,则默认恢复该表的所有分区。所指定的表和分区必须已存在于仓库备份中。 - 3. 可以通过 AS 语句将仓库中备份的表名恢复为新的表。但新表名不能已存在于数据库中。分区名称不能修改。 - 4. 可以将仓库中备份的表恢复替换数据库中已有的同名表,但须保证两张表的表结构完全一致。表结构包括:表名、列、分区、Rollup等等。 - 5. 可以指定恢复表的部分分区,系统会检查分区 Range 是否能够匹配。 - 6. 
PROPERTIES 目前支持以下属性: - "backup_timestamp" = "2018-05-04-16-45-08":指定了恢复对应备份的哪个时间版本,必填。该信息可以通过 `SHOW SNAPSHOT ON repo;` 语句获得。 - "replication_num" = "3":指定恢复的表或分区的副本数。默认为3。若恢复已存在的表或分区,则副本数必须和已存在表或分区的副本数相同。同时,必须有足够的 host 容纳多个副本。 - "timeout" = "3600":任务超时时间,默认为一天。单位秒。 - "meta_version" = 40:使用指定的 meta_version 来读取之前备份的元数据。注意,该参数作为临时方案,仅用于恢复老版本 Doris 备份的数据。最新版本的备份数据中已经包含 meta version,无需再指定。 - -## example - 1. 从 example_repo 中恢复备份 snapshot_1 中的表 backup_tbl 到数据库 example_db1,时间版本为 "2018-05-04-16-45-08"。恢复为 1 个副本: - RESTORE SNAPSHOT example_db1.`snapshot_1` - FROM `example_repo` - ON ( `backup_tbl` ) - PROPERTIES - ( - "backup_timestamp"="2018-05-04-16-45-08", - "replication_num" = "1" - ); - - 2. 从 example_repo 中恢复备份 snapshot_2 中的表 backup_tbl 的分区 p1,p2,以及表 backup_tbl2 到数据库 example_db1,并重命名为 new_tbl,时间版本为 "2018-05-04-17-11-01"。默认恢复为 3 个副本: - RESTORE SNAPSHOT example_db1.`snapshot_2` - FROM `example_repo` - ON - ( - `backup_tbl` PARTITION (`p1`, `p2`), - `backup_tbl2` AS `new_tbl` - ) - PROPERTIES - ( - "backup_timestamp"="2018-05-04-17-11-01" - ); - -## keyword - RESTORE - -# CANCEL BACKUP -## description - 该语句用于取消一个正在进行的 BACKUP 任务。 - 语法: - CANCEL BACKUP FROM db_name; - -## example - 1. 取消 example_db 下的 BACKUP 任务。 - CANCEL BACKUP FROM example_db; - -## keyword - CANCEL, BACKUP - -# CANCEL RESTORE -## description - 该语句用于取消一个正在进行的 RESTORE 任务。 - 语法: - CANCEL RESTORE FROM db_name; - - 注意: - 当取消处于 COMMIT 或之后阶段的恢复左右时,可能导致被恢复的表无法访问。此时只能通过再次执行恢复作业进行数据恢复。 - -## example - 1. 取消 example_db 下的 RESTORE 任务。 - CANCEL RESTORE FROM example_db; - -## keyword - CANCEL, RESTORE - -# HLL -## description - HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过程的中间结果,它只能作为表的value列类型 - 通过聚合来不断的减少数据量,以此来实现加快查询的目的,基于它到的是一个估算结果,误差大概在1%左右 - hll列是通过其它列或者导入数据里面的数据生成的,导入的时候通过hll_hash函数来指定数据中哪一列用于生成hll列 - 它常用于替代count distinct,通过结合rollup在业务上用于快速计算uv等 - - 相关函数: - - HLL_UNION_AGG(hll) - 此函数为聚合函数,用于计算满足条件的所有数据的基数估算。此函数还可用于分析函数,只支持默认窗口,不支持window从句。 - - HLL_RAW_AGG(hll) - 此函数为聚合函数,用于聚合hll类型字段,并且返回的还是hll类型。 - - HLL_CARDINALITY(hll) - 此函数用于计算单条hll列的基数估算 - - HLL_HASH(column_name) - 生成HLL列类型,用于insert或导入的时候,导入的使用见相关说明 - -## example - 1. 首先创建一张含有hll列的表 - create table test( - dt date, - id int, - name char(10), - province char(10), - os char(1), - set1 hll hll_union, - set2 hll hll_union) - distributed by hash(id) buckets 32; - - 2. 导入数据,导入的方式见相关help curl - - a. 使用表中的列生成hll列 - curl --location-trusted -uname:password -T data http://host/api/test_db/test/_load?label=load_1\&hll=set1,id:set2,name - - b. 使用数据中的某一列生成hll列 - curl --location-trusted -uname:password -T data http://host/api/test_db/test/_load?label=load_1\&hll=set1,cuid:set2,os - \&columns=dt,id,name,province,sex,cuid,os - - 3. 聚合数据,常用方式3种:(如果不聚合直接对base表查询,速度可能跟直接使用ndv速度差不多) - - a. 创建一个rollup,让hll列产生聚合, - alter table test add rollup test_rollup(dt, set1); - - b. 创建另外一张专门计算uv的表,然后insert数据) - - create table test_uv( - dt date, - uv_set hll hll_union) - distributed by hash(id) buckets 32; - - insert into test_uv select dt, set1 from test; - - c. 创建另外一张专门计算uv的表,然后insert并通过hll_hash根据test其它非hll列生成hll列 - - create table test_uv( - dt date, - id_set hll hll_union) - distributed by hash(id) buckets 32; - - insert into test_uv select dt, hll_hash(id) from test; - - 4. 查询,hll列不允许直接查询它的原始值,可以通过配套的函数进行查询 - - a. 求总uv - select HLL_UNION_AGG(uv_set) from test_uv; - - b. 求每一天的uv - select dt, HLL_CARDINALITY(uv_set) from test_uv; - - c. 
求test表中set1的聚合值 - select dt, HLL_CARDINALITY(uv) from (select dt, HLL_RAW_AGG(set1) as uv from test group by dt) tmp; - select dt, HLL_UNION_AGG(set1) as uv from test group by dt; - -## keyword - HLL - -# TRUNCATE TABLE -## description - 该语句用于清空指定表和分区的数据 - 语法: - - TRUNCATE TABLE [db.]tbl[ PARTITION(p1, p2, ...)]; - - 说明: - 1. 该语句清空数据,但保留表或分区。 - 2. 不同于 DELETE,该语句只能整体清空指定的表或分区,不能添加过滤条件。 - 3. 不同于 DELETE,使用该方式清空数据不会对查询性能造成影响。 - 4. 该操作删除的数据不可恢复。 - 5. 使用该命令时,表状态需为 NORMAL,即不允许正在进行 SCHEMA CHANGE 等操作。 - -## example - - 1. 清空 example_db 下的表 tbl - - TRUNCATE TABLE example_db.tbl; - - 2. 清空表 tbl 的 p1 和 p2 分区 - - TRUNCATE TABLE tbl PARTITION(p1, p2); - -## keyword - TRUNCATE,TABLE - -# Colocate Join -## description - Colocate/Local Join 就是指多个节点Join时没有数据移动和网络传输,每个节点只在本地进行Join, - 能够本地进行Join的前提是相同Join Key的数据导入时按照相同规则导入到固定的节点。 - - 1 How To Use: - - 只需要在建表时增加 colocate_with 这个属性即可,colocate_with的值 可以设置成同一组colocate 表中的任意一个, - 不过需要保证colocate_with属性中的表要先建立。 - - 假如需要对table t1 和t2 进行Colocate Join,可以按以下语句建表: - - CREATE TABLE `t1` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "t1" - ); - - CREATE TABLE `t2` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "t1" - ); - - 2 Colocate Join 目前的限制: - - 1. Colcoate Table 必须是OLAP类型的表 - 2. 相同colocate_with 属性的表的 BUCKET 数必须一样 - 3. 相同colocate_with 属性的表的 副本数必须一样 - 4. 相同colocate_with 属性的表的 DISTRIBUTED Columns的数据类型必须一样 - - 3 Colocate Join的适用场景: - - Colocate Join 十分适合几张表按照相同字段分桶,并高频根据相同字段Join的场景。 - - 4 FAQ: - - Q: 支持多张表进行Colocate Join 吗? - - A: 支持 - - Q: 支持Colocate 表和正常表 Join 吗? - - A: 支持 - - Q: Colocate 表支持用非分桶的Key进行Join吗? - - A: 支持:不符合Colocate Join条件的Join会使用Shuffle Join或Broadcast Join - - Q: 如何确定Join 是按照Colocate Join 执行的? - - A: explain的结果中Hash Join的孩子节点如果直接是OlapScanNode, 没有Exchange Node,就说明是Colocate Join - - Q: 如何修改colocate_with 属性? - - A: ALTER TABLE example_db.my_table set ("colocate_with"="target_table"); - - Q: 如何禁用colcoate join? - - A: set disable_colocate_join = true; 就可以禁用Colocate Join,查询时就会使用Shuffle Join 和Broadcast Join - -## keyword - - COLOCATE, JOIN, CREATE TABLE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md index f228d11a3f444c..b0ee8166cc893f 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md @@ -1,27 +1,28 @@ # DROP FUNCTION +## description -## Syntax +删除一个自定义函数。函数的名字、参数类型完全一致才能够被删除 + + Syntax ``` DROP FUNCTION function_name (arg_type [, ...]) ``` -## Description - -删除一个自定义函数。函数的名字、参数类型完全一致才能够被删除 - -## Parameters + Parameters > `function_name`: 要删除函数的名字 > > `arg_type`: 要删除函数的参数列表 > -## Examples +## example 1. 
删除掉一个函数 ``` DROP FUNCTION my_add(INT, INT) ``` +##keyword +DROP,FUNCTION,DROP,FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md index ec1818d6de30d9..eeb84e0ff79631 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md @@ -1,22 +1,21 @@ # SHOW FUNCTION +## description -## Syntax +查看数据库下所有的自定义函数。如果用户指定了数据库,那么查看对应数据库的,否则直接查询当前会话所在数据库 + +需要对这个数据库拥有 `SHOW` 权限 + + Syntax ``` SHOW FUNCTION [FROM db] ``` -## Description - -查看数据库下所有的自定义函数。如果用户指定了数据库,那么查看对应数据库的,否则直接查询当前会话所在数据库 - -需要对这个数据库拥有 `SHOW` 权限 - -## Parameters + Parameters > `db`: 要查询的数据库名字 -## Examples +## example ``` mysql> show function in testDb\G @@ -34,3 +33,5 @@ Intermediate Type: NULL Properties: {"symbol":"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_","object_file":"http://host:port/libudfsample.so","md5":"cfe7a362d10f3aaf6c49974ee0f1f878"} 2 rows in set (0.00 sec) ``` +##keyword +SHOW,FUNCTION,SHOW,FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md new file mode 100644 index 00000000000000..31f83bf6e793e4 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md @@ -0,0 +1,10 @@ +# CANCEL DELETE +## description + + 该语句用于撤销一个 DELETE 操作。(仅管理员使用!)(待实现) + +## example + +## keyword + CANCEL,DELETE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md new file mode 100644 index 00000000000000..c94a0aa232065f --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md @@ -0,0 +1,33 @@ +# CANCEL LABEL +## description + NAME: + cancel_label: cancel a transaction with label + + SYNOPSIS + curl -u user:passwd -XPOST http://host:port/api/{db}/{label}/_cancel + + DESCRIPTION + 该命令用于cancel一个指定Label对应的事务,事务在Prepare阶段能够被成功cancel + + RETURN VALUES + 执行完成后,会以Json格式返回这次导入的相关内容。当前包括一下字段 + Status: 是否成功cancel + Success: 成功cancel事务 + 其他: cancel失败 + Message: 具体的失败信息 + + ERRORS + +## example + + 1. cancel testDb, testLabel的作业 + curl -u root -XPOST http://host:port/api/testDb/testLabel/_cancel + +## keyword + CANCEL,LABEL + + + + + + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md new file mode 100644 index 00000000000000..02dbb25e66bbe1 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md @@ -0,0 +1,20 @@ +# CANCEL LOAD +## description + + 该语句用于撤销指定 load label 的批次的导入作业。 + 这是一个异步操作,任务提交成功则返回。执行后可使用 SHOW LOAD 命令查看进度。 + 语法: + CANCEL LOAD + [FROM db_name] + WHERE LABEL = "load_label"; + +## example + + 1. 
撤销数据库 example_db 上, label 为 example_db_test_load_label 的导入作业 + CANCEL LOAD + FROM example_db + WHERE LABEL = "example_db_test_load_label"; + +## keyword + CANCEL,LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/DELETE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/DELETE.md new file mode 100644 index 00000000000000..2ed78e10c8e8b9 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/DELETE.md @@ -0,0 +1,36 @@ +# DELETE +## description + + 该语句用于按条件删除指定 table(base index) partition 中的数据。 + 该操作会同时删除和此 base index 相关的 rollup index 的数据。 + 语法: + DELETE FROM table_name [PARTITION partition_name] + WHERE + column_name1 op value[ AND column_name2 op value ...]; + + 说明: + 1) op 的可选类型包括:=, >, <, >=, <=, != + 2) 只能指定 key 列上的条件。 + 2) 当选定的 key 列不存在于某个 rollup 中时,无法进行 delete。 + 3) 条件之间只能是“与”的关系。 + 若希望达成“或”的关系,需要将条件分写在两个 DELETE 语句中。 + 4) 如果为RANGE分区表,则必须指定 PARTITION。如果是单分区表,可以不指定。 + + 注意: + 该语句可能会降低执行后一段时间内的查询效率。 + 影响程度取决于语句中指定的删除条件的数量。 + 指定的条件越多,影响越大。 + +## example + + 1. 删除 my_table partition p1 中 k1 列值为 3 的数据行 + DELETE FROM my_table PARTITION p1 + WHERE k1 = 3; + + 2. 删除 my_table partition p1 中 k1 列值大于等于 3 且 k2 列值为 "abc" 的数据行 + DELETE FROM my_table PARTITION p1 + WHERE k1 >= 3 AND k2 = "abc"; + +## keyword + DELETE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/EXPORT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/EXPORT.md new file mode 100644 index 00000000000000..61fc286aef4422 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/EXPORT.md @@ -0,0 +1,58 @@ +# EXPORT +## description + + 该语句用于将指定表的数据导出到指定位置。 + 该功能通过 broker 进程实现。对于不同的目的存储系统,需要部署不同的 broker。可以通过 SHOW BROKER 查看已部署的 broker。 + 这是一个异步操作,任务提交成功则返回。执行后可使用 SHOW EXPORT 命令查看进度。 + + 语法: + EXPORT TABLE table_name + [PARTITION (p1[,p2])] + TO export_path + [opt_properties] + broker; + + 1. table_name + 当前要导出的表的表名,目前支持engine为olap和mysql的表的导出。 + + 2. partition + 可以只导出指定表的某些指定分区 + + 3. export_path + 导出的路径,需为目录。目前不能导出到本地,需要导出到broker。 + + 4. opt_properties + 用于指定一些特殊参数。 + 语法: + [PROPERTIES ("key"="value", ...)] + + 可以指定如下参数: + column_separator: 指定导出的列分隔符,默认为\t。 + line_delimiter: 指定导出的行分隔符,默认为\n。 + exec_mem_limit: 导出在单个 BE 节点的内存使用上限,默认为 2GB,单位为字节。 + timeout:导入作业的超时时间,默认为1天,单位是秒。 + tablet_num_per_task:每个子任务能分配的最大 Tablet 数量。 + + 5. broker + 用于指定导出使用的broker + 语法: + WITH BROKER broker_name ("key"="value"[,...]) + 这里需要指定具体的broker name, 以及所需的broker属性 + + 对于不同存储系统对应的 broker,这里需要输入的参数不同。具体参数可以参阅:`help broker load` 中 broker 所需属性。 + +## example + + 1. 将 testTbl 表中的所有数据导出到 hdfs 上 + EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); + + 2. 将 testTbl 表中的分区p1,p2导出到 hdfs 上 + + EXPORT TABLE testTbl PARTITION (p1,p2) TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); + 3. 
将 testTbl 表中的所有数据导出到 hdfs 上,以","作为列分隔符 + + EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"=",") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); + +## keyword + EXPORT + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md new file mode 100644 index 00000000000000..069a86dc7bde5e --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md @@ -0,0 +1,33 @@ +# GET LABEL STATE +## description + NAME: + get_label_state: get label's state + + SYNOPSIS + curl -u user:passwd http://host:port/api/{db}/{label}/_state + + DESCRIPTION + 该命令用于查看一个Label对应的事务状态 + + RETURN VALUES + 执行完毕后,会以Json格式返回这次导入的相关内容。当前包括一下字段 + Label:本次导入的 label,如果没有指定,则为一个 uuid。 + Status:此命令是否成功执行,Success表示成功执行 + Message: 具体的执行信息 + State: 只有在Status为Success时才有意义 + UNKNOWN: 没有找到对应的Label + PREPARE: 对应的事务已经prepare,但尚未提交 + COMMITTED: 事务已经提交,不能被cancel + VISIBLE: 事务提交,并且数据可见,不能被cancel + ABORTED: 事务已经被ROLLBACK,导入已经失败。 + + ERRORS + +## example + + 1. 获得testDb, testLabel的状态 + curl -u root http://host:port/api/testDb/testLabel/_state + +## keyword + GET, LABEL, STATE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/LOAD.md new file mode 100644 index 00000000000000..34a7c8c8ad5690 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/LOAD.md @@ -0,0 +1,284 @@ +# LOAD +## description + + Palo 目前支持以下4种导入方式: + + 1. Hadoop Load:基于 MR 进行 ETL 的导入。 + 2. Broker Load:使用 broker 进行进行数据导入。 + 3. Mini Load:通过 http 协议上传文件进行批量数据导入。 + 4. Stream Load:通过 http 协议进行流式数据导入。 + + 本帮助主要描述第一种导入方式,即 Hadoop Load 相关帮助信息。其余导入方式可以使用以下命令查看帮助: + + !!!该导入方式可能在后续某个版本即不再支持,建议使用其他导入方式进行数据导入。!!! + + 1. help broker load; + 2. help mini load; + 3. help stream load; + + Hadoop Load 仅适用于百度内部环境。公有云、私有云以及开源环境无法使用这种导入方式。 + 该导入方式必须设置用于 ETL 的 Hadoop 计算队列,设置方式可以通过 help set property 命令查看帮助。 + + Stream load 暂时只支持百度内部用户使用。开源社区和公有云用户将在后续版本更新中支持。 + +语法: + + LOAD LABEL load_label + ( + data_desc1[, data_desc2, ...] + ) + [opt_properties]; + + 1. load_label + + 当前导入批次的标签。在一个 database 内唯一。 + 语法: + [database_name.]your_label + + 2. data_desc + + 用于描述一批导入数据。 + 语法: + DATA INFILE + ( + "file_path1"[, file_path2, ...] + ) + [NEGATIVE] + INTO TABLE `table_name` + [PARTITION (p1, p2)] + [COLUMNS TERMINATED BY "column_separator"] + [FORMAT AS "file_type"] + [(column_list)] + [SET (k1 = func(k2))] + + 说明: + file_path: + + 文件路径,可以指定到一个文件,也可以用 * 通配符指定某个目录下的所有文件。通配符必须匹配到文件,而不能是目录。 + + PARTITION: + + 如果指定此参数,则只会导入指定的分区,导入分区以外的数据会被过滤掉。 + 如果不指定,默认导入table的所有分区。 + + NEGATIVE: + 如果指定此参数,则相当于导入一批“负”数据。用于抵消之前导入的同一批数据。 + 该参数仅适用于存在 value 列,并且 value 列的聚合类型仅为 SUM 的情况。 + + column_separator: + + 用于指定导入文件中的列分隔符。默认为 \t + 如果是不可见字符,则需要加\\x作为前缀,使用十六进制来表示分隔符。 + 如hive文件的分隔符\x01,指定为"\\x01" + + file_type: + + 用于指定导入文件的类型,例如:parquet、csv。默认值通过文件后缀名判断。 + + column_list: + + 用于指定导入文件中的列和 table 中的列的对应关系。 + 当需要跳过导入文件中的某一列时,将该列指定为 table 中不存在的列名即可。 + 语法: + (col_name1, col_name2, ...) 
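+
+            举例(仅作示意,其中 label 名与 tmp_skip 列名均为假设):源文件按逗号分隔共四列,
+            目标表只需要第 1、2、4 列,可将第 3 列映射为表中不存在的列名 tmp_skip,导入时该列即被跳过:
+
+            LOAD LABEL example_db.label_skip
+            (
+                DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
+                INTO TABLE `my_table`
+                COLUMNS TERMINATED BY ","
+                (k1, k2, tmp_skip, v1)
+            );
+
+            其中 tmp_skip 仅为占位用的假设列名,其对应的数据不会导入到表中的任何列。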
+ + SET: + + 如果指定此参数,可以将源文件某一列按照函数进行转化,然后将转化后的结果导入到table中。 + 目前支持的函数有: + + strftime(fmt, column) 日期转换函数 + fmt: 日期格式,形如%Y%m%d%H%M%S (年月日时分秒) + column: column_list中的列,即输入文件中的列。存储内容应为数字型的时间戳。 + 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 + + time_format(output_fmt, input_fmt, column) 日期格式转化 + output_fmt: 转化后的日期格式,形如%Y%m%d%H%M%S (年月日时分秒) + input_fmt: 转化前column列的日期格式,形如%Y%m%d%H%M%S (年月日时分秒) + column: column_list中的列,即输入文件中的列。存储内容应为input_fmt格式的日期字符串。 + 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 + + alignment_timestamp(precision, column) 将时间戳对齐到指定精度 + precision: year|month|day|hour + column: column_list中的列,即输入文件中的列。存储内容应为数字型的时间戳。 + 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 + 注意:对齐精度为year、month的时候,只支持20050101~20191231范围内的时间戳。 + + default_value(value) 设置某一列导入的默认值 + 不指定则使用建表时列的默认值 + + md5sum(column1, column2, ...) 将指定的导入列的值求md5sum,返回32位16进制字符串 + + replace_value(old_value[, new_value]) 将导入文件中指定的old_value替换为new_value + new_value如不指定则使用建表时列的默认值 + + hll_hash(column) 用于将表或数据里面的某一列转化成HLL列的数据结构 + + 3. opt_properties + + 用于指定一些特殊参数。 + 语法: + [PROPERTIES ("key"="value", ...)] + + 可以指定如下参数: + cluster: 导入所使用的 Hadoop 计算队列。 + timeout: 指定导入操作的超时时间。默认超时为3天。单位秒。 + max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 + load_delete_flag:指定该导入是否通过导入key列的方式删除数据,仅适用于UNIQUE KEY, + 导入时可不指定value列。默认为false。 + + 5. 导入数据格式样例 + + 整型类(TINYINT/SMALLINT/INT/BIGINT/LARGEINT):1, 1000, 1234 + 浮点类(FLOAT/DOUBLE/DECIMAL):1.1, 0.23, .356 + 日期类(DATE/DATETIME):2017-10-03, 2017-06-13 12:34:03。 + (注:如果是其他日期格式,可以在导入命令中,使用 strftime 或者 time_format 函数进行转换) + 字符串类(CHAR/VARCHAR):"I am a student", "a" + NULL值:\N + +## example + + 1. 导入一批数据,指定超时时间和过滤比例。指定导入队列为 my_cluster。 + + LOAD LABEL example_db.label1 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") + INTO TABLE `my_table` + ) + PROPERTIES + ( + "cluster" = "my_cluster", + "timeout" = "3600", + "max_filter_ratio" = "0.1" + ); + + 其中 hdfs_host 为 namenode 的 host,hdfs_port 为 fs.defaultFS 端口(默认9000) + + 2. 导入一批数据,包含多个文件。导入不同的 table,指定分隔符,指定列对应关系 + + LOAD LABEL example_db.label2 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file1") + INTO TABLE `my_table_1` + COLUMNS TERMINATED BY "," + (k1, k3, k2, v1, v2), + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file2") + INTO TABLE `my_table_2` + COLUMNS TERMINATED BY "\t" + (k1, k2, k3, v2, v1) + ); + + 3. 导入一批数据,指定hive的默认分隔符\x01,并使用通配符*指定目录下的所有文件 + + LOAD LABEL example_db.label3 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") + NEGATIVE + INTO TABLE `my_table` + COLUMNS TERMINATED BY "\\x01" + ); + + 4. 导入一批“负”数据 + + LOAD LABEL example_db.label4 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file) + NEGATIVE + INTO TABLE `my_table` + COLUMNS TERMINATED BY "\t" + ); + + 5. 导入一批数据,指定分区 + + LOAD LABEL example_db.label5 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") + INTO TABLE `my_table` + PARTITION (p1, p2) + COLUMNS TERMINATED BY "," + (k1, k3, k2, v1, v2) + ); + + 6. 
导入一批数据,指定分区, 并对导入文件的列做一些转化,如下: + 表结构为: + k1 datetime + k2 date + k3 bigint + k4 varchar(20) + k5 varchar(64) + k6 int + + 假设数据文件只有一行数据,5列,逗号分隔: + + 1537002087,2018-08-09 11:12:13,1537002087,-,1 + + 数据文件中各列,对应导入语句中指定的各列: + tmp_k1, tmp_k2, tmp_k3, k6, v1 + + 转换如下: + + 1) k1:将 tmp_k1 时间戳列转化为 datetime 类型的数据 + 2) k2:将 tmp_k2 datetime 类型的数据转化为 date 的数据 + 3) k3:将 tmp_k3 时间戳列转化为天级别时间戳 + 4) k4:指定导入默认值为1 + 5) k5:将 tmp_k1、tmp_k2、tmp_k3 列计算 md5 值 + 6) k6:将导入文件中的 - 值替换为 10 + + LOAD LABEL example_db.label6 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") + INTO TABLE `my_table` + PARTITION (p1, p2) + COLUMNS TERMINATED BY "," + (tmp_k1, tmp_k2, tmp_k3, k6, v1) + SET ( + k1 = strftime("%Y-%m-%d %H:%M:%S", tmp_k1), + k2 = time_format("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", tmp_k2), + k3 = alignment_timestamp("day", tmp_k3), + k4 = default_value("1"), + k5 = md5sum(tmp_k1, tmp_k2, tmp_k3), + k6 = replace_value("-", "10") + ) + ); + + 7. 导入数据到含有HLL列的表,可以是表中的列或者数据里面的列 + + LOAD LABEL example_db.label7 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") + INTO TABLE `my_table` + PARTITION (p1, p2) + COLUMNS TERMINATED BY "," + SET ( + v1 = hll_hash(k1), + v2 = hll_hash(k2) + ) + ); + + LOAD LABEL example_db.label8 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") + INTO TABLE `my_table` + PARTITION (p1, p2) + COLUMNS TERMINATED BY "," + (k1, k2, tmp_k3, tmp_k4, v1, v2) + SET ( + v1 = hll_hash(tmp_k3), + v2 = hll_hash(tmp_k4) + ) + ) + WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); + + 8. 导入Parquet文件中数据 指定FORMAT 为parquet, 默认是通过文件后缀判断 + LOAD LABEL example_db.label9 + ( + DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") + INTO TABLE `my_table` + FORMAT AS "parquet" + (k1, k2, k3) + ) + WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); + +## keyword + LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md new file mode 100644 index 00000000000000..81a63f73bdbf93 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md @@ -0,0 +1,104 @@ +# MINI LOAD +## description + + MINI LOAD 和 STREAM LOAD 的导入实现方式完全一致。在导入功能支持上,MINI LOAD 的功能是 STREAM LOAD 的子集。 + 后续的导入新功能只会在 STREAM LOAD 中支持,MINI LOAD 将不再新增功能。建议改用 STREAM LOAD,具体使用方式请 HELP STREAM LOAD。 + + MINI LOAD 是 通过 http 协议完成的导入方式。用户可以不依赖 Hadoop,也无需通过 Mysql 客户端,即可完成导入。 + 用户通过 http 协议描述导入,数据在接受 http 请求的过程中被流式的导入 Doris , **导入作业完成后** 返回给用户导入的结果。 + + * 注:为兼容旧版本 mini load 使用习惯,用户依旧可以通过 'SHOW LOAD' 命令来查看导入结果。 + + 语法: + 导入: + + curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table}/_load?label=xxx + + 查看导入信息 + + curl -u user:passwd http://host:port/api/{db}/_load_info?label=xxx + + HTTP协议相关说明 + + 权限认证 当前 Doris 使用 http 的 Basic 方式权限认证。所以在导入的时候需要指定用户名密码 + 这种方式是明文传递密码的,暂不支持加密传输。 + + Expect Doris 需要发送过来的 http 请求带有 'Expect' 头部信息,内容为 '100-continue'。 + 为什么呢?因为我们需要将请求进行 redirect,那么必须在传输数据内容之前, + 这样可以避免造成数据的多次传输,从而提高效率。 + + Content-Length Doris 需要在发送请求时带有 'Content-Length' 这个头部信息。如果发送的内容比 + 'Content-Length' 要少,那么 Doris 认为传输出现问题,则提交此次任务失败。 + NOTE: 如果,发送的数据比 'Content-Length' 要多,那么 Doris 只读取 'Content-Length' + 长度的内容,并进行导入 + + + 参数说明: + + user: 用户如果是在default_cluster中的,user即为user_name。否则为user_name@cluster_name。 + + label: 用于指定这一批次导入的 label,用于后期进行作业查询等。 + 这个参数是必须传入的。 + + columns: 用于描述导入文件中对应的列名字。 + 如果不传入,那么认为文件中的列顺序与建表的顺序一致, + 指定的方式为逗号分隔,例如:columns=k1,k2,k3,k4 + + 
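+            例如(仅作示意,主机、端口与库表名沿用上文示例的占位写法;命令行中的 & 需按下文 NOTE 用 \ 转义):
+
+            curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&columns=k1,k2,k3,k4
+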
column_separator: 用于指定列与列之间的分隔符,默认的为'\t' + NOTE: 需要进行url编码,譬如 + 需要指定'\t'为分隔符,那么应该传入'column_separator=%09' + 需要指定'\x01'为分隔符,那么应该传入'column_separator=%01' + 需要指定','为分隔符,那么应该传入'column_separator=%2c' + + + max_filter_ratio: 用于指定允许过滤不规范数据的最大比例,默认是0,不允许过滤 + 自定义指定应该如下:'max_filter_ratio=0.2',含义是允许20%的错误率 + + timeout: 指定 load 作业的超时时间,单位是秒。当load执行时间超过该阈值时,会自动取消。默认超时时间是 86400 秒。 + 建议指定 timeout 时间小于 86400 秒。 + + hll: 用于指定数据里面和表里面的HLL列的对应关系,表中的列和数据里面指定的列 + (如果不指定columns,则数据列面的列也可以是表里面的其它非HLL列)通过","分割 + 指定多个hll列使用“:”分割,例如: 'hll1,cuid:hll2,device' + + NOTE: + 1. 此种导入方式当前是在一台机器上完成导入工作,因而不宜进行数据量较大的导入工作。 + 建议导入数据量不要超过 1 GB + + 2. 当前无法使用 `curl -T "{file1, file2}"` 这样的方式提交多个文件,因为curl是将其拆成多个 + 请求发送的,多个请求不能共用一个label号,所以无法使用 + + 3. mini load 的导入方式和 streaming 完全一致,都是在流式的完成导入后,同步的返回结果给用户。 + 后续查询虽可以查到 mini load 的信息,但不能对其进行操作,查询只为兼容旧的使用方式。 + + 4. 当使用 curl 命令行导入时,需要在 & 前加入 \ 转义,否则参数信息会丢失。 + +## example + + 1. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表(用户是defalut_cluster中的) + curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123 + + 2. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表(用户是test_cluster中的)。超时时间是 3600 秒 + curl --location-trusted -u root@test_cluster:root -T testData http://fe.host:port/api/testDb/testTbl/_load?label=123&timeout=3600 + + 3. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率(用户是defalut_cluster中的) + curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 + + 4. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率,并且指定文件的列名(用户是defalut_cluster中的) + curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&columns=k1,k2,k3 + + 5. 使用streaming方式导入(用户是defalut_cluster中的) + seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_load?label=123 + + 6. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列(用户是defalut_cluster中的 + + curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 + \&hll=hll_column1,tmp_k4:hll_column2,tmp_k5\&columns=k1,k2,k3,tmp_k4,tmp_k5 + + 7. 
查看提交后的导入情况 + + curl -u root http://host:port/api/testDb/_load_info?label=123 + +## keyword + MINI, LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md new file mode 100644 index 00000000000000..dc58fb39d75992 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md @@ -0,0 +1,82 @@ +# MULTI LOAD +## description + + Syntax: + curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_start?label=xxx + curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table1}/_load?label=xxx\&sub_label=yyy + curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table2}/_load?label=xxx\&sub_label=zzz + curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_commit?label=xxx + curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_desc?label=xxx + + 'MULTI LOAD'在'MINI LOAD'的基础上,可以支持用户同时向多个表进行导入,具体的命令如上面所示 + '/api/{db}/_multi_start' 开始一个多表导入任务 + '/api/{db}/{table}/_load' 向一个导入任务添加一个要导入的表,与'MINI LOAD'的主要区别是,需要传入'sub_label'参数 + '/api/{db}/_multi_commit' 提交整个多表导入任务,后台开始进行处理 + '/api/{db}/_multi_abort' 放弃一个多表导入任务 + '/api/{db}/_multi_desc' 可以展示某个多表导入任务已经提交的作业数 + + HTTP协议相关说明 + 权限认证 当前 Doris 使用http的Basic方式权限认证。所以在导入的时候需要指定用户名密码 + 这种方式是明文传递密码的,鉴于我们当前都是内网环境。。。 + + Expect Doris 需要发送过来的http请求,需要有'Expect'头部信息,内容为'100-continue' + 为什么呢?因为我们需要将请求进行redirect,那么必须在传输数据内容之前, + 这样可以避免造成数据的多次传输,从而提高效率。 + + Content-Length Doris 需要在发送请求是带有'Content-Length'这个头部信息。如果发送的内容比 + 'Content-Length'要少,那么Palo认为传输出现问题,则提交此次任务失败。 + NOTE: 如果,发送的数据比'Content-Length'要多,那么 Doris 只读取'Content-Length' + 长度的内容,并进行导入 + + 参数说明: + user: 用户如果是在default_cluster中的,user即为user_name。否则为user_name@cluster_name。 + + label: 用于指定这一批次导入的label号,用于后期进行作业状态查询等。 + 这个参数是必须传入的。 + + sub_label: 用于指定一个多表导入任务内部的子版本号。对于多表导入的load, 这个参数是必须传入的。 + + columns: 用于描述导入文件中对应的列名字。 + 如果不传入,那么认为文件中的列顺序与建表的顺序一致, + 指定的方式为逗号分隔,例如:columns=k1,k2,k3,k4 + + column_separator: 用于指定列与列之间的分隔符,默认的为'\t' + NOTE: 需要进行url编码,譬如需要指定'\t'为分隔符, + 那么应该传入'column_separator=%09' + + max_filter_ratio: 用于指定允许过滤不规范数据的最大比例,默认是0,不允许过滤 + 自定义指定应该如下:'max_filter_ratio=0.2',含义是允许20%的错误率 + 在'_multi_start'时传入有效果 + + NOTE: + 1. 此种导入方式当前是在一台机器上完成导入工作,因而不宜进行数据量较大的导入工作。 + 建议导入数据量不要超过1GB + + 2. 当前无法使用`curl -T "{file1, file2}"`这样的方式提交多个文件,因为curl是将其拆成多个 + 请求发送的,多个请求不能共用一个label号,所以无法使用 + + 3. 支持类似streaming的方式使用curl来向 Doris 中导入数据,但是,只有等这个streaming结束后 Doris + 才会发生真实的导入行为,这中方式数据量也不能过大。 + +## example + + 1. 将本地文件'testData1'中的数据导入到数据库'testDb'中'testTbl1'的表,并且 + 把'testData2'的数据导入到'testDb'中的表'testTbl2'(用户是defalut_cluster中的) + curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 + curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 + curl --location-trusted -u root -T testData2 http://host:port/api/testDb/testTbl2/_load?label=123\&sub_label=2 + curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_commit?label=123 + + 2. 多表导入中途放弃(用户是defalut_cluster中的) + curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 + curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 + curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_abort?label=123 + + 3. 
多表导入查看已经提交多少内容(用户是defalut_cluster中的) + curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 + curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 + curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_desc?label=123 + +## keyword + MULTI, MINI, LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md new file mode 100644 index 00000000000000..13a371d05291b3 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md @@ -0,0 +1,10 @@ +# PAUSE ROUTINE LOAD +## example + +1. 暂停名称为 test1 的例行导入作业。 + + PAUSE ROUTINE LOAD FOR test1; + +## keyword + PAUSE,ROUTINE,LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md new file mode 100644 index 00000000000000..1693bbc71ad6f8 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md @@ -0,0 +1,15 @@ +# RESTORE TABLET +## description + + 该功能用于恢复trash目录中被误删的tablet数据。 + + 说明:这个功能暂时只在be服务中提供一个http接口。如果要使用, + 需要向要进行数据恢复的那台be机器的http端口发送restore tablet api请求。api格式如下: + METHOD: POST + URI: http://be_host:be_http_port/api/restore_tablet?tablet_id=xxx&schema_hash=xxx + +## example + + curl -X POST "http://hostname:8088/api/restore_tablet?tablet_id=123456&schema_hash=1111111" +##keyword +RESTORE,TABLET,RESTORE,TABLET diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md new file mode 100644 index 00000000000000..2401b627ef6e10 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md @@ -0,0 +1,10 @@ +# RESUME ROUTINE LOAD +## example + +1. 恢复名称为 test1 的例行导入作业。 + + RESUME ROUTINE LOAD FOR test1; + +## keyword + RESUME,ROUTINE,LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md similarity index 62% rename from docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md index 4fd4568eddaa47..66cd410b1eb78d 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/routine_load.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md @@ -247,197 +247,3 @@ ## keyword CREATE,ROUTINE,LOAD -# PAUSE ROUTINE LOAD - - 该语句用于暂停一个指定的例行导入作业。 - -语法: - - PAUSE ROUTINE LOAD FOR [db.]name; - -## example - -1. 暂停名称为 test1 的例行导入作业。 - - PAUSE ROUTINE LOAD FOR test1; - -## keyword - PAUSE,ROUTINE,LOAD - -# RESUME ROUTINE LOAD - - 该语句用于恢复一个被暂停的例行导入作业。 - -语法: - - RESUME ROUTINE LOAD FOR [db.]name; - -## example - -1. 恢复名称为 test1 的例行导入作业。 - - RESUME ROUTINE LOAD FOR test1; - -## keyword - RESUME,ROUTINE,LOAD - -# STOP ROUTINE LOAD - - 该语句用于停止一个被暂停的例行导入作业。 - -语法: - - STOP ROUTINE LOAD FOR [db.]name; - - 被停止的作业无法再恢复运行。 - -## example - -1. 
停止名称为 test1 的例行导入作业。 - - STOP ROUTINE LOAD FOR test1; - -## keyword - STOP,ROUTINE,LOAD - -# SHOW ROUTINE LOAD - - 该语句用于展示指定名称的例行导入作业的详细信息。 - -语法: - - SHOW [ALL] ROUTINE LOAD FOR [[db.]name] - -展示结果包括如下信息: - - 1. Id:作业id。 - 2. Name:作业的名称。 - 3. CreateTime:作业创建时间。 - 4. PauseTime:作业暂停时间。 - 5. EndTime:作业结束时间。 - 6. DdName:数据库名称。 - 7. TableName:目的表名称。 - 8. State:作业状态。 - - NEED_SCHEDULE:等待被调度。 - RUNNING:运行中。 - PAUSE:暂停中。 - STOPPED:作业由用户停止。 - CANCELLED:作业因失败停止。 - - 9. DataSourceType:数据源类型。 - - KAFKA - - 10. CurrentTaskNum:当前正在运行的子任务的个数 - 11. JobProperties:作业相关配置信息,对应创建语句中的 load_properties 和 job_properties。以 json 格式表示。 - - { - "partitions": "*", // 目的表的分区,星号表示没有指定。 - "columnToColumnExpr": "k1,yyy,v1,v2,v3,v4,v5,v6,k2=`k1` + 1", - "maxBatchIntervalS": "10", - "whereExpr": "`k1` > 100", - "maxBatchSizeBytes": "104857600", - "columnSeparator": "\t", - "maxErrorNum": "0", - "currentTaskConcurrentNum": "3", // 当前例行作业的子任务并发数 - "maxBatchRows": "200000" - } - - 12. Statistic:作业运行状态的统计信息。以 json 格式表示。 - - { - "errorRows": 0, // 总的错误行数 - "loadedRows": 6584959, // 总导入的行数 - "unselectedRows": 2392, // 被 where 条件过滤的行数 - "totalRows": 6587351, // 总消费的行数,totalRows = errorRows + loadedRows + unselectedRows - "loadRowsRate": 91000, // 导入速率(rows/s) - "receivedBytes": 861626324, // 总消费的字节数 - "receivedBytesRate": 11915000, // 消费速率 (Bytes/s) - "committedTaskNum": 33, // 提交成功的子任务数 - "abortedTaskNum": 2, // 失败的子任务数 - "taskExecuteTaskMs": 72312 // 子任务执行时间,单位毫秒 - } - - 13. Progress:作业进度。以 json 格式表示。 - - 如果数据源是 Kafka,则显示每个 kafka partition,当前已经被消费的 offset。 - - { - "0": 2199288, - "1": 2194329, - "2": 2193731 - } - - OFFSET_BEGINNING: 表示用户指定了从头开始消费,并且还未开始消费。 - OFFSET_END: 表示用户指定了从末尾开始消费,并且还未开始消费。 - OFFSET_ZERO: 表示用户指定了从 0 开始消费,并且还未开始消费。 - - 14. CustomProperties: 自定义参数。 - -## example - -1. 展示名称为 test1 的所有例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 - - SHOW ALL ROUTINE LOAD FOR test1; - -2. 展示名称为 test1 的当前正在运行的例行导入作业 - - SHOW ROUTINE LOAD FOR test1; - -3. 显示 example_db 下,所有的例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 - - use example_db; - SHOW ALL ROUTINE LOAD; - -4. 显示 example_db 下,所有正在运行的例行导入作业 - - use example_db; - SHOW ROUTINE LOAD; - -5. 显示 example_db 下,名称为 test1 的当前正在运行的例行导入作业 - - SHOW ROUTINE LOAD FOR example_db.test1; - -6. 显示 example_db 下,名称为 test1 的所有例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 - - SHOW ALL ROUTINE LOAD FOR example_db.test1; - -## keyword - SHOW,ROUTINE,LOAD - -# SHOW ROUTINE LOAD TASK - - 该语句用于展示指定例行导入作业,当前正在运行的子任务信息。 - -语法: - - SHOW ROUTINE LOAD TASK [FROM db] WHERE JobName = "name"; - -展示结果包括如下信息: - - TaskId:task id。 - TxnId:task 对应的事务id。 - JobId:作业id。 - CreateTime:任务创建时间。 - ExecuteStartTime:任务开始执行的时间。 - BeId:任务所在的 Backend id。 - DataSourceProperties: - - 任务的参数,以 json 格式展示。 - 当数据源为 Kafka 时,显示如下: - - { - "2":2193732 - } - - 表示该任务准备消费的 kafka partition 和起始 offset。 - -## example - -1. 展示名为 test1 的例行导入任务的子任务信息。 - - SHOW ROUTINE LOAD TASK WHERE JobName = "test1"; - -## keyword - SHOW,ROUTINE,LOAD,TASK diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md new file mode 100644 index 00000000000000..54d1d51fe619f7 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md @@ -0,0 +1,25 @@ +# SHOW ALTER +## description + 该语句用于展示当前正在进行的各类修改任务的执行情况 + 语法: + SHOW ALTER [CLUSTER | TABLE [COLUMN | ROLLUP] [FROM db_name]]; + + 说明: + TABLE COLUMN:展示修改列的 ALTER 任务 + TABLE ROLLUP:展示创建或删除 ROLLUP index 的任务 + 如果不指定 db_name,使用当前默认 db + CLUSTER: 展示集群操作相关任务情况(仅管理员使用!待实现...) + +## example + 1. 
展示默认 db 的所有修改列的任务执行情况 + SHOW ALTER TABLE COLUMN; + + 2. 展示指定 db 的创建或删除 ROLLUP index 的任务执行情况 + SHOW ALTER TABLE ROLLUP FROM example_db; + + 3. 展示集群操作相关任务(仅管理员使用!待实现...) + SHOW ALTER CLUSTER; + +## keyword + SHOW,ALTER + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md new file mode 100644 index 00000000000000..97ad8c193fc7ae --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md @@ -0,0 +1,37 @@ +# SHOW BACKUP +## description + 该语句用于查看 BACKUP 任务 + 语法: + SHOW BACKUP [FROM db_name] + + 说明: + 1. Palo 中仅保存最近一次 BACKUP 任务。 + 2. 各列含义如下: + JobId: 唯一作业id + SnapshotName: 备份的名称 + DbName: 所属数据库 + State: 当前阶段 + PENDING: 提交作业后的初始状态 + SNAPSHOTING: 执行快照中 + UPLOAD_SNAPSHOT:快照完成,准备上传 + UPLOADING: 快照上传中 + SAVE_META: 将作业元信息保存为本地文件 + UPLOAD_INFO: 上传作业元信息 + FINISHED: 作业成功 + CANCELLED: 作业失败 + BackupObjs: 备份的表和分区 + CreateTime: 任务提交时间 + SnapshotFinishedTime: 快照完成时间 + UploadFinishedTime: 快照上传完成时间 + FinishedTime: 作业结束时间 + UnfinishedTasks: 在 SNAPSHOTING 和 UPLOADING 阶段会显示还未完成的子任务id + Status: 如果作业失败,显示失败信息 + Timeout: 作业超时时间,单位秒 + +## example + 1. 查看 example_db 下最后一次 BACKUP 任务。 + SHOW BACKUP FROM example_db; + +## keyword + SHOW, BACKUP + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md new file mode 100644 index 00000000000000..961758677ed0cc --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md @@ -0,0 +1,21 @@ +# SHOW DATA +## description + 该语句用于展示数据量 + 语法: + SHOW DATA [FROM db_name[.table_name]]; + + 说明: + 1. 如果不指定 FROM 子句,使用展示当前 db 下细分到各个 table 的数据量 + 2. 如果指定 FROM 子句,则展示 table 下细分到各个 index 的数据量 + 3. 如果想查看各个 Partition 的大小,请参阅 help show partitions + +## example + 1. 展示默认 db 的各个 table 的数据量及汇总数据量 + SHOW DATA; + + 2. 展示指定 db 的下指定表的细分数据量 + SHOW DATA FROM example_db.table_name; + +## keyword + SHOW,DATA + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md new file mode 100644 index 00000000000000..bc46fd04ba83c2 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md @@ -0,0 +1,9 @@ +# SHOW DATABASES +## description + 该语句用于展示当前可见的 db + 语法: + SHOW DATABASES; + +## keyword + SHOW,DATABASES + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md new file mode 100644 index 00000000000000..89af796c75bc63 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md @@ -0,0 +1,13 @@ +# SHOW DELETE +## description + 该语句用于展示已执行成功的历史 delete 任务 + 语法: + SHOW DELETE [FROM db_name] + +## example + 1. 
展示数据库 database 的所有历史 delete 任务 + SHOW DELETE FROM database; + +## keyword + SHOW,DELETE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md new file mode 100644 index 00000000000000..54de1c9b64fc06 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md @@ -0,0 +1,36 @@ +# SHOW EXPORT +## description + 该语句用于展示指定的导出任务的执行情况 + 语法: + SHOW EXPORT + [FROM db_name] + [ + WHERE + [EXPORT_JOB_ID = your_job_id] + [STATE = ["PENDING"|"EXPORTING"|"FINISHED"|"CANCELLED"]] + ] + [ORDER BY ...] + [LIMIT limit]; + + 说明: + 1) 如果不指定 db_name,使用当前默认db + 2) 如果指定了 STATE,则匹配 EXPORT 状态 + 3) 可以使用 ORDER BY 对任意列组合进行排序 + 4) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 + +## example + 1. 展示默认 db 的所有导出任务 + SHOW EXPORT; + + 2. 展示指定 db 的导出任务,按 StartTime 降序排序 + SHOW EXPORT FROM example_db ORDER BY StartTime DESC; + + 3. 展示指定 db 的导出任务,state 为 "exporting", 并按 StartTime 降序排序 + SHOW EXPORT FROM example_db WHERE STATE = "exporting" ORDER BY StartTime DESC; + + 4. 展示指定db,指定job_id的导出任务 + SHOW EXPORT FROM example_db WHERE EXPORT_JOB_ID = job_id; + +## keyword + SHOW,EXPORT + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md new file mode 100644 index 00000000000000..c4b56bd2c0340b --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md @@ -0,0 +1,49 @@ +# SHOW LOAD +## description + 该语句用于展示指定的导入任务的执行情况 + 语法: + SHOW LOAD + [FROM db_name] + [ + WHERE + [LABEL [ = "your_label" | LIKE "label_matcher"]] + [STATE = ["PENDING"|"ETL"|"LOADING"|"FINISHED"|"CANCELLED"|]] + ] + [ORDER BY ...] + [LIMIT limit][OFFSET offset]; + + 说明: + 1) 如果不指定 db_name,使用当前默认db + 2) 如果使用 LABEL LIKE,则会匹配导入任务的 label 包含 label_matcher 的导入任务 + 3) 如果使用 LABEL = ,则精确匹配指定的 label + 4) 如果指定了 STATE,则匹配 LOAD 状态 + 5) 可以使用 ORDER BY 对任意列组合进行排序 + 6) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 + 7) 如果指定了 OFFSET,则从偏移量offset开始显示查询结果。默认情况下偏移量为0。 + 8) 如果是使用 broker/mini load,则 URL 列中的连接可以使用以下命令查看: + + SHOW LOAD WARNINGS ON 'url' + +## example + 1. 展示默认 db 的所有导入任务 + SHOW LOAD; + + 2. 展示指定 db 的导入任务,label 中包含字符串 "2014_01_02",展示最老的10个 + SHOW LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10; + + 3. 展示指定 db 的导入任务,指定 label 为 "load_example_db_20140102" 并按 LoadStartTime 降序排序 + SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" ORDER BY LoadStartTime DESC; + + 4. 展示指定 db 的导入任务,指定 label 为 "load_example_db_20140102" ,state 为 "loading", 并按 LoadStartTime 降序排序 + SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" AND STATE = "loading" ORDER BY LoadStartTime DESC; + + 5. 展示指定 db 的导入任务 并按 LoadStartTime 降序排序,并从偏移量5开始显示10条查询结果 + SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 5,10; + SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 10 offset 5; + + 6. 
小批量导入是查看导入状态的命令 + curl --location-trusted -u {user}:{passwd} http://{hostname}:{port}/api/{database}/_load_info?label={labelname} + +## keyword + SHOW,LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md new file mode 100644 index 00000000000000..44e959b55bbed5 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md @@ -0,0 +1,16 @@ +# SHOW PARTITIONS +## description + 该语句用于展示分区信息 + 语法: + SHOW PARTITIONS FROM [db_name.]table_name [PARTITION partition_name]; + +## example + 1. 展示指定 db 的下指定表的分区信息 + SHOW PARTITIONS FROM example_db.table_name; + + 1. 展示指定 db 的下指定表的指定分区的信息 + SHOW PARTITIONS FROM example_db.table_name PARTITION p1; + +## keyword + SHOW,PARTITIONS + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md new file mode 100644 index 00000000000000..17ca6eea5bd1e4 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md @@ -0,0 +1,16 @@ +# SHOW PROPERTY +## description + 该语句用于查看用户的属性 + 语法: + SHOW PROPERTY [FOR user] [LIKE key] + +## example + 1. 查看 jack 用户的属性 + SHOW PROPERTY FOR 'jack' + + 2. 查看 jack 用户导入cluster相关属性 + SHOW PROPERTY FOR 'jack' LIKE '%load_cluster%' + +## keyword + SHOW, PROPERTY + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md new file mode 100644 index 00000000000000..9fb7f159938d25 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md @@ -0,0 +1,23 @@ +# SHOW REPOSITORIES +## description + 该语句用于查看当前已创建的仓库。 + 语法: + SHOW REPOSITORIES; + + 说明: + 1. 各列含义如下: + RepoId: 唯一的仓库ID + RepoName: 仓库名称 + CreateTime: 第一次创建该仓库的时间 + IsReadOnly: 是否为只读仓库 + Location: 仓库中用于备份数据的根目录 + Broker: 依赖的 Broker + ErrMsg: Palo 会定期检查仓库的连通性,如果出现问题,这里会显示错误信息 + +## example + 1. 查看已创建的仓库: + SHOW REPOSITORIES; + +## keyword + SHOW, REPOSITORY, REPOSITORIES + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md new file mode 100644 index 00000000000000..1139a627977e45 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md @@ -0,0 +1,41 @@ +# SHOW RESTORE +## description + 该语句用于查看 RESTORE 任务 + 语法: + SHOW RESTORE [FROM db_name] + + 说明: + 1. Palo 中仅保存最近一次 RESTORE 任务。 + 2. 各列含义如下: + JobId: 唯一作业id + Label: 要恢复的备份的名称 + Timestamp: 要恢复的备份的时间版本 + DbName: 所属数据库 + State: 当前阶段 + PENDING: 提交作业后的初始状态 + SNAPSHOTING: 执行快照中 + DOWNLOAD: 快照完成,准备下载仓库中的快照 + DOWNLOADING: 快照下载中 + COMMIT: 快照下载完成,准备生效 + COMMITING: 生效中 + FINISHED: 作业成功 + CANCELLED: 作业失败 + AllowLoad: 恢复时是否允许导入(当前不支持) + ReplicationNum: 指定恢复的副本数 + RestoreJobs: 要恢复的表和分区 + CreateTime: 任务提交时间 + MetaPreparedTime: 元数据准备完成时间 + SnapshotFinishedTime: 快照完成时间 + DownloadFinishedTime: 快照下载完成时间 + FinishedTime: 作业结束时间 + UnfinishedTasks: 在 SNAPSHOTING、DOWNLOADING 和 COMMITING 阶段会显示还未完成的子任务id + Status: 如果作业失败,显示失败信息 + Timeout: 作业超时时间,单位秒 + +## example + 1. 
查看 example_db 下最近一次 RESTORE 任务。 + SHOW RESTORE FROM example_db; + +## keyword + SHOW, RESTORE + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md new file mode 100644 index 00000000000000..d01e3bb3c5b1af --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md @@ -0,0 +1,9 @@ +# SHOW ROUTINE LOAD TASK +## example + +1. 展示名为 test1 的例行导入任务的子任务信息。 + + SHOW ROUTINE LOAD TASK WHERE JobName = "test1"; + +## keyword + SHOW,ROUTINE,LOAD,TASK diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md new file mode 100644 index 00000000000000..f453e6c2ca5b96 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md @@ -0,0 +1,32 @@ +# SHOW ROUTINE LOAD +## example + +1. 展示名称为 test1 的所有例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 + + SHOW ALL ROUTINE LOAD FOR test1; + +2. 展示名称为 test1 的当前正在运行的例行导入作业 + + SHOW ROUTINE LOAD FOR test1; + +3. 显示 example_db 下,所有的例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 + + use example_db; + SHOW ALL ROUTINE LOAD; + +4. 显示 example_db 下,所有正在运行的例行导入作业 + + use example_db; + SHOW ROUTINE LOAD; + +5. 显示 example_db 下,名称为 test1 的当前正在运行的例行导入作业 + + SHOW ROUTINE LOAD FOR example_db.test1; + +6. 显示 example_db 下,名称为 test1 的所有例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 + + SHOW ALL ROUTINE LOAD FOR example_db.test1; + +## keyword + SHOW,ROUTINE,LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md new file mode 100644 index 00000000000000..6758e865c32839 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md @@ -0,0 +1,31 @@ +# SHOW SNAPSHOT +## description + 该语句用于查看仓库中已存在的备份。 + 语法: + SHOW SNAPSHOT ON `repo_name` + [WHERE SNAPSHOT = "snapshot" [AND TIMESTAMP = "backup_timestamp"]]; + + 说明: + 1. 各列含义如下: + Snapshot: 备份的名称 + Timestamp: 对应备份的时间版本 + Status: 如果备份正常,则显示 OK,否则显示错误信息 + + 2. 如果指定了 TIMESTAMP,则会额外显示如下信息: + Database: 备份数据原属的数据库名称 + Details: 以 Json 的形式,展示整个备份的数据目录及文件结构 + +## example + 1. 查看仓库 example_repo 中已有的备份: + SHOW SNAPSHOT ON example_repo; + + 2. 仅查看仓库 example_repo 中名称为 backup1 的备份: + SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "backup1"; + + 2. 
查看仓库 example_repo 中名称为 backup1 的备份,时间版本为 "2018-05-05-15-34-26" 的详细信息: + SHOW SNAPSHOT ON example_repo + WHERE SNAPSHOT = "backup1" AND TIMESTAMP = "2018-05-05-15-34-26"; + +## keyword + SHOW, SNAPSHOT + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md new file mode 100644 index 00000000000000..b2670efd724338 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md @@ -0,0 +1,9 @@ +# SHOW TABLES +## description + 该语句用于展示当前 db 下所有的 table + 语法: + SHOW TABLES; + +## keyword + SHOW,TABLES + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md new file mode 100644 index 00000000000000..29452c5e34d901 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md @@ -0,0 +1,17 @@ +# SHOW TABLET +## description + 该语句用于显示 tablet 相关的信息(仅管理员使用) + 语法: + SHOW TABLET + [FROM [db_name.]table_name | tablet_id] + +## example + 1. 显示指定 db 的下指定表所有 tablet 信息 + SHOW TABLET FROM example_db.table_name; + + 2. 显示指定 tablet id 为 10000 的 tablet 的父层级 id 信息 + SHOW TABLET 10000; + +## keyword + SHOW,TABLET + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md new file mode 100644 index 00000000000000..3e7cd6f7ca0f17 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md @@ -0,0 +1,10 @@ +# STOP ROUTINE LOAD +## example + +1. 停止名称为 test1 的例行导入作业。 + + STOP ROUTINE LOAD FOR test1; + +## keyword + STOP,ROUTINE,LOAD + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/streaming.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md similarity index 78% rename from docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/streaming.md rename to docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md index 3a279e2d0c94ac..e8298dae3c063f 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/streaming.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md @@ -90,69 +90,3 @@ ## keyword STREAM,LOAD -# GET LABEL STATE -## description - NAME: - get_label_state: get label's state - - SYNOPSIS - curl -u user:passwd http://host:port/api/{db}/{label}/_state - - DESCRIPTION - 该命令用于查看一个Label对应的事务状态 - - RETURN VALUES - 执行完毕后,会以Json格式返回这次导入的相关内容。当前包括一下字段 - Label:本次导入的 label,如果没有指定,则为一个 uuid。 - Status:此命令是否成功执行,Success表示成功执行 - Message: 具体的执行信息 - State: 只有在Status为Success时才有意义 - UNKNOWN: 没有找到对应的Label - PREPARE: 对应的事务已经prepare,但尚未提交 - COMMITTED: 事务已经提交,不能被cancel - VISIBLE: 事务提交,并且数据可见,不能被cancel - ABORTED: 事务已经被ROLLBACK,导入已经失败。 - - ERRORS - -## example - - 1. 
获得testDb, testLabel的状态 - curl -u root http://host:port/api/testDb/testLabel/_state - -## keyword - GET, LABEL, STATE - -# CANCEL LABEL -## description - NAME: - cancel_label: cancel a transaction with label - - SYNOPSIS - curl -u user:passwd -XPOST http://host:port/api/{db}/{label}/_cancel - - DESCRIPTION - 该命令用于cancel一个指定Label对应的事务,事务在Prepare阶段能够被成功cancel - - RETURN VALUES - 执行完成后,会以Json格式返回这次导入的相关内容。当前包括一下字段 - Status: 是否成功cancel - Success: 成功cancel事务 - 其他: cancel失败 - Message: 具体的失败信息 - - ERRORS - -## example - - 1. cancel testDb, testLabel的作业 - curl -u root -XPOST http://host:port/api/testDb/testLabel/_cancel - -## keyword - CANCEL,LABEL - - - - - - diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md index d571410ebf459a..91ed85227e019f 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md @@ -1,6 +1,7 @@ # INSERT +## description -## Syntax + Syntax ``` INSERT INTO table_name @@ -10,9 +11,7 @@ INSERT INTO table_name { VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query } ``` -## description - -## Parameters + Parameters > tablet_name: 导入数据的目的表。可以是 `db_name.table_name` 形式 > @@ -29,11 +28,11 @@ INSERT INTO table_name > hint: 用于指示 `INSERT` 执行行为的一些指示符。`streaming` 和 默认的非 `streaming` 方式均会使用同步方式完成 `INSERT` 语句执行 > 非 `streaming` 方式在执行完成后会返回一个 label 方便用户通过 `SHOW LOAD` 查询导入的状态 -## Note + Note 当前执行 `INSERT` 语句时,对于有不符合目标表格式的数据,默认的行为是过滤,比如字符串超长等。但是对于有要求数据不能够被过滤的业务场景,可以通过设置会话变量 `enable_insert_strict` 为 `true` 来确保当有数据被过滤掉的时候,`INSERT` 不会被执行成功。 -## Examples +## example `test` 表包含两个列`c1`, `c2`。 @@ -78,3 +77,5 @@ INSERT INTO test (c1, c2) SELECT * from test2 异步的导入其实是,一个同步的导入封装成了异步。填写 streaming 和不填写的*执行效率是一样*的。 由于Doris之前的导入方式都是异步导入方式,为了兼容旧有的使用习惯,不加 streaming 的 `INSERT` 语句依旧会返回一个 label,用户需要通过`SHOW LOAD`命令查看此`label`导入作业的状态。 +##keyword +INSERT,INSERT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md deleted file mode 100644 index cc55e5f25602a9..00000000000000 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/manipulation_stmt.md +++ /dev/null @@ -1,950 +0,0 @@ -# LOAD -## description - - Palo 目前支持以下4种导入方式: - - 1. Hadoop Load:基于 MR 进行 ETL 的导入。 - 2. Broker Load:使用 broker 进行进行数据导入。 - 3. Mini Load:通过 http 协议上传文件进行批量数据导入。 - 4. Stream Load:通过 http 协议进行流式数据导入。 - - 本帮助主要描述第一种导入方式,即 Hadoop Load 相关帮助信息。其余导入方式可以使用以下命令查看帮助: - - !!!该导入方式可能在后续某个版本即不再支持,建议使用其他导入方式进行数据导入。!!! - - 1. help broker load; - 2. help mini load; - 3. help stream load; - - Hadoop Load 仅适用于百度内部环境。公有云、私有云以及开源环境无法使用这种导入方式。 - 该导入方式必须设置用于 ETL 的 Hadoop 计算队列,设置方式可以通过 help set property 命令查看帮助。 - - Stream load 暂时只支持百度内部用户使用。开源社区和公有云用户将在后续版本更新中支持。 - -语法: - - LOAD LABEL load_label - ( - data_desc1[, data_desc2, ...] - ) - [opt_properties]; - - 1. load_label - - 当前导入批次的标签。在一个 database 内唯一。 - 语法: - [database_name.]your_label - - 2. data_desc - - 用于描述一批导入数据。 - 语法: - DATA INFILE - ( - "file_path1"[, file_path2, ...] 
- ) - [NEGATIVE] - INTO TABLE `table_name` - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY "column_separator"] - [FORMAT AS "file_type"] - [(column_list)] - [SET (k1 = func(k2))] - - 说明: - file_path: - - 文件路径,可以指定到一个文件,也可以用 * 通配符指定某个目录下的所有文件。通配符必须匹配到文件,而不能是目录。 - - PARTITION: - - 如果指定此参数,则只会导入指定的分区,导入分区以外的数据会被过滤掉。 - 如果不指定,默认导入table的所有分区。 - - NEGATIVE: - 如果指定此参数,则相当于导入一批“负”数据。用于抵消之前导入的同一批数据。 - 该参数仅适用于存在 value 列,并且 value 列的聚合类型仅为 SUM 的情况。 - - column_separator: - - 用于指定导入文件中的列分隔符。默认为 \t - 如果是不可见字符,则需要加\\x作为前缀,使用十六进制来表示分隔符。 - 如hive文件的分隔符\x01,指定为"\\x01" - - file_type: - - 用于指定导入文件的类型,例如:parquet、csv。默认值通过文件后缀名判断。 - - column_list: - - 用于指定导入文件中的列和 table 中的列的对应关系。 - 当需要跳过导入文件中的某一列时,将该列指定为 table 中不存在的列名即可。 - 语法: - (col_name1, col_name2, ...) - - SET: - - 如果指定此参数,可以将源文件某一列按照函数进行转化,然后将转化后的结果导入到table中。 - 目前支持的函数有: - - strftime(fmt, column) 日期转换函数 - fmt: 日期格式,形如%Y%m%d%H%M%S (年月日时分秒) - column: column_list中的列,即输入文件中的列。存储内容应为数字型的时间戳。 - 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 - - time_format(output_fmt, input_fmt, column) 日期格式转化 - output_fmt: 转化后的日期格式,形如%Y%m%d%H%M%S (年月日时分秒) - input_fmt: 转化前column列的日期格式,形如%Y%m%d%H%M%S (年月日时分秒) - column: column_list中的列,即输入文件中的列。存储内容应为input_fmt格式的日期字符串。 - 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 - - alignment_timestamp(precision, column) 将时间戳对齐到指定精度 - precision: year|month|day|hour - column: column_list中的列,即输入文件中的列。存储内容应为数字型的时间戳。 - 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 - 注意:对齐精度为year、month的时候,只支持20050101~20191231范围内的时间戳。 - - default_value(value) 设置某一列导入的默认值 - 不指定则使用建表时列的默认值 - - md5sum(column1, column2, ...) 将指定的导入列的值求md5sum,返回32位16进制字符串 - - replace_value(old_value[, new_value]) 将导入文件中指定的old_value替换为new_value - new_value如不指定则使用建表时列的默认值 - - hll_hash(column) 用于将表或数据里面的某一列转化成HLL列的数据结构 - - 3. opt_properties - - 用于指定一些特殊参数。 - 语法: - [PROPERTIES ("key"="value", ...)] - - 可以指定如下参数: - cluster: 导入所使用的 Hadoop 计算队列。 - timeout: 指定导入操作的超时时间。默认超时为3天。单位秒。 - max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 - load_delete_flag:指定该导入是否通过导入key列的方式删除数据,仅适用于UNIQUE KEY, - 导入时可不指定value列。默认为false。 - - 5. 导入数据格式样例 - - 整型类(TINYINT/SMALLINT/INT/BIGINT/LARGEINT):1, 1000, 1234 - 浮点类(FLOAT/DOUBLE/DECIMAL):1.1, 0.23, .356 - 日期类(DATE/DATETIME):2017-10-03, 2017-06-13 12:34:03。 - (注:如果是其他日期格式,可以在导入命令中,使用 strftime 或者 time_format 函数进行转换) - 字符串类(CHAR/VARCHAR):"I am a student", "a" - NULL值:\N - -## example - - 1. 导入一批数据,指定超时时间和过滤比例。指定导入队列为 my_cluster。 - - LOAD LABEL example_db.label1 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - ) - PROPERTIES - ( - "cluster" = "my_cluster", - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - 其中 hdfs_host 为 namenode 的 host,hdfs_port 为 fs.defaultFS 端口(默认9000) - - 2. 导入一批数据,包含多个文件。导入不同的 table,指定分隔符,指定列对应关系 - - LOAD LABEL example_db.label2 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file1") - INTO TABLE `my_table_1` - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2), - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file2") - INTO TABLE `my_table_2` - COLUMNS TERMINATED BY "\t" - (k1, k2, k3, v2, v1) - ); - - 3. 导入一批数据,指定hive的默认分隔符\x01,并使用通配符*指定目录下的所有文件 - - LOAD LABEL example_db.label3 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") - NEGATIVE - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\\x01" - ); - - 4. 导入一批“负”数据 - - LOAD LABEL example_db.label4 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file) - NEGATIVE - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\t" - ); - - 5. 
导入一批数据,指定分区 - - LOAD LABEL example_db.label5 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2) - ); - - 6. 导入一批数据,指定分区, 并对导入文件的列做一些转化,如下: - 表结构为: - k1 datetime - k2 date - k3 bigint - k4 varchar(20) - k5 varchar(64) - k6 int - - 假设数据文件只有一行数据,5列,逗号分隔: - - 1537002087,2018-08-09 11:12:13,1537002087,-,1 - - 数据文件中各列,对应导入语句中指定的各列: - tmp_k1, tmp_k2, tmp_k3, k6, v1 - - 转换如下: - - 1) k1:将 tmp_k1 时间戳列转化为 datetime 类型的数据 - 2) k2:将 tmp_k2 datetime 类型的数据转化为 date 的数据 - 3) k3:将 tmp_k3 时间戳列转化为天级别时间戳 - 4) k4:指定导入默认值为1 - 5) k5:将 tmp_k1、tmp_k2、tmp_k3 列计算 md5 值 - 6) k6:将导入文件中的 - 值替换为 10 - - LOAD LABEL example_db.label6 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (tmp_k1, tmp_k2, tmp_k3, k6, v1) - SET ( - k1 = strftime("%Y-%m-%d %H:%M:%S", tmp_k1), - k2 = time_format("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", tmp_k2), - k3 = alignment_timestamp("day", tmp_k3), - k4 = default_value("1"), - k5 = md5sum(tmp_k1, tmp_k2, tmp_k3), - k6 = replace_value("-", "10") - ) - ); - - 7. 导入数据到含有HLL列的表,可以是表中的列或者数据里面的列 - - LOAD LABEL example_db.label7 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - SET ( - v1 = hll_hash(k1), - v2 = hll_hash(k2) - ) - ); - - LOAD LABEL example_db.label8 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, k2, tmp_k3, tmp_k4, v1, v2) - SET ( - v1 = hll_hash(tmp_k3), - v2 = hll_hash(tmp_k4) - ) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - 8. 导入Parquet文件中数据 指定FORMAT 为parquet, 默认是通过文件后缀判断 - LOAD LABEL example_db.label9 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - FORMAT AS "parquet" - (k1, k2, k3) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - -## keyword - LOAD - -# CANCEL LOAD -## description - - 该语句用于撤销指定 load label 的批次的导入作业。 - 这是一个异步操作,任务提交成功则返回。执行后可使用 SHOW LOAD 命令查看进度。 - 语法: - CANCEL LOAD - [FROM db_name] - WHERE LABEL = "load_label"; - -## example - - 1. 撤销数据库 example_db 上, label 为 example_db_test_load_label 的导入作业 - CANCEL LOAD - FROM example_db - WHERE LABEL = "example_db_test_load_label"; - -## keyword - CANCEL,LOAD - -# DELETE -## description - - 该语句用于按条件删除指定 table(base index) partition 中的数据。 - 该操作会同时删除和此 base index 相关的 rollup index 的数据。 - 语法: - DELETE FROM table_name [PARTITION partition_name] - WHERE - column_name1 op value[ AND column_name2 op value ...]; - - 说明: - 1) op 的可选类型包括:=, >, <, >=, <=, != - 2) 只能指定 key 列上的条件。 - 2) 当选定的 key 列不存在于某个 rollup 中时,无法进行 delete。 - 3) 条件之间只能是“与”的关系。 - 若希望达成“或”的关系,需要将条件分写在两个 DELETE 语句中。 - 4) 如果为RANGE分区表,则必须指定 PARTITION。如果是单分区表,可以不指定。 - - 注意: - 该语句可能会降低执行后一段时间内的查询效率。 - 影响程度取决于语句中指定的删除条件的数量。 - 指定的条件越多,影响越大。 - -## example - - 1. 删除 my_table partition p1 中 k1 列值为 3 的数据行 - DELETE FROM my_table PARTITION p1 - WHERE k1 = 3; - - 2. 
删除 my_table partition p1 中 k1 列值大于等于 3 且 k2 列值为 "abc" 的数据行 - DELETE FROM my_table PARTITION p1 - WHERE k1 >= 3 AND k2 = "abc"; - -## keyword - DELETE - -# CANCEL DELETE -## description - - 该语句用于撤销一个 DELETE 操作。(仅管理员使用!)(待实现) - -## example - -## keyword - CANCEL,DELETE - -# MINI LOAD -## description - - MINI LOAD 和 STREAM LOAD 的导入实现方式完全一致。在导入功能支持上,MINI LOAD 的功能是 STREAM LOAD 的子集。 - 后续的导入新功能只会在 STREAM LOAD 中支持,MINI LOAD 将不再新增功能。建议改用 STREAM LOAD,具体使用方式请 HELP STREAM LOAD。 - - MINI LOAD 是 通过 http 协议完成的导入方式。用户可以不依赖 Hadoop,也无需通过 Mysql 客户端,即可完成导入。 - 用户通过 http 协议描述导入,数据在接受 http 请求的过程中被流式的导入 Doris , **导入作业完成后** 返回给用户导入的结果。 - - * 注:为兼容旧版本 mini load 使用习惯,用户依旧可以通过 'SHOW LOAD' 命令来查看导入结果。 - - 语法: - 导入: - - curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table}/_load?label=xxx - - 查看导入信息 - - curl -u user:passwd http://host:port/api/{db}/_load_info?label=xxx - - HTTP协议相关说明 - - 权限认证 当前 Doris 使用 http 的 Basic 方式权限认证。所以在导入的时候需要指定用户名密码 - 这种方式是明文传递密码的,暂不支持加密传输。 - - Expect Doris 需要发送过来的 http 请求带有 'Expect' 头部信息,内容为 '100-continue'。 - 为什么呢?因为我们需要将请求进行 redirect,那么必须在传输数据内容之前, - 这样可以避免造成数据的多次传输,从而提高效率。 - - Content-Length Doris 需要在发送请求时带有 'Content-Length' 这个头部信息。如果发送的内容比 - 'Content-Length' 要少,那么 Doris 认为传输出现问题,则提交此次任务失败。 - NOTE: 如果,发送的数据比 'Content-Length' 要多,那么 Doris 只读取 'Content-Length' - 长度的内容,并进行导入 - - - 参数说明: - - user: 用户如果是在default_cluster中的,user即为user_name。否则为user_name@cluster_name。 - - label: 用于指定这一批次导入的 label,用于后期进行作业查询等。 - 这个参数是必须传入的。 - - columns: 用于描述导入文件中对应的列名字。 - 如果不传入,那么认为文件中的列顺序与建表的顺序一致, - 指定的方式为逗号分隔,例如:columns=k1,k2,k3,k4 - - column_separator: 用于指定列与列之间的分隔符,默认的为'\t' - NOTE: 需要进行url编码,譬如 - 需要指定'\t'为分隔符,那么应该传入'column_separator=%09' - 需要指定'\x01'为分隔符,那么应该传入'column_separator=%01' - 需要指定','为分隔符,那么应该传入'column_separator=%2c' - - - max_filter_ratio: 用于指定允许过滤不规范数据的最大比例,默认是0,不允许过滤 - 自定义指定应该如下:'max_filter_ratio=0.2',含义是允许20%的错误率 - - timeout: 指定 load 作业的超时时间,单位是秒。当load执行时间超过该阈值时,会自动取消。默认超时时间是 86400 秒。 - 建议指定 timeout 时间小于 86400 秒。 - - hll: 用于指定数据里面和表里面的HLL列的对应关系,表中的列和数据里面指定的列 - (如果不指定columns,则数据列面的列也可以是表里面的其它非HLL列)通过","分割 - 指定多个hll列使用“:”分割,例如: 'hll1,cuid:hll2,device' - - NOTE: - 1. 此种导入方式当前是在一台机器上完成导入工作,因而不宜进行数据量较大的导入工作。 - 建议导入数据量不要超过 1 GB - - 2. 当前无法使用 `curl -T "{file1, file2}"` 这样的方式提交多个文件,因为curl是将其拆成多个 - 请求发送的,多个请求不能共用一个label号,所以无法使用 - - 3. mini load 的导入方式和 streaming 完全一致,都是在流式的完成导入后,同步的返回结果给用户。 - 后续查询虽可以查到 mini load 的信息,但不能对其进行操作,查询只为兼容旧的使用方式。 - - 4. 当使用 curl 命令行导入时,需要在 & 前加入 \ 转义,否则参数信息会丢失。 - -## example - - 1. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表(用户是defalut_cluster中的) - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123 - - 2. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表(用户是test_cluster中的)。超时时间是 3600 秒 - curl --location-trusted -u root@test_cluster:root -T testData http://fe.host:port/api/testDb/testTbl/_load?label=123&timeout=3600 - - 3. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率(用户是defalut_cluster中的) - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 - - 4. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率,并且指定文件的列名(用户是defalut_cluster中的) - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&columns=k1,k2,k3 - - 5. 使用streaming方式导入(用户是defalut_cluster中的) - seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_load?label=123 - - 6. 
导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列(用户是defalut_cluster中的 - - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 - \&hll=hll_column1,tmp_k4:hll_column2,tmp_k5\&columns=k1,k2,k3,tmp_k4,tmp_k5 - - 7. 查看提交后的导入情况 - - curl -u root http://host:port/api/testDb/_load_info?label=123 - -## keyword - MINI, LOAD - -# MULTI LOAD -## description - - Syntax: - curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_start?label=xxx - curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table1}/_load?label=xxx\&sub_label=yyy - curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table2}/_load?label=xxx\&sub_label=zzz - curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_commit?label=xxx - curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_desc?label=xxx - - 'MULTI LOAD'在'MINI LOAD'的基础上,可以支持用户同时向多个表进行导入,具体的命令如上面所示 - '/api/{db}/_multi_start' 开始一个多表导入任务 - '/api/{db}/{table}/_load' 向一个导入任务添加一个要导入的表,与'MINI LOAD'的主要区别是,需要传入'sub_label'参数 - '/api/{db}/_multi_commit' 提交整个多表导入任务,后台开始进行处理 - '/api/{db}/_multi_abort' 放弃一个多表导入任务 - '/api/{db}/_multi_desc' 可以展示某个多表导入任务已经提交的作业数 - - HTTP协议相关说明 - 权限认证 当前 Doris 使用http的Basic方式权限认证。所以在导入的时候需要指定用户名密码 - 这种方式是明文传递密码的,鉴于我们当前都是内网环境。。。 - - Expect Doris 需要发送过来的http请求,需要有'Expect'头部信息,内容为'100-continue' - 为什么呢?因为我们需要将请求进行redirect,那么必须在传输数据内容之前, - 这样可以避免造成数据的多次传输,从而提高效率。 - - Content-Length Doris 需要在发送请求是带有'Content-Length'这个头部信息。如果发送的内容比 - 'Content-Length'要少,那么Palo认为传输出现问题,则提交此次任务失败。 - NOTE: 如果,发送的数据比'Content-Length'要多,那么 Doris 只读取'Content-Length' - 长度的内容,并进行导入 - - 参数说明: - user: 用户如果是在default_cluster中的,user即为user_name。否则为user_name@cluster_name。 - - label: 用于指定这一批次导入的label号,用于后期进行作业状态查询等。 - 这个参数是必须传入的。 - - sub_label: 用于指定一个多表导入任务内部的子版本号。对于多表导入的load, 这个参数是必须传入的。 - - columns: 用于描述导入文件中对应的列名字。 - 如果不传入,那么认为文件中的列顺序与建表的顺序一致, - 指定的方式为逗号分隔,例如:columns=k1,k2,k3,k4 - - column_separator: 用于指定列与列之间的分隔符,默认的为'\t' - NOTE: 需要进行url编码,譬如需要指定'\t'为分隔符, - 那么应该传入'column_separator=%09' - - max_filter_ratio: 用于指定允许过滤不规范数据的最大比例,默认是0,不允许过滤 - 自定义指定应该如下:'max_filter_ratio=0.2',含义是允许20%的错误率 - 在'_multi_start'时传入有效果 - - NOTE: - 1. 此种导入方式当前是在一台机器上完成导入工作,因而不宜进行数据量较大的导入工作。 - 建议导入数据量不要超过1GB - - 2. 当前无法使用`curl -T "{file1, file2}"`这样的方式提交多个文件,因为curl是将其拆成多个 - 请求发送的,多个请求不能共用一个label号,所以无法使用 - - 3. 支持类似streaming的方式使用curl来向 Doris 中导入数据,但是,只有等这个streaming结束后 Doris - 才会发生真实的导入行为,这中方式数据量也不能过大。 - -## example - - 1. 将本地文件'testData1'中的数据导入到数据库'testDb'中'testTbl1'的表,并且 - 把'testData2'的数据导入到'testDb'中的表'testTbl2'(用户是defalut_cluster中的) - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 - curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 - curl --location-trusted -u root -T testData2 http://host:port/api/testDb/testTbl2/_load?label=123\&sub_label=2 - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_commit?label=123 - - 2. 多表导入中途放弃(用户是defalut_cluster中的) - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 - curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_abort?label=123 - - 3. 
多表导入查看已经提交多少内容(用户是defalut_cluster中的) - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 - curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_desc?label=123 - -## keyword - MULTI, MINI, LOAD - -# EXPORT -## description - - 该语句用于将指定表的数据导出到指定位置。 - 该功能通过 broker 进程实现。对于不同的目的存储系统,需要部署不同的 broker。可以通过 SHOW BROKER 查看已部署的 broker。 - 这是一个异步操作,任务提交成功则返回。执行后可使用 SHOW EXPORT 命令查看进度。 - - 语法: - EXPORT TABLE table_name - [PARTITION (p1[,p2])] - TO export_path - [opt_properties] - broker; - - 1. table_name - 当前要导出的表的表名,目前支持engine为olap和mysql的表的导出。 - - 2. partition - 可以只导出指定表的某些指定分区 - - 3. export_path - 导出的路径,需为目录。目前不能导出到本地,需要导出到broker。 - - 4. opt_properties - 用于指定一些特殊参数。 - 语法: - [PROPERTIES ("key"="value", ...)] - - 可以指定如下参数: - column_separator: 指定导出的列分隔符,默认为\t。 - line_delimiter: 指定导出的行分隔符,默认为\n。 - exec_mem_limit: 导出在单个 BE 节点的内存使用上限,默认为 2GB,单位为字节。 - timeout:导入作业的超时时间,默认为1天,单位是秒。 - tablet_num_per_task:每个子任务能分配的最大 Tablet 数量。 - - 5. broker - 用于指定导出使用的broker - 语法: - WITH BROKER broker_name ("key"="value"[,...]) - 这里需要指定具体的broker name, 以及所需的broker属性 - - 对于不同存储系统对应的 broker,这里需要输入的参数不同。具体参数可以参阅:`help broker load` 中 broker 所需属性。 - -## example - - 1. 将 testTbl 表中的所有数据导出到 hdfs 上 - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 2. 将 testTbl 表中的分区p1,p2导出到 hdfs 上 - - EXPORT TABLE testTbl PARTITION (p1,p2) TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - 3. 将 testTbl 表中的所有数据导出到 hdfs 上,以","作为列分隔符 - - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"=",") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - -## keyword - EXPORT - -# SHOW DATABASES -## description - 该语句用于展示当前可见的 db - 语法: - SHOW DATABASES; - -## keyword - SHOW,DATABASES - -# SHOW TABLES -## description - 该语句用于展示当前 db 下所有的 table - 语法: - SHOW TABLES; - -## keyword - SHOW,TABLES - -# SHOW LOAD -## description - 该语句用于展示指定的导入任务的执行情况 - 语法: - SHOW LOAD - [FROM db_name] - [ - WHERE - [LABEL [ = "your_label" | LIKE "label_matcher"]] - [STATE = ["PENDING"|"ETL"|"LOADING"|"FINISHED"|"CANCELLED"|]] - ] - [ORDER BY ...] - [LIMIT limit][OFFSET offset]; - - 说明: - 1) 如果不指定 db_name,使用当前默认db - 2) 如果使用 LABEL LIKE,则会匹配导入任务的 label 包含 label_matcher 的导入任务 - 3) 如果使用 LABEL = ,则精确匹配指定的 label - 4) 如果指定了 STATE,则匹配 LOAD 状态 - 5) 可以使用 ORDER BY 对任意列组合进行排序 - 6) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 - 7) 如果指定了 OFFSET,则从偏移量offset开始显示查询结果。默认情况下偏移量为0。 - 8) 如果是使用 broker/mini load,则 URL 列中的连接可以使用以下命令查看: - - SHOW LOAD WARNINGS ON 'url' - -## example - 1. 展示默认 db 的所有导入任务 - SHOW LOAD; - - 2. 展示指定 db 的导入任务,label 中包含字符串 "2014_01_02",展示最老的10个 - SHOW LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10; - - 3. 展示指定 db 的导入任务,指定 label 为 "load_example_db_20140102" 并按 LoadStartTime 降序排序 - SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" ORDER BY LoadStartTime DESC; - - 4. 展示指定 db 的导入任务,指定 label 为 "load_example_db_20140102" ,state 为 "loading", 并按 LoadStartTime 降序排序 - SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" AND STATE = "loading" ORDER BY LoadStartTime DESC; - - 5. 展示指定 db 的导入任务 并按 LoadStartTime 降序排序,并从偏移量5开始显示10条查询结果 - SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 5,10; - SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 10 offset 5; - - 6. 
小批量导入是查看导入状态的命令 - curl --location-trusted -u {user}:{passwd} http://{hostname}:{port}/api/{database}/_load_info?label={labelname} - -## keyword - SHOW,LOAD - -# SHOW EXPORT -## description - 该语句用于展示指定的导出任务的执行情况 - 语法: - SHOW EXPORT - [FROM db_name] - [ - WHERE - [EXPORT_JOB_ID = your_job_id] - [STATE = ["PENDING"|"EXPORTING"|"FINISHED"|"CANCELLED"]] - ] - [ORDER BY ...] - [LIMIT limit]; - - 说明: - 1) 如果不指定 db_name,使用当前默认db - 2) 如果指定了 STATE,则匹配 EXPORT 状态 - 3) 可以使用 ORDER BY 对任意列组合进行排序 - 4) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 - -## example - 1. 展示默认 db 的所有导出任务 - SHOW EXPORT; - - 2. 展示指定 db 的导出任务,按 StartTime 降序排序 - SHOW EXPORT FROM example_db ORDER BY StartTime DESC; - - 3. 展示指定 db 的导出任务,state 为 "exporting", 并按 StartTime 降序排序 - SHOW EXPORT FROM example_db WHERE STATE = "exporting" ORDER BY StartTime DESC; - - 4. 展示指定db,指定job_id的导出任务 - SHOW EXPORT FROM example_db WHERE EXPORT_JOB_ID = job_id; - -## keyword - SHOW,EXPORT - -# SHOW DELETE -## description - 该语句用于展示已执行成功的历史 delete 任务 - 语法: - SHOW DELETE [FROM db_name] - -## example - 1. 展示数据库 database 的所有历史 delete 任务 - SHOW DELETE FROM database; - -## keyword - SHOW,DELETE - -# SHOW ALTER -## description - 该语句用于展示当前正在进行的各类修改任务的执行情况 - 语法: - SHOW ALTER [CLUSTER | TABLE [COLUMN | ROLLUP] [FROM db_name]]; - - 说明: - TABLE COLUMN:展示修改列的 ALTER 任务 - TABLE ROLLUP:展示创建或删除 ROLLUP index 的任务 - 如果不指定 db_name,使用当前默认 db - CLUSTER: 展示集群操作相关任务情况(仅管理员使用!待实现...) - -## example - 1. 展示默认 db 的所有修改列的任务执行情况 - SHOW ALTER TABLE COLUMN; - - 2. 展示指定 db 的创建或删除 ROLLUP index 的任务执行情况 - SHOW ALTER TABLE ROLLUP FROM example_db; - - 3. 展示集群操作相关任务(仅管理员使用!待实现...) - SHOW ALTER CLUSTER; - -## keyword - SHOW,ALTER - -# SHOW DATA -## description - 该语句用于展示数据量 - 语法: - SHOW DATA [FROM db_name[.table_name]]; - - 说明: - 1. 如果不指定 FROM 子句,使用展示当前 db 下细分到各个 table 的数据量 - 2. 如果指定 FROM 子句,则展示 table 下细分到各个 index 的数据量 - 3. 如果想查看各个 Partition 的大小,请参阅 help show partitions - -## example - 1. 展示默认 db 的各个 table 的数据量及汇总数据量 - SHOW DATA; - - 2. 展示指定 db 的下指定表的细分数据量 - SHOW DATA FROM example_db.table_name; - -## keyword - SHOW,DATA - -# SHOW PARTITIONS -## description - 该语句用于展示分区信息 - 语法: - SHOW PARTITIONS FROM [db_name.]table_name [PARTITION partition_name]; - -## example - 1. 展示指定 db 的下指定表的分区信息 - SHOW PARTITIONS FROM example_db.table_name; - - 1. 展示指定 db 的下指定表的指定分区的信息 - SHOW PARTITIONS FROM example_db.table_name PARTITION p1; - -## keyword - SHOW,PARTITIONS - -# SHOW TABLET -## description - 该语句用于显示 tablet 相关的信息(仅管理员使用) - 语法: - SHOW TABLET - [FROM [db_name.]table_name | tablet_id] - -## example - 1. 显示指定 db 的下指定表所有 tablet 信息 - SHOW TABLET FROM example_db.table_name; - - 2. 显示指定 tablet id 为 10000 的 tablet 的父层级 id 信息 - SHOW TABLET 10000; - -## keyword - SHOW,TABLET - -# SHOW PROPERTY -## description - 该语句用于查看用户的属性 - 语法: - SHOW PROPERTY [FOR user] [LIKE key] - -## example - 1. 查看 jack 用户的属性 - SHOW PROPERTY FOR 'jack' - - 2. 查看 jack 用户导入cluster相关属性 - SHOW PROPERTY FOR 'jack' LIKE '%load_cluster%' - -## keyword - SHOW, PROPERTY - -# SHOW BACKUP -## description - 该语句用于查看 BACKUP 任务 - 语法: - SHOW BACKUP [FROM db_name] - - 说明: - 1. Palo 中仅保存最近一次 BACKUP 任务。 - 2. 
各列含义如下: - JobId: 唯一作业id - SnapshotName: 备份的名称 - DbName: 所属数据库 - State: 当前阶段 - PENDING: 提交作业后的初始状态 - SNAPSHOTING: 执行快照中 - UPLOAD_SNAPSHOT:快照完成,准备上传 - UPLOADING: 快照上传中 - SAVE_META: 将作业元信息保存为本地文件 - UPLOAD_INFO: 上传作业元信息 - FINISHED: 作业成功 - CANCELLED: 作业失败 - BackupObjs: 备份的表和分区 - CreateTime: 任务提交时间 - SnapshotFinishedTime: 快照完成时间 - UploadFinishedTime: 快照上传完成时间 - FinishedTime: 作业结束时间 - UnfinishedTasks: 在 SNAPSHOTING 和 UPLOADING 阶段会显示还未完成的子任务id - Status: 如果作业失败,显示失败信息 - Timeout: 作业超时时间,单位秒 - -## example - 1. 查看 example_db 下最后一次 BACKUP 任务。 - SHOW BACKUP FROM example_db; - -## keyword - SHOW, BACKUP - -# SHOW RESTORE -## description - 该语句用于查看 RESTORE 任务 - 语法: - SHOW RESTORE [FROM db_name] - - 说明: - 1. Palo 中仅保存最近一次 RESTORE 任务。 - 2. 各列含义如下: - JobId: 唯一作业id - Label: 要恢复的备份的名称 - Timestamp: 要恢复的备份的时间版本 - DbName: 所属数据库 - State: 当前阶段 - PENDING: 提交作业后的初始状态 - SNAPSHOTING: 执行快照中 - DOWNLOAD: 快照完成,准备下载仓库中的快照 - DOWNLOADING: 快照下载中 - COMMIT: 快照下载完成,准备生效 - COMMITING: 生效中 - FINISHED: 作业成功 - CANCELLED: 作业失败 - AllowLoad: 恢复时是否允许导入(当前不支持) - ReplicationNum: 指定恢复的副本数 - RestoreJobs: 要恢复的表和分区 - CreateTime: 任务提交时间 - MetaPreparedTime: 元数据准备完成时间 - SnapshotFinishedTime: 快照完成时间 - DownloadFinishedTime: 快照下载完成时间 - FinishedTime: 作业结束时间 - UnfinishedTasks: 在 SNAPSHOTING、DOWNLOADING 和 COMMITING 阶段会显示还未完成的子任务id - Status: 如果作业失败,显示失败信息 - Timeout: 作业超时时间,单位秒 - -## example - 1. 查看 example_db 下最近一次 RESTORE 任务。 - SHOW RESTORE FROM example_db; - -## keyword - SHOW, RESTORE - -# SHOW REPOSITORIES -## description - 该语句用于查看当前已创建的仓库。 - 语法: - SHOW REPOSITORIES; - - 说明: - 1. 各列含义如下: - RepoId: 唯一的仓库ID - RepoName: 仓库名称 - CreateTime: 第一次创建该仓库的时间 - IsReadOnly: 是否为只读仓库 - Location: 仓库中用于备份数据的根目录 - Broker: 依赖的 Broker - ErrMsg: Palo 会定期检查仓库的连通性,如果出现问题,这里会显示错误信息 - -## example - 1. 查看已创建的仓库: - SHOW REPOSITORIES; - -## keyword - SHOW, REPOSITORY, REPOSITORIES - -# SHOW SNAPSHOT -## description - 该语句用于查看仓库中已存在的备份。 - 语法: - SHOW SNAPSHOT ON `repo_name` - [WHERE SNAPSHOT = "snapshot" [AND TIMESTAMP = "backup_timestamp"]]; - - 说明: - 1. 各列含义如下: - Snapshot: 备份的名称 - Timestamp: 对应备份的时间版本 - Status: 如果备份正常,则显示 OK,否则显示错误信息 - - 2. 如果指定了 TIMESTAMP,则会额外显示如下信息: - Database: 备份数据原属的数据库名称 - Details: 以 Json 的形式,展示整个备份的数据目录及文件结构 - -## example - 1. 查看仓库 example_repo 中已有的备份: - SHOW SNAPSHOT ON example_repo; - - 2. 仅查看仓库 example_repo 中名称为 backup1 的备份: - SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "backup1"; - - 2. 
查看仓库 example_repo 中名称为 backup1 的备份,时间版本为 "2018-05-05-15-34-26" 的详细信息: - SHOW SNAPSHOT ON example_repo - WHERE SNAPSHOT = "backup1" AND TIMESTAMP = "2018-05-05-15-34-26"; - -## keyword - SHOW, SNAPSHOT - -# RESTORE TABLET -## description - - 该功能用于恢复trash目录中被误删的tablet数据。 - - 说明:这个功能暂时只在be服务中提供一个http接口。如果要使用, - 需要向要进行数据恢复的那台be机器的http端口发送restore tablet api请求。api格式如下: - METHOD: POST - URI: http://be_host:be_http_port/api/restore_tablet?tablet_id=xxx&schema_hash=xxx - -## example - - curl -X POST "http://hostname:8088/api/restore_tablet?tablet_id=123456&schema_hash=1111111" diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md new file mode 100644 index 00000000000000..13b0e928f3d807 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md @@ -0,0 +1,7 @@ +# BIGINT +## description + BIGINT + 8字节有符号整数,范围[-9223372036854775808, 9223372036854775807] + +##keyword +BIGINT,BIGINT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md new file mode 100644 index 00000000000000..fdf8117aff75e7 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md @@ -0,0 +1,7 @@ +# BOOLEAN +## description + BOOL, BOOLEN + 与TINYINT一样,0代表false,1代表true + +##keyword +BOOLEAN,BOOLEAN diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md new file mode 100644 index 00000000000000..edf060a74af7c6 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md @@ -0,0 +1,7 @@ +# CHAR +## description + CHAR(M) + 定长字符串,M代表的是定长字符串的长度。M的范围是1-255 + +##keyword +CHAR,CHAR diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md new file mode 100644 index 00000000000000..978c5ef953d962 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md @@ -0,0 +1,15 @@ +# DATE +## description + DATE函数 + Syntax: + DATE(expr) + 将输入的类型转化为DATE类型 + DATE类型 + 日期类型,目前的取值范围是['1900-01-01', '9999-12-31'], 默认的打印形式是'YYYY-MM-DD' + +## example + mysql> SELECT DATE('2003-12-31 01:02:03'); + -> '2003-12-31' + +##keyword +DATE,DATE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md new file mode 100644 index 00000000000000..90332c68ca744c --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md @@ -0,0 +1,8 @@ +# DATETIME +## description + DATETIME + 日期时间类型,取值范围是['1000-01-01 00:00:00', '9999-12-31 23:59:59']. 
+ 打印的形式是'YYYY-MM-DD HH:MM:SS' + +##keyword +DATETIME,DATETIME diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md new file mode 100644 index 00000000000000..4807798e46aa22 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md @@ -0,0 +1,8 @@ +# DECIMAL +## description + DECIMAL(M[,D]) + 高精度定点数,M代表一共有多少个有效数字(precision),D代表小数点后最多有多少数字(scale) + M的范围是[1,27], D的范围[1, 9], 另外,M必须要大于等于D的取值。默认的D取值为0 + +##keyword +DECIMAL,DECIMAL diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md new file mode 100644 index 00000000000000..17136524824525 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md @@ -0,0 +1,7 @@ +# DOUBLE +## description + DOUBLE + 8字节浮点数 + +##keyword +DOUBLE,DOUBLE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md new file mode 100644 index 00000000000000..6410033ed8d5d1 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md @@ -0,0 +1,7 @@ +# FLOAT +## description + FLOAT + 4字节浮点数 + +##keyword +FLOAT,FLOAT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md new file mode 100644 index 00000000000000..9ad2ed887139d6 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md @@ -0,0 +1,9 @@ +# HLL(HyperLogLog) +## description + VARCHAR(M) + 变长字符串,M代表的是变长字符串的长度。M的范围是1-16385 + 用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制 + 并且HLL列只能通过配套的hll_union_agg、hll_raw_agg、hll_cardinality、hll_hash进行查询或使用 + +##keyword +HLL(HYPERLOGLOG),HLL(HYPERLOGLOG) diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md new file mode 100644 index 00000000000000..8943fe20afe6da --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md @@ -0,0 +1,7 @@ +# INT +## description + INT + 4字节有符号整数,范围[-2147483648, 2147483647] + +##keyword +INT,INT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md new file mode 100644 index 00000000000000..7cb74ce06ffcda --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md @@ -0,0 +1,7 @@ +# SMALLINT +## description + SMALLINT + 2字节有符号整数,范围[-32768, 32767] + +##keyword +SMALLINT,SMALLINT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md new file mode 100644 index 00000000000000..cbdbba67c9f0a0 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md @@ -0,0 +1,7 @@ +# TINYINT +## description + TINYINT + 1字节有符号整数,范围[-128, 127] + +##keyword +TINYINT,TINYINT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/VARCHAR.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/VARCHAR.md new file mode 100644 index 00000000000000..8e8cbb7d2b5a5a --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/VARCHAR.md @@ -0,0 +1,7 @@ +# VARCHAR +## 
description + VARCHAR(M) + 变长字符串,M代表的是变长字符串的长度。M的范围是1-65535 + +##keyword +VARCHAR,VARCHAR diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md deleted file mode 100644 index b9f9351d021db0..00000000000000 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/help.md +++ /dev/null @@ -1,77 +0,0 @@ -# BOOLEAN -## description - BOOL, BOOLEN - 与TINYINT一样,0代表false,1代表true - -# TINYINT -## description - TINYINT - 1字节有符号整数,范围[-128, 127] - -# SMALLINT -## description - SMALLINT - 2字节有符号整数,范围[-32768, 32767] - -# INT -## description - INT - 4字节有符号整数,范围[-2147483648, 2147483647] - -# BIGINT -## description - BIGINT - 8字节有符号整数,范围[-9223372036854775808, 9223372036854775807] - -# FLOAT -## description - FLOAT - 4字节浮点数 - -# DOUBLE -## description - DOUBLE - 8字节浮点数 - -# DECIMAL -## description - DECIMAL(M[,D]) - 高精度定点数,M代表一共有多少个有效数字(precision),D代表小数点后最多有多少数字(scale) - M的范围是[1,27], D的范围[1, 9], 另外,M必须要大于等于D的取值。默认的D取值为0 - -# CHAR -## description - CHAR(M) - 定长字符串,M代表的是定长字符串的长度。M的范围是1-255 - -# VARCHAR -## description - VARCHAR(M) - 变长字符串,M代表的是变长字符串的长度。M的范围是1-65535 - -# DATE -## description - DATE函数 - Syntax: - DATE(expr) - 将输入的类型转化为DATE类型 - DATE类型 - 日期类型,目前的取值范围是['1900-01-01', '9999-12-31'], 默认的打印形式是'YYYY-MM-DD' - -## example - mysql> SELECT DATE('2003-12-31 01:02:03'); - -> '2003-12-31' - -# DATETIME -## description - DATETIME - 日期时间类型,取值范围是['1000-01-01 00:00:00', '9999-12-31 23:59:59']. - 打印的形式是'YYYY-MM-DD HH:MM:SS' - -# HLL(HyperLogLog) -## description - VARCHAR(M) - 变长字符串,M代表的是变长字符串的长度。M的范围是1-16385 - 用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制 - 并且HLL列只能通过配套的hll_union_agg、hll_raw_agg、hll_cardinality、hll_hash进行查询或使用 - diff --git a/docs/script/keyword.sh b/docs/script/keyword.sh deleted file mode 100755 index ceaaa57673610c..00000000000000 --- a/docs/script/keyword.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#this shell adds keywords to MD files without keywords - -IFS=`echo -en "\n\b"` - -ROOTDIR=`dirname "$0"` -ROOTDIR=`cd "$ROOT"; pwd` - -scandir() { - for file in `ls $*`; do - if [[ ! 
-d $*"/"$file ]]; then - if [[ $file == *".md" ]]; then - readfile $*"/"${file} - fi - else - scandir $*"/"${file} - fi - done -} - -readfile() { - local file=$* - local topic=`cat $file | grep "^#[^#].*" | grep -o "[^# ]\+\( \+[^ ]\+\)*"` - local keywordNum=`cat $file | grep "^##[^#]*keyword[ ]*$" | wc -l` - if [[ $keywordNum != 0 || -z $topic ]]; then - return - fi - local SAVEIFS=$IFS - IFS=' ' - local array=`echo $topic | tr '\`' ' ' | tr ',' ' '` - local keywords= - for keyword in ${array[*]}; do - keywords=$keywords","$keyword - done - array=`echo $array | tr '_' ' '` - for keyword in ${array[*]}; do - keywords=$keywords","$keyword - done - keywords=`echo ${keywords:1} | tr 'a-z' 'A-Z'` - IFS=$SAVEIFS - file=`echo $file | sed 's/[ \(\)]/\\\&/g'` - eval sed -i '"\$a ##keyword"' $file - eval sed -i '"\$a ${keywords}"' $file -} - -main() { - scandir $ROOTDIR -} - -main "$@" -exit 0 diff --git a/docs/script/merge.sh b/docs/script/merge.sh deleted file mode 100755 index 5d252c7c697b77..00000000000000 --- a/docs/script/merge.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#run split.h first -#this shell merges user-defined heads in the MD file into "description" head -################ ################# -# #topic # # #topic # -# # # # -# ##syntax # # ##descrption # -# somelines # # somelines # -# # # # -# ##descrption # # syntax # -# somelines # ==> # somelines # -# # # # -# ##example # # parameter # -# somelines # # somelines # -# # # # -# ##parameter # # ##example # -# somelines # # somelines # -# # # # -################ ################# - -IFS=`echo -en "\n\b"` - -ROOTDIR=`dirname "$0"` -ROOTDIR=`cd "$ROOT"; pwd` - -keywords=" -examples -example -description -keywords -keyword -url -" - -matchKeyword(){ - for keyword in ${keywords[*]}; do - if [[ "$1" == $keyword ]]; then - return 0 - fi - done - return 1 -} - -merge(){ - file=$* - sed -n '/^#[^#]/p' $file > ${ROOTDIR}/tempp - sed -n '/^<>TARGET<>/,/^<>END<>/p' $file >> ${ROOTDIR}/tempp - sed -n '/^>>>/,/^<<> ${ROOTDIR}/tempp - sed -n '/^\^\^\^/,/^\$\$\$/p' $file >> ${ROOTDIR}/tempp - sed -i 's/^<>TARGET<>//;s/^<>END<>//;s/^>>>//;s/^<<END<>/"' $tempfile - ;; - *) - echo "Internal error" ; exit 1 - ;; - esac - fi - - if [[ $row == $TotalRow && headlevel > 0 ]]; then - merge $tempfile - continue - fi - - if [[ $line == "##"* ]]; then - headlevel=2 - line=`echo ${line:2} | tr '[A-Z]' '[a-z]' | grep -o "[^ ]\+\( \+[^ ]\+\)*"` - if [[ $line == "description" ]]; then - eval sed -i '"${row}s/description/description/i"' $tempfile - elif [[ $line == "examples" ]]; then - eval sed -i '"${row}s/examples/example/i"' $tempfile - elif [[ $line == "keywords" ]]; then - eval sed -i '"${row}s/keywords/keyword/i"' $tempfile - fi - matchKeyword ${line} - if [[ $? 
== 1 ]]; then - style="unmatch" - eval sed -i '"${row}s/^##/>>>/"' $tempfile - else - if [[ $line == "description" ]]; then - style="description" - eval sed -i '"${row}s/^/<>TARGET<>/"' $tempfile - continue - fi - style="match" - eval sed -i '"${row}s/^/\^\^\^/"' $tempfile - fi - elif [[ $line == "#"* ]]; then - if [[ headlevel == 0 ]]; then - headlevel=1 - continue - fi - headleve=1 - fi - fi - done < $tempfile - if [[ -f $tempfile ]]; then - rm $tempfile - fi - if [[ -f ${ROOTDIR}/tempp ]]; then - cp ${ROOTDIR}/tempp $* && rm ${ROOTDIR}/tempp - fi -} - -main() { - scandir $ROOTDIR -} - -main "$@" -exit 0 diff --git a/docs/script/split.sh b/docs/script/split.sh deleted file mode 100755 index 18c7a6ab997f93..00000000000000 --- a/docs/script/split.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#this shell splits topics in the same MD file -IFS=`echo -en "\n\b"` - -ROOTDIR=`dirname "$0"` -ROOTDIR=`cd "$ROOT"; pwd` - -scandir() { - for file in `ls $*`; do - if [[ ! -d $*"/"$file ]]; then - if [[ $file == *".md" ]]; then - splitfile $*"/"${file} - fi - else - scandir $*"/"${file} - fi - done -} - -splitfile() { - local file=$* - local filedir=${file%/*} - local evalfile=`echo $file | sed 's/[ \(\)]/\\\&/g'` - - local row=0 - local split=1 - local name= - local TotalRow=`wc -l $file | awk '{print $1}'` - local TopicNum=`grep -o '^#[^#].*' $file | wc -l` - if [ $TopicNum -lt 2 ]; then - return - fi - while read line; do - ((row++)) - - if [[ $row == $TotalRow || $line =~ ^#[^#].* ]]; then - if [[ -n $name && $split != $row ]]; then - eval awk '"NR==${split},NR==$(($row==$TotalRow?row:row-1))"' ${evalfile} > ${ROOTDIR}/tempp - cp ${ROOTDIR}/tempp ${filedir}/${name}.md - fi - name=`echo $line | grep -o "[^# ]\+\( \+[^ ]\+\)*"` - split=$row - fi - done < $file - if [[ -f ${ROOTDIR}/tempp ]]; then - rm ${ROOTDIR}/tempp - fi - rm $file -} - -main() { - scandir $ROOTDIR -} - -main "$@" -exit 0 From 707614b0750391066725d699ab3412efd2f4060a Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 6 Aug 2019 19:08:46 +0800 Subject: [PATCH 5/9] change text format to add multi-level head --- .../sql-functions/aggregate-functions/avg.md | 8 ++++---- .../sql-functions/aggregate-functions/count.md | 6 +++--- .../aggregate-functions/count_distinct.md | 6 +++--- .../aggregate-functions/hll_union_agg.md | 8 ++++---- .../sql-functions/aggregate-functions/max.md | 6 +++--- .../sql-functions/aggregate-functions/min.md | 6 +++--- .../sql-functions/aggregate-functions/ndv.md | 8 ++++---- .../aggregate-functions/percentile_approx.md | 8 ++++---- .../aggregate-functions/stddev.md | 6 +++--- .../aggregate-functions/stddev_samp.md | 6 +++--- .../sql-functions/aggregate-functions/sum.md | 6 +++--- .../aggregate-functions/var_samp.md | 6 +++--- 
.../aggregate-functions/variance.md | 6 +++--- .../cn/sql-reference/sql-functions/cast.md | 18 +++++++++--------- .../date-time-functions/current_timestamp.md | 6 +++--- .../date-time-functions/date_add.md | 8 ++++---- .../date-time-functions/date_format.md | 8 ++++---- .../date-time-functions/date_sub.md | 8 ++++---- .../date-time-functions/datediff.md | 8 ++++---- .../sql-functions/date-time-functions/day.md | 8 ++++---- .../date-time-functions/dayname.md | 8 ++++---- .../date-time-functions/dayofmonth.md | 8 ++++---- .../date-time-functions/dayofweek.md | 8 ++++---- .../date-time-functions/dayofyear.md | 8 ++++---- .../date-time-functions/from_days.md | 6 +++--- .../date-time-functions/from_unixtime.md | 8 ++++---- .../sql-functions/date-time-functions/month.md | 8 ++++---- .../date-time-functions/monthname.md | 8 ++++---- .../sql-functions/date-time-functions/now.md | 6 +++--- .../date-time-functions/str_to_date.md | 8 ++++---- .../date-time-functions/timediff.md | 8 ++++---- .../date-time-functions/to_days.md | 8 ++++---- .../date-time-functions/unix_timestamp.md | 8 ++++---- .../date-time-functions/utc_timestamp.md | 8 ++++---- .../date-time-functions/workofyear.md | 8 ++++---- .../sql-functions/date-time-functions/year.md | 8 ++++---- .../spatial-functions/st_astext.md | 6 +++--- .../spatial-functions/st_circle.md | 8 ++++---- .../spatial-functions/st_contains.md | 6 +++--- .../spatial-functions/st_distance_sphere.md | 6 +++--- .../spatial-functions/st_geometryfromtext.md | 6 +++--- .../spatial-functions/st_linefromtext.md | 6 +++--- .../spatial-functions/st_point.md | 8 ++++---- .../spatial-functions/st_polygon.md | 6 +++--- .../sql-functions/spatial-functions/st_x.md | 6 +++--- .../sql-functions/spatial-functions/st_y.md | 6 +++--- .../sql-functions/string-functions/ascii.md | 6 +++--- .../sql-functions/string-functions/concat.md | 6 +++--- .../string-functions/concat_ws.md | 8 ++++---- .../string-functions/find_in_set.md | 6 +++--- .../string-functions/get_json_double.md | 8 ++++---- .../string-functions/get_json_int.md | 8 ++++---- .../string-functions/get_json_string.md | 8 ++++---- .../string-functions/group_concat.md | 6 +++--- .../sql-functions/string-functions/instr.md | 6 +++--- .../sql-functions/string-functions/lcase.md | 6 +++--- .../sql-functions/string-functions/left.md | 6 +++--- .../sql-functions/string-functions/length.md | 6 +++--- .../sql-functions/string-functions/locate.md | 6 +++--- .../sql-functions/string-functions/lower.md | 6 +++--- .../sql-functions/string-functions/lpad.md | 6 +++--- .../sql-functions/string-functions/ltrim.md | 6 +++--- .../string-functions/money_format.md | 6 +++--- .../string-functions/regexp_extract.md | 6 +++--- .../string-functions/regexp_replace.md | 6 +++--- .../sql-functions/string-functions/repeat.md | 6 +++--- .../sql-functions/string-functions/right.md | 6 +++--- .../string-functions/split_part.md | 6 +++--- .../sql-functions/string-functions/strleft.md | 6 +++--- .../sql-functions/string-functions/strright.md | 6 +++--- .../Account Management/SHOW ROLES.md | 2 +- .../Data Definition/create-function.md | 14 +++++++------- .../Data Definition/drop-function.md | 10 +++++----- .../Data Definition/show-function.md | 14 +++++++------- .../sql-statements/Data Manipulation/insert.md | 7 +++---- .../apache/doris/common/MarkDownParser.java | 8 +++++++- 76 files changed, 275 insertions(+), 270 deletions(-) mode change 100644 => 100755 fe/src/main/java/org/apache/doris/common/MarkDownParser.java diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md index e10a5c91b1c373..16cc5e722cc94b 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md @@ -1,13 +1,13 @@ # AVG ## description +### Syntax -用于返回选中字段的平均值 +`AVG([DISTINCT] expr)` -可选字段DISTINCT参数可以用来返回去重平均值 - Syntax +用于返回选中字段的平均值 -`AVG([DISTINCT] expr)` +可选字段DISTINCT参数可以用来返回去重平均值 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md index 39562a563b6710..f452fa21c34019 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md @@ -1,11 +1,11 @@ # COUNT ## description +### Syntax -用于返回满足要求的行的数目 +`COUNT([DISTINCT] expr)` - Syntax -`COUNT([DISTINCT] expr)` +用于返回满足要求的行的数目 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md index b666220ca70185..a5d21e604be4a4 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md @@ -1,11 +1,11 @@ # COUNT_DISTINCT ## description +### Syntax -用于返回满足要求的行的数目,或者非NULL行的数目 +`COUNT_DISTINCT(expr)` - Syntax -`COUNT_DISTINCT(expr)` +用于返回满足要求的行的数目,或者非NULL行的数目 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md index 06d27b3ecab07f..6d3b1ebdda7058 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md @@ -1,5 +1,9 @@ # HLL_UNION_AGG ## description +### Syntax + +`HLL_UNION_AGG(hll)` + HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过程的中间结果 @@ -9,10 +13,6 @@ HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过 导入的时候通过hll_hash函数来指定数据中哪一列用于生成hll列,它常用于替代count distinct,通过结合rollup在业务上用于快速计算uv等 - Syntax - -`HLL_UNION_AGG(hll)` - ## example ``` MySQL > select HLL_UNION_AGG(uv_set) from test_uv;; diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md index 0cb3acbfad2ebf..29c2fdeab64b6f 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md @@ -1,11 +1,11 @@ # MAX ## description +### Syntax -返回expr表达式的最大值 +`MAX(expr)` - Syntax -`MAX(expr)` +返回expr表达式的最大值 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md index 275d9432fa08c9..be805ea192d848 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md @@ -1,11 +1,11 @@ # MIN ## description +### Syntax -返回expr表达式的最小值 +`MIN(expr)` - Syntax -`MIN(expr)` +返回expr表达式的最小值 ## example ``` diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md index 3b01515a9379fa..62eebf873b3bc8 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md @@ -1,13 +1,13 @@ # NDV ## description +### Syntax -返回类似于 COUNT(DISTINCT col) 结果的近似值聚合函数。 +`NDV(expr)` -它比 COUNT 和 DISTINCT 组合的速度更快,并使用固定大小的内存,因此对于高基数的列可以使用更少的内存。 - Syntax +返回类似于 COUNT(DISTINCT col) 结果的近似值聚合函数。 -`NDV(expr)` +它比 COUNT 和 DISTINCT 组合的速度更快,并使用固定大小的内存,因此对于高基数的列可以使用更少的内存。 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md index fef57b4cfc40ee..f47ce1ce1430b3 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md @@ -1,13 +1,13 @@ # PERCENTILE_APPROX ## description +### Syntax -返回第p个百分位点的近似值,p的值介于0到1之间 +`PERCENTILE_APPROX(expr, DOUBLE p)` -该函数使用固定大小的内存,因此对于高基数的列可以使用更少的内存,可用于计算tp99等统计值 - Syntax +返回第p个百分位点的近似值,p的值介于0到1之间 -`PERCENTILE_APPROX(expr, DOUBLE p)` +该函数使用固定大小的内存,因此对于高基数的列可以使用更少的内存,可用于计算tp99等统计值 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md index fe1d990bf4ed47..f3db9b673e3cb0 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md @@ -1,11 +1,11 @@ # STDDEV,STDDEV_POP ## description +### Syntax -返回expr表达式的标准差 +`STDDEV(expr)` - Syntax -`STDDEV(expr)` +返回expr表达式的标准差 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md index d28123d13745d6..2af5308e97d032 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md @@ -1,11 +1,11 @@ # STDDEV_SAMP ## description +### Syntax -返回expr表达式的样本标准差 +`STDDEV_SAMP(expr)` - Syntax -`STDDEV_SAMP(expr)` +返回expr表达式的样本标准差 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md index e69589a2444744..fb71b5154fc421 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md @@ -1,11 +1,11 @@ # SUM ## description +### Syntax -用于返回选中字段所有值的和 +`SUM(expr)` - Syntax -`SUM(expr)` +用于返回选中字段所有值的和 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md index 58ba697e283355..94d9f03270bfec 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md @@ -1,11 +1,11 @@ # VAR_SAMP,VARIANCE_SAMP ## description +### Syntax -返回expr表达式的样本方差 +`VAR_SAMP(expr)` - Syntax 
-`VAR_SAMP(expr)` +返回expr表达式的样本方差 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md index a20b18ebad8526..7a46a827b5f624 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md @@ -1,11 +1,11 @@ # VARIANCE,VAR_POP,VARIANCE_POP ## description +### Syntax -返回expr表达式的方差 +`VARIANCE(expr)` - Syntax -`VARIANCE(expr)` +返回expr表达式的方差 ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/cast.md b/docs/documentation/cn/sql-reference/sql-functions/cast.md index ad0b58a79b9e40..4ac9937e0f5bb0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/cast.md +++ b/docs/documentation/cn/sql-reference/sql-functions/cast.md @@ -1,24 +1,24 @@ # CAST ## description - -将 input 转成 指定的 type - ## description - -将当前列 input 转换为 BIGINT 类型 - - Syntax +### Syntax ``` cast (input as type) ``` - BIGINT type +### BIGINT type - Syntax +### Syntax ``` cast (input as BIGINT) ``` + +将 input 转成 指定的 type + + +将当前列 input 转换为 BIGINT 类型 + ## example 1. 转常量,或表中某列 diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md index 2524ad6805eeee..85afc0f01b4f7b 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/current_timestamp.md @@ -1,11 +1,11 @@ # current_timestamp ## description +### Syntax -获得当前的时间,以Datetime类型返回 +`DATETIME CURRENT_TIMESTAMP()` - Syntax -`DATETIME CURRENT_TIMESTAMP()` +获得当前的时间,以Datetime类型返回 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md index e86c1d7ede9179..ecb45512c7ff18 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_add.md @@ -1,5 +1,9 @@ # date_add ## description +### Syntax + +`INT DATE_ADD(DATETIME date,INTERVAL expr type)` + 向日期添加指定的时间间隔。 @@ -9,10 +13,6 @@ expr 参数是您希望添加的时间间隔。 type 参数可以是下列值:YEAR, MONTH, DAY, HOUR, MINUTE, SECOND - Syntax - -`INT DATE_ADD(DATETIME date,INTERVAL expr type)` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md index c90482e4421f05..8a05cf0d8f124b 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md @@ -1,5 +1,9 @@ # date_format ## description +### Syntax + +`VARCHAR DATE_FORMAT(DATETIME date, VARCHAR format)` + 将日期类型按照format的类型转化位字符串, 当前支持最大128字节的字符串,如果返回值长度超过128,则返回NULL @@ -70,10 +74,6 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。 %y | 年,2 位 - Syntax - -`VARCHAR DATE_FORMAT(DATETIME date, VARCHAR format)` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md index 0f3eb613206f6e..6d58475ca9985f 100644 --- 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_sub.md @@ -1,5 +1,9 @@ # date_sub ## description +### Syntax + +`INT DATE_SUB(DATETIME date,INTERVAL expr type)` + 从日期减去指定的时间间隔 @@ -9,10 +13,6 @@ expr 参数是您希望添加的时间间隔。 type 参数可以是下列值:YEAR, MONTH, DAY, HOUR, MINUTE, SECOND - Syntax - -`INT DATE_SUB(DATETIME date,INTERVAL expr type)` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md index fb6e1a00398452..02484bfe5da1aa 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md @@ -1,5 +1,9 @@ # datediff ## description +### Syntax + +`DATETIME DATEDIFF(DATETIME expr1,DATETIME expr2)` + 计算expr1 - expr2,结果精确到天。 @@ -7,10 +11,6 @@ expr1 和 expr2 参数是合法的日期或日期/时间表达式。 注释:只有值的日期部分参与计算。 - Syntax - -`DATETIME DATEDIFF(DATETIME expr1,DATETIME expr2)` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md index a59c3ede489f04..42f20893b6effe 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md @@ -1,13 +1,13 @@ # day ## description +### Syntax -获得日期中的天信息,返回值范围从1-31。 +`INT DAY(DATETIME date)` -参数为Date或者Datetime类型 - Syntax +获得日期中的天信息,返回值范围从1-31。 -`INT DAY(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md index 450d417b798d42..2d0a08a0c7b566 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md @@ -1,13 +1,13 @@ # dayname ## description +### Syntax -返回日期对应的日期名字 +`VARCHAR DAYNAME(DATE)` -参数为Date或者Datetime类型 - Syntax +返回日期对应的日期名字 -`VARCHAR DAYNAME(DATE)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md index a75bd2799e64a9..efe881d4a93c19 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md @@ -1,13 +1,13 @@ # dayofmonth ## description +### Syntax -获得日期中的天信息,返回值范围从1-31。 +`INT DAYOFMONTH(DATETIME date)` -参数为Date或者Datetime类型 - Syntax +获得日期中的天信息,返回值范围从1-31。 -`INT DAYOFMONTH(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md index 6806d0cc55e5bf..b314fa3ddf5b8d 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md @@ -1,13 +1,13 @@ # dayofweek ## description +### Syntax -DAYOFWEEK函数返回日期的工作日索引值,即星期日为1,星期一为2,星期六为7 +`INT dayofweek(DATETIME date)` -参数为Date或者Datetime类型 - Syntax 
+DAYOFWEEK函数返回日期的工作日索引值,即星期日为1,星期一为2,星期六为7 -`INT dayofweek(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md index e060a7326c1ce6..e205909690b9a0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md @@ -1,13 +1,13 @@ # dayofyear ## description +### Syntax -获得日期中对应当年中的哪一天。 +`INT DAYOFYEAR(DATETIME date)` -参数为Date或者Datetime类型 - Syntax +获得日期中对应当年中的哪一天。 -`INT DAYOFYEAR(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md index aea93d22683bc0..741e9ee4b2527d 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_days.md @@ -1,11 +1,11 @@ # from_days ## description +### Syntax -通过距离0000-01-01日的天数计算出哪一天 +`DATE FROM_DAYS(INT N)` - Syntax -`DATE FROM_DAYS(INT N)` +通过距离0000-01-01日的天数计算出哪一天 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md index 5a9a208d428dd2..9ac15232858f67 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/from_unixtime.md @@ -1,5 +1,9 @@ # from_unixtime ## description +### Syntax + +`DATETIME FROM_UNIXTIME(INT unix_timestamp[, VARCHAR string_format])` + 将unix时间戳转化位对应的time格式,返回的格式由string_format指定 @@ -11,10 +15,6 @@ 其余string_format格式是非法的,返回NULL - Syntax - -`DATETIME FROM_UNIXTIME(INT unix_timestamp[, VARCHAR string_format])` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md index b03bcfeccbf711..82b0d0842afac6 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md @@ -1,13 +1,13 @@ # month ## description +### Syntax -返回时间类型中的月份信息,范围是1, 12 +`INT MONTH(DATETIME date)` -参数为Date或者Datetime类型 - Syntax +返回时间类型中的月份信息,范围是1, 12 -`INT MONTH(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md index 54036e0b4b6339..fc4c11ad10d7d0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md @@ -1,13 +1,13 @@ # monthname ## description +### Syntax -返回日期对应的月份名字 +`VARCHAR MONTHNAME(DATE)` -参数为Date或者Datetime类型 - Syntax +返回日期对应的月份名字 -`VARCHAR MONTHNAME(DATE)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md index 0a358030b7b926..3454d88f4c3bb0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md 
+++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md @@ -1,11 +1,11 @@ # now ## description +### Syntax -获得当前的时间,以Datetime类型返回 +`DATETIME NOW()` - Syntax -`DATETIME NOW()` +获得当前的时间,以Datetime类型返回 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md index 15acc87ac5ecce..1f7ca178525e46 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md @@ -1,13 +1,13 @@ # str_to_date ## description +### Syntax -通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL +`DATETIME STR_TO_DATE(VARCHAR str, VARCHAR format)` -支持的format格式与date_format一致 - Syntax +通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL -`DATETIME STR_TO_DATE(VARCHAR str, VARCHAR format)` +支持的format格式与date_format一致 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md index 20d3efa76013e6..09ed793f1ba52e 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md @@ -1,5 +1,9 @@ # timediff ## description +### Syntax + +`TIME TIMEDIFF(DATETIME expr1, DATETIME expr2)` + TIMEDIFF返回两个DATETIME之间的差值 @@ -7,10 +11,6 @@ TIMEDIFF函数返回表示为时间值的expr1 - expr2的结果,返回值为TI 其结果被限制在从-838:59:59到838:59:59之间的TIME值范围内 - Syntax - -`TIME TIMEDIFF(DATETIME expr1, DATETIME expr2)` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md index e95c874914784a..4dc6894696c849 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/to_days.md @@ -1,13 +1,13 @@ # to_days ## description +### Syntax -返回date距离0000-01-01的天数 +`INT TO_DAYS(DATETIME date)` -参数为Date或者Datetime类型 - Syntax +返回date距离0000-01-01的天数 -`INT TO_DAYS(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md index 1bdc57a5490f93..fc63e69e07fb42 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md @@ -1,5 +1,9 @@ # unix_timestamp ## description +### Syntax + +`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date)` + 将Date或者Datetime类型转化为unix时间戳 @@ -7,10 +11,6 @@ 参数需要是Date或者Datetime类型 - Syntax - -`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date)` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md index d2cdd5dcd41393..c61d7a2da0ec55 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/utc_timestamp.md @@ -1,5 +1,9 @@ # utc_timestamp ## description +### Syntax + +`DATETIME UTC_TIMESTAMP()` + 返回当前UTC日期和时间在 "YYYY-MM-DD HH:MM:SS" 或 @@ -7,10 
+11,6 @@ 根据该函数是否用在字符串或数字语境中 - Syntax - -`DATETIME UTC_TIMESTAMP()` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md index bb904f63425432..6226292752104a 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md @@ -1,14 +1,14 @@ # weekofyear ## description +### Syntax +`INT WEEKOFYEAR(DATETIME date)` -获得一年中的第几周 -参数为Date或者Datetime类型 - Syntax +获得一年中的第几周 -`INT WEEKOFYEAR(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md index 324c24cf57e1f7..3662a773977c9e 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md @@ -1,13 +1,13 @@ # year ## description +### Syntax -返回date类型的year部分,范围从1000-9999 +`INT YEAR(DATETIME date)` -参数为Date或者Datetime类型 - Syntax +返回date类型的year部分,范围从1000-9999 -`INT YEAR(DATETIME date)` +参数为Date或者Datetime类型 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md index a4dc5b6e37a571..c29c3c984b9db9 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md @@ -1,11 +1,11 @@ # `ST_AsText`,`ST_AsWKT` ## description +### Syntax -将一个几何图形转化为WKT(Well Known Text)的表示形式 +`VARCHAR ST_AsText(GEOMETRY geo)` - Syntax -`VARCHAR ST_AsText(GEOMETRY geo)` +将一个几何图形转化为WKT(Well Known Text)的表示形式 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md index 0c02204403c06b..b244e04366deb1 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md @@ -1,12 +1,12 @@ # `ST_Circle` ## description +### Syntax -将一个WKT(Well Known Text)转化为地球球面上的一个圆。其中`center_lng`表示的圆心的经度, -`center_lat`表示的是圆心的纬度,`radius`表示的是圆的半径,单位是米 +`GEOMETRY ST_Circle(DOUBLE center_lng, DOUBLE center_lat, DOUBLE radius)` - Syntax -`GEOMETRY ST_Circle(DOUBLE center_lng, DOUBLE center_lat, DOUBLE radius)` +将一个WKT(Well Known Text)转化为地球球面上的一个圆。其中`center_lng`表示的圆心的经度, +`center_lat`表示的是圆心的纬度,`radius`表示的是圆的半径,单位是米 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md index 5269281c0c6254..ce47c52a45f148 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md @@ -1,11 +1,11 @@ # `ST_Contains` ## description +### Syntax -判断几何图形shape1是否完全能够包含几何图形shape2 +`BOOL ST_Contains(GEOMETRY shape1, GEOMETRY shape2)` - Syntax -`BOOL ST_Contains(GEOMETRY shape1, GEOMETRY shape2)` +判断几何图形shape1是否完全能够包含几何图形shape2 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md 
b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md index fc48a1ae2e3e64..27b085861463b0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md @@ -1,11 +1,11 @@ # `ST_Distance_Sphere` ## description +### Syntax -计算地球两点之间的球面距离,单位为 米。传入的参数分别为X点的经度,X点的纬度,Y点的经度,Y点的纬度。 +`DOUBLE ST_Distance_Sphere(DOUBLE x_lng, DOUBLE x_lat, DOUBLE y_lng, DOUBLE x_lat)` - Syntax -`DOUBLE ST_Distance_Sphere(DOUBLE x_lng, DOUBLE x_lat, DOUBLE y_lng, DOUBLE x_lat)` +计算地球两点之间的球面距离,单位为 米。传入的参数分别为X点的经度,X点的纬度,Y点的经度,Y点的纬度。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md index bdd99cbdc759fb..9f223c55ffa68f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md @@ -1,11 +1,11 @@ # `ST_GeometryFromText`,`ST_GeomFromText` ## description +### Syntax -将一个WKT(Well Known Text)转化为对应的内存的几何形式 +`GEOMETRY ST_GeometryFromText(VARCHAR wkt)` - Syntax -`GEOMETRY ST_GeometryFromText(VARCHAR wkt)` +将一个WKT(Well Known Text)转化为对应的内存的几何形式 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md index bad31994e470dd..29799f48e1e60c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md @@ -1,11 +1,11 @@ # `ST_LineFromText`,`ST_LineStringFromText` ## description +### Syntax -将一个WKT(Well Known Text)转化为一个Line形式的内存表现形式 +`GEOMETRY ST_LineFromText(VARCHAR wkt)` - Syntax -`GEOMETRY ST_LineFromText(VARCHAR wkt)` +将一个WKT(Well Known Text)转化为一个Line形式的内存表现形式 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md index 19a43f9f1a0457..876ac0035b12e3 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md @@ -1,12 +1,12 @@ # `ST_Point` ## description +### Syntax -通过给定的X坐标值,Y坐标值返回对应的Point。 -当前这个值只是在球面集合上有意义,X/Y对应的是经度/纬度(longitude/latitude) +`POINT ST_Point(DOUBLE x, DOUBLE y)` - Syntax -`POINT ST_Point(DOUBLE x, DOUBLE y)` +通过给定的X坐标值,Y坐标值返回对应的Point。 +当前这个值只是在球面集合上有意义,X/Y对应的是经度/纬度(longitude/latitude) ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md index 74cd5a93da14ac..35392fd8c9683c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md @@ -1,11 +1,11 @@ # `ST_Polygon`,`ST_PolyFromText`,`ST_PolygonFromText` ## description +### Syntax -将一个WKT(Well Known Text)转化为对应的多边形内存形式 +`GEOMETRY ST_Polygon(VARCHAR wkt)` - Syntax -`GEOMETRY ST_Polygon(VARCHAR wkt)` +将一个WKT(Well Known Text)转化为对应的多边形内存形式 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md 
b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md index 523aecae93f270..f819e697c4f876 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md @@ -1,11 +1,11 @@ # `ST_X` ## description +### Syntax -当point是一个合法的POINT类型时,返回对应的X坐标值 +`DOUBLE ST_X(POINT point)` - Syntax -`DOUBLE ST_X(POINT point)` +当point是一个合法的POINT类型时,返回对应的X坐标值 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md index 6b23348b23a620..a8bc23b257ae28 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md @@ -1,11 +1,11 @@ # `ST_Y` ## description +### Syntax -当point是一个合法的POINT类型时,返回对应的Y坐标值 +`DOUBLE ST_Y(POINT point)` - Syntax -`DOUBLE ST_Y(POINT point)` +当point是一个合法的POINT类型时,返回对应的Y坐标值 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md index 832d791418ae10..0b1c2626dad3f2 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md @@ -1,11 +1,11 @@ # ascii ## description +### Syntax -返回字符串第一个字符对应的 ascii 码 +`INT ascii(VARCHAR str)` - Syntax -`INT ascii(VARCHAR str)` +返回字符串第一个字符对应的 ascii 码 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md index 59600c30a8973c..85056e2c969821 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md @@ -1,11 +1,11 @@ # concat ## description +### Syntax -将多个字符串连接起来, 如果参数中任意一个值是 NULL,那么返回的结果就是 NULL +`VARCHAR concat(VARCHAR,...)` - Syntax -`VARCHAR concat(VARCHAR,...)` +将多个字符串连接起来, 如果参数中任意一个值是 NULL,那么返回的结果就是 NULL ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md index 83e38b7b4e2aaf..44784208fae6ad 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat_ws.md @@ -1,14 +1,14 @@ # concat_ws ## description +### Syntax + +`VARCHAR concat_ws(VARCHAR sep, VARCHAR str,...)` + 使用第一个参数 sep 作为连接符,将第二个参数以及后续所有参数拼接成一个字符串. 
如果分隔符是 NULL,返回 NULL。 `concat_ws`函数不会跳过空字符串,会跳过 NULL 值 - Syntax - -`VARCHAR concat_ws(VARCHAR sep, VARCHAR str,...)` - ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md index 15e992ea5642d4..e66d41d7ac7d6a 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/find_in_set.md @@ -1,11 +1,11 @@ # find_in_set ## description +### Syntax -返回 strlist 中第一次出现 str 的位置(从1开始计数)。strlist 是用逗号分隔的字符串。如果没有找到,返回0。任意参数为 NULL ,返回 NULL。 +`INT find_in_set(VARCHAR str, VARCHAR strlist)` - Syntax -`INT find_in_set(VARCHAR str, VARCHAR strlist)` +返回 strlist 中第一次出现 str 的位置(从1开始计数)。strlist 是用逗号分隔的字符串。如果没有找到,返回0。任意参数为 NULL ,返回 NULL。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md index 7eff7cad1eb22f..c6f766e6fe515f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md @@ -1,5 +1,9 @@ # get_json_double ## description +### Syntax + +`DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path) + 解析并获取 json 字符串内指定路径的浮点型内容。 其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 @@ -7,10 +11,6 @@ path 的内容不能包含 ", [ 和 ]。 如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 - Syntax - -`DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path) - ## example 1. 获取 key 为 "k1" 的 value diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md index 5b6088223b350b..56366e4b956b16 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md @@ -1,5 +1,9 @@ # get_json_int ## description +### Syntax + +`INT get_json_int(VARCHAR json_str, VARCHAR json_path) + 解析并获取 json 字符串内指定路径的整型内容。 其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 @@ -7,10 +11,6 @@ path 的内容不能包含 ", [ 和 ]。 如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 - Syntax - -`INT get_json_int(VARCHAR json_str, VARCHAR json_path) - ## example 1. 获取 key 为 "k1" 的 value diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md index eb377a5bf30e39..504de06f92c318 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md @@ -1,5 +1,9 @@ # get_json_string ## description +### Syntax + +`VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path) + 解析并获取 json 字符串内指定路径的字符串内容。 其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 @@ -7,10 +11,6 @@ path 的内容不能包含 ", [ 和 ]。 如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 - Syntax - -`VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path) - ## example 1. 
获取 key 为 "k1" 的 value diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md index 6a6f58d2d862f9..721f820543d25e 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/group_concat.md @@ -1,11 +1,11 @@ # group_concat ## description +### Syntax -该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略。该函数通常需要和 group by 语句一起使用。 +`VARCHAR group_concat(VARCHAR str[, VARCHAR sep])` - Syntax -`VARCHAR group_concat(VARCHAR str[, VARCHAR sep])` +该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略。该函数通常需要和 group by 语句一起使用。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md index 8da641329705e6..5abaf02eadb24f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md @@ -1,11 +1,11 @@ # instr ## description +### Syntax -返回 substr 在 str 中第一次出现的位置(从1开始计数)。如果 substr 不在 str 中出现,则返回0。 +`INT instr(VARCHAR str, VARCHAR substr)` - Syntax -`INT instr(VARCHAR str, VARCHAR substr)` +返回 substr 在 str 中第一次出现的位置(从1开始计数)。如果 substr 不在 str 中出现,则返回0。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md index a56b914f568aee..3972cd48c19f4f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md @@ -1,11 +1,11 @@ # lcase ## description +### Syntax -与`lower`一致 +`INT lcase(VARCHAR str)` - Syntax -`INT lcase(VARCHAR str)` +与`lower`一致 ##keyword LCASE,LCASE diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md index 6279e98aed8f96..dc0fa432343d23 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md @@ -1,11 +1,11 @@ # left ## description +### Syntax -它返回具有指定长度的字符串的左边部分 +`VARCHAR left(VARCHAR str)` - Syntax -`VARCHAR left(VARCHAR str)` +它返回具有指定长度的字符串的左边部分 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md index cc4bf04618c5df..1e959f9db0685c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md @@ -1,11 +1,11 @@ # length ## description +### Syntax -返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。 +`INT length(VARCHAR str)` - Syntax -`INT length(VARCHAR str)` +返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md index b7b178e0fa409a..bab569fe64447f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md @@ -1,11 +1,11 @@ 
# locate ## description +### Syntax -返回 substr 在 str 中出现的位置(从1开始计数)。如果指定第3个参数 pos,则从 str 以 pos 下标开始的字符串处开始查找 substr 出现的位置。如果没有找到,返回0 +`INT locate(VARCHAR substr, VARCHAR str[, INT pos])` - Syntax -`INT locate(VARCHAR substr, VARCHAR str[, INT pos])` +返回 substr 在 str 中出现的位置(从1开始计数)。如果指定第3个参数 pos,则从 str 以 pos 下标开始的字符串处开始查找 substr 出现的位置。如果没有找到,返回0 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md index fe7c3e4aa606ac..d0b394388fd471 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md @@ -1,11 +1,11 @@ # lower ## description +### Syntax -将参数中所有的字符串都转换成小写 +`INT lower(VARCHAR str)` - Syntax -`INT lower(VARCHAR str)` +将参数中所有的字符串都转换成小写 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md index 178aa0eebcc954..060a784695bed8 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md @@ -1,11 +1,11 @@ # lpad ## description +### Syntax -返回 str 中长度为 len(从首字母开始算起)的字符串。如果 len 大于 str 的长度,则在 str 的前面不断补充 pad 字符,直到该字符串的长度达到 len 为止。如果 len 小于 str 的长度,该函数相当于截断 str 字符串,只返回长度为 len 的字符串。 +`VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)` - Syntax -`VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)` +返回 str 中长度为 len(从首字母开始算起)的字符串。如果 len 大于 str 的长度,则在 str 的前面不断补充 pad 字符,直到该字符串的长度达到 len 为止。如果 len 小于 str 的长度,该函数相当于截断 str 字符串,只返回长度为 len 的字符串。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md index e7454e6f8e5aed..566c7b07a8dcb0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md @@ -1,11 +1,11 @@ # ltrim ## description +### Syntax -将参数 str 中从开始部分连续出现的空格去掉 +`VARCHAR ltrim(VARCHAR str)` - Syntax -`VARCHAR ltrim(VARCHAR str)` +将参数 str 中从开始部分连续出现的空格去掉 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md index 11824e3fee3d1b..6abc5de94f408d 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/money_format.md @@ -1,11 +1,11 @@ # money_format ## description +### Syntax -将数字按照货币格式输出,整数部分每隔3位用逗号分隔,小数部分保留2位 +VARCHAR money_format(Number) - Syntax -VARCHAR money_format(Number) +将数字按照货币格式输出,整数部分每隔3位用逗号分隔,小数部分保留2位 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md index 7ca0602a547d9d..5746b7b7b0cdd8 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_extract.md @@ -1,11 +1,11 @@ # regexp_extract ## description +### Syntax -对字符串 str 进行正则匹配,抽取符合 pattern 的第 pos 个匹配部分。需要 pattern 完全匹配 str 中的某部分,这样才能返回 pattern 部分中需匹配部分。如果没有匹配,返回空字符串。 +`VARCHAR regexp_extract(VARCHAR str, VARCHAR pattern, int pos)` - Syntax 
-`VARCHAR regexp_extract(VARCHAR str, VARCHAR pattern, int pos)` +对字符串 str 进行正则匹配,抽取符合 pattern 的第 pos 个匹配部分。需要 pattern 完全匹配 str 中的某部分,这样才能返回 pattern 部分中需匹配部分。如果没有匹配,返回空字符串。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md index f4fc3b70f4e6c1..c6bdae2d96e723 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/regexp_replace.md @@ -1,11 +1,11 @@ # regexp_replace ## description +### Syntax -对字符串 str 进行正则匹配, 将命中 pattern 的部分使用 repl 来进行替换 +`VARCHAR regexp_replace(VARCHAR str, VARCHAR pattern, VARCHAR repl) - Syntax -`VARCHAR regexp_replace(VARCHAR str, VARCHAR pattern, VARCHAR repl) +对字符串 str 进行正则匹配, 将命中 pattern 的部分使用 repl 来进行替换 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md index 14a9a59d785434..ef8a49861ea884 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md @@ -1,11 +1,11 @@ # repeat ## description +### Syntax -将字符串 str 重复 count 次输出,count 小于1时返回空串,str,count 任一为NULL时,返回 NULL +`VARCHAR repeat(VARCHAR str, INT count) - Syntax -`VARCHAR repeat(VARCHAR str, INT count) +将字符串 str 重复 count 次输出,count 小于1时返回空串,str,count 任一为NULL时,返回 NULL ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md index 4c01a89bd05c79..9931cb594bab2c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md @@ -1,11 +1,11 @@ # right ## description +### Syntax -它返回具有指定长度的字符串的右边部分 +`VARCHAR right(VARCHAR str)` - Syntax -`VARCHAR right(VARCHAR str)` +它返回具有指定长度的字符串的右边部分 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md index c2ef720da13f0c..80c8c25292771a 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/split_part.md @@ -1,11 +1,11 @@ # split_part ## description +### Syntax -根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。 +`VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)` - Syntax -`VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)` +根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md index 95cf82da514dbd..102b5786ed240f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md @@ -1,11 +1,11 @@ # strleft ## description +### Syntax -它返回具有指定长度的字符串的左边部分 +`VARCHAR strleft(VARCHAR str)` - Syntax -`VARCHAR strleft(VARCHAR str)` +它返回具有指定长度的字符串的左边部分 ## example diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md index 
56ebc34020153f..8dcf94dbc39290 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md @@ -1,11 +1,11 @@ # strright ## description +### Syntax -它返回具有指定长度的字符串的右边部分 +`VARCHAR strright(VARCHAR str)` - Syntax -`VARCHAR strright(VARCHAR str)` +它返回具有指定长度的字符串的右边部分 ## example diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md index 1b51f48b0799bd..21eb0e08e3fc23 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md @@ -1,11 +1,11 @@ # SHOW ROLES +## description ## description 该语句用于展示所有已创建的角色信息,包括角色名称,包含的用户以及权限。 语法: SHOW ROLES; -## description 该语句用户删除一个角色 语法: diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md index f2bf7953928c04..6e197e710596c2 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md @@ -1,11 +1,6 @@ # CREATE FUNCTION ## description - -此语句创建一个自定义函数。执行此命令需要用户拥有 `ADMIN` 权限。 - -如果 `function_name` 中包含了数据库名字,那么这个自定义函数会创建在对应的数据库中,否则这个函数将会创建在当前会话所在的数据库。新函数的名字与参数不能够与当前命名空间中已存在的函数相同,否则会创建失败。但是只有名字相同,参数不同是能够创建成功的。 - - Syntax +### Syntax ``` CREATE [AGGREGATE] FUNCTION function_name @@ -15,7 +10,7 @@ CREATE [AGGREGATE] FUNCTION function_name [PROPERTIES ("key" = "value" [, ...]) ] ``` - Parameters +### Parameters > `AGGREGATE`: 如果有此项,表示的是创建的函数是一个聚合函数,否则创建的是一个标量函数。 > @@ -45,6 +40,11 @@ CREATE [AGGREGATE] FUNCTION function_name > > "md5": 函数动态链接库的MD5值,用于校验下载的内容是否正确。此选项是可选项 + +此语句创建一个自定义函数。执行此命令需要用户拥有 `ADMIN` 权限。 + +如果 `function_name` 中包含了数据库名字,那么这个自定义函数会创建在对应的数据库中,否则这个函数将会创建在当前会话所在的数据库。新函数的名字与参数不能够与当前命名空间中已存在的函数相同,否则会创建失败。但是只有名字相同,参数不同是能够创建成功的。 + ## example 1. 创建一个自定义标量函数 diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md index b0ee8166cc893f..eac71eb7f74b78 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md @@ -1,22 +1,22 @@ # DROP FUNCTION ## description - -删除一个自定义函数。函数的名字、参数类型完全一致才能够被删除 - - Syntax +### Syntax ``` DROP FUNCTION function_name (arg_type [, ...]) ``` - Parameters +### Parameters > `function_name`: 要删除函数的名字 > > `arg_type`: 要删除函数的参数列表 > + +删除一个自定义函数。函数的名字、参数类型完全一致才能够被删除 + ## example 1. 
删除掉一个函数 diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md index eeb84e0ff79631..b4f513654408d4 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md @@ -1,20 +1,20 @@ # SHOW FUNCTION ## description - -查看数据库下所有的自定义函数。如果用户指定了数据库,那么查看对应数据库的,否则直接查询当前会话所在数据库 - -需要对这个数据库拥有 `SHOW` 权限 - - Syntax +### Syntax ``` SHOW FUNCTION [FROM db] ``` - Parameters +### Parameters > `db`: 要查询的数据库名字 + +查看数据库下所有的自定义函数。如果用户指定了数据库,那么查看对应数据库的,否则直接查询当前会话所在数据库 + +需要对这个数据库拥有 `SHOW` 权限 + ## example ``` diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md index 91ed85227e019f..a43d8f39b0612e 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md @@ -1,7 +1,6 @@ # INSERT ## description - - Syntax +### Syntax ``` INSERT INTO table_name @@ -11,7 +10,7 @@ INSERT INTO table_name { VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query } ``` - Parameters +### Parameters > tablet_name: 导入数据的目的表。可以是 `db_name.table_name` 形式 > @@ -28,7 +27,7 @@ INSERT INTO table_name > hint: 用于指示 `INSERT` 执行行为的一些指示符。`streaming` 和 默认的非 `streaming` 方式均会使用同步方式完成 `INSERT` 语句执行 > 非 `streaming` 方式在执行完成后会返回一个 label 方便用户通过 `SHOW LOAD` 查询导入的状态 - Note +### Note 当前执行 `INSERT` 语句时,对于有不符合目标表格式的数据,默认的行为是过滤,比如字符串超长等。但是对于有要求数据不能够被过滤的业务场景,可以通过设置会话变量 `enable_insert_strict` 为 `true` 来确保当有数据被过滤掉的时候,`INSERT` 不会被执行成功。 diff --git a/fe/src/main/java/org/apache/doris/common/MarkDownParser.java b/fe/src/main/java/org/apache/doris/common/MarkDownParser.java old mode 100644 new mode 100755 index 573f2333822230..c8aa2558febbb3 --- a/fe/src/main/java/org/apache/doris/common/MarkDownParser.java +++ b/fe/src/main/java/org/apache/doris/common/MarkDownParser.java @@ -133,7 +133,13 @@ private Map.Entry parseOneItem() { if (!lines.get(nextToRead).startsWith("#")) { sb.append(lines.get(nextToRead)).append('\n'); nextToRead++; - } else { + } + // Ignore head at level 3 or bigger + else if (lines.get(nextToRead).startsWith("###")) { + sb.append(lines.get(nextToRead).replaceAll("#","")).append('\n'); + nextToRead++; + } + else { break; } } From 0b362e4eb5ccb590923820e2a9ee886bf8e6fdc4 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 7 Aug 2019 12:45:24 +0800 Subject: [PATCH 6/9] Add a unit tests in MarkDownParserTest --- .../apache/doris/common/MarkDownParser.java | 7 ++-- .../doris/common/MarkDownParserTest.java | 39 +++++++++++++++++-- 2 files changed, 39 insertions(+), 7 deletions(-) mode change 100644 => 100755 fe/src/test/java/org/apache/doris/common/MarkDownParserTest.java diff --git a/fe/src/main/java/org/apache/doris/common/MarkDownParser.java b/fe/src/main/java/org/apache/doris/common/MarkDownParser.java index c8aa2558febbb3..0ad093ff91b40c 100755 --- a/fe/src/main/java/org/apache/doris/common/MarkDownParser.java +++ b/fe/src/main/java/org/apache/doris/common/MarkDownParser.java @@ -99,8 +99,9 @@ public Map> parse() throws UserException { } else if (headLevel == 2) { keyValues.put(keyValue.getKey(), keyValue.getValue()); } else { - // State error - throw new UserException("Unknown head level when parsing head level(2)"); + //Ignore headlevel greater than 2 instead of throwing 
a exception + //State error + //throw new UserException("Unknown head level when parsing head level(2)"); } break; default: @@ -134,7 +135,7 @@ private Map.Entry parseOneItem() { sb.append(lines.get(nextToRead)).append('\n'); nextToRead++; } - // Ignore head at level 3 or bigger + // Ignore headlevel greater than 2 else if (lines.get(nextToRead).startsWith("###")) { sb.append(lines.get(nextToRead).replaceAll("#","")).append('\n'); nextToRead++; diff --git a/fe/src/test/java/org/apache/doris/common/MarkDownParserTest.java b/fe/src/test/java/org/apache/doris/common/MarkDownParserTest.java old mode 100644 new mode 100755 index 3cd40e74844398..57fa3d1d287a3b --- a/fe/src/test/java/org/apache/doris/common/MarkDownParserTest.java +++ b/fe/src/test/java/org/apache/doris/common/MarkDownParserTest.java @@ -117,17 +117,48 @@ public void testNoFirst() throws UserException { Assert.fail("No exception throws."); } - @Test(expected = UserException.class) - public void testErrorState() throws UserException { +// When encounter a headlevel at 3 or greater, we ignore it rather than throw exception +// @Test(expected = UserException.class) +// public void testErrorState() throws UserException { +// List lines = Lists.newArrayList(); +// lines.add("# SHOW TABLES"); +// lines.add("## name"); +// lines.add("### name"); +// MarkDownParser parser = new MarkDownParser(lines); +// Map> map = parser.parse(); +// Assert.fail("No exception throws."); +// } + + @Test + public void testMultiHeadLevel() throws UserException { List lines = Lists.newArrayList(); lines.add("# SHOW TABLES"); lines.add("## name"); - lines.add("### name"); + lines.add(" SHOW TABLES"); + lines.add("## description"); + lines.add("###Syntax"); + lines.add("SYNTAX:\n\tSHOW TABLES [FROM] database"); + lines.add("####Parameter"); + lines.add(">table_name"); + lines.add("## example"); + lines.add("show tables;"); + lines.add("### Exam1"); + lines.add("exam1"); + lines.add("## keyword"); + lines.add("SHOW, TABLES"); + lines.add("## url"); + lines.add("http://www.baidu.com"); MarkDownParser parser = new MarkDownParser(lines); Map> map = parser.parse(); - Assert.fail("No exception throws."); + Assert.assertNotNull(map.get("SHOW TABLES")); + Assert.assertEquals(" SHOW TABLES\n", map.get("SHOW TABLES").get("name")); + Assert.assertEquals("Syntax\nSYNTAX:\n\tSHOW TABLES [FROM] database\nParameter\n>table_name\n", map.get("SHOW TABLES").get("description")); + Assert.assertEquals("show tables;\n Exam1\nexam1\n", map.get("SHOW TABLES").get("example")); + Assert.assertEquals("SHOW, TABLES\n", map.get("SHOW TABLES").get("keyword")); + Assert.assertEquals("http://www.baidu.com\n", map.get("SHOW TABLES").get("url")); } + @Test public void testEmptyTitle() throws UserException { List lines = Lists.newArrayList(); From 1646651b4eb40d6ca6ae5e40f683e12989f809d5 Mon Sep 17 00:00:00 2001 From: xy720 <22125576+xy720@users.noreply.github.com> Date: Wed, 7 Aug 2019 14:44:46 +0800 Subject: [PATCH 7/9] Update docs/Makefile Co-Authored-By: ZHAO Chun --- docs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Makefile b/docs/Makefile index a51e1336b6c279..1efeb975d1b87a 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -20,7 +20,7 @@ # BUILD_DIR = ${CURDIR}/build -HELP_DIR = ${CURDIR}/contents +HELP_DIR = ${CURDIR}/build/contents all: zip_help .PHONY: all From e6d7e3fb5f22860c0d3808c0d7e5d6febfd2f66c Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 7 Aug 2019 16:47:44 +0800 Subject: [PATCH 8/9] fix dup head and keywords, fix makefile --- 
docs/Makefile | 4 ++-- .../sql-functions/aggregate-functions/avg.md | 2 +- .../sql-functions/aggregate-functions/count.md | 2 +- .../sql-functions/aggregate-functions/max.md | 2 +- .../sql-functions/aggregate-functions/min.md | 2 +- .../sql-functions/aggregate-functions/ndv.md | 2 +- .../sql-functions/aggregate-functions/stddev.md | 2 +- .../sql-functions/aggregate-functions/sum.md | 2 +- .../aggregate-functions/var_samp.md | 2 +- .../aggregate-functions/variance.md | 2 +- .../cn/sql-reference/sql-functions/cast.md | 5 ++--- .../date-time-functions/datediff.md | 2 +- .../sql-functions/date-time-functions/day.md | 2 +- .../date-time-functions/dayname.md | 2 +- .../date-time-functions/dayofmonth.md | 2 +- .../date-time-functions/dayofweek.md | 2 +- .../date-time-functions/dayofyear.md | 2 +- .../sql-functions/date-time-functions/month.md | 2 +- .../date-time-functions/monthname.md | 2 +- .../sql-functions/date-time-functions/now.md | 2 +- .../date-time-functions/timediff.md | 2 +- .../date-time-functions/workofyear.md | 2 +- .../sql-functions/date-time-functions/year.md | 2 +- .../spatial-functions/st_astext.md | 2 +- .../spatial-functions/st_geometryfromtext.md | 2 +- .../spatial-functions/st_linefromtext.md | 2 +- .../spatial-functions/st_polygon.md | 2 +- .../sql-functions/string-functions/ascii.md | 2 +- .../sql-functions/string-functions/concat.md | 2 +- .../sql-functions/string-functions/instr.md | 2 +- .../sql-functions/string-functions/lcase.md | 2 +- .../sql-functions/string-functions/left.md | 2 +- .../sql-functions/string-functions/length.md | 2 +- .../sql-functions/string-functions/locate.md | 2 +- .../sql-functions/string-functions/lower.md | 2 +- .../sql-functions/string-functions/lpad.md | 2 +- .../sql-functions/string-functions/ltrim.md | 2 +- .../sql-functions/string-functions/repeat.md | 2 +- .../sql-functions/string-functions/right.md | 2 +- .../sql-functions/string-functions/strleft.md | 2 +- .../sql-functions/string-functions/strright.md | 2 +- .../Account Management/SHOW ROLES.md | 17 ----------------- .../Data Definition/create-function.md | 2 +- .../Data Definition/drop-function.md | 2 +- .../Data Definition/show-function.md | 2 +- .../Data Manipulation/broker_load.md | 2 +- .../sql-statements/Data Manipulation/insert.md | 2 +- .../sql-statements/Data Types/BIGINT.md | 2 +- .../sql-statements/Data Types/BOOLEAN.md | 2 +- .../sql-statements/Data Types/CHAR.md | 2 +- .../sql-statements/Data Types/DATE.md | 2 +- .../sql-statements/Data Types/DATETIME.md | 2 +- .../sql-statements/Data Types/DECIMAL.md | 2 +- .../sql-statements/Data Types/DOUBLE.md | 2 +- .../sql-statements/Data Types/FLOAT.md | 2 +- .../Data Types/HLL(HyperLogLog).md | 2 +- .../sql-statements/Data Types/INT.md | 2 +- .../sql-statements/Data Types/SMALLINT.md | 2 +- .../sql-statements/Data Types/TINYINT.md | 2 +- .../sql-statements/Data Types/VARCHAR.md | 2 +- 60 files changed, 61 insertions(+), 79 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 1efeb975d1b87a..6d971114bc8806 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -20,7 +20,7 @@ # BUILD_DIR = ${CURDIR}/build -HELP_DIR = ${CURDIR}/build/contents +HELP_DIR = ${BUILD_DIR}/contents all: zip_help .PHONY: all @@ -34,7 +34,7 @@ ${HELP_DIR}: HELP_OUTPUT = ${BUILD_DIR}/help-resource.zip ${HELP_OUTPUT}: documentation/cn/sql-reference ${BUILD_DIR} ${HELP_DIR} cp -r $ SELECT datetime, AVG(distinct cost_time) FROM log_statis group by datetim ``` ##keyword -AVG,AVG +AVG diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md index f452fa21c34019..b8bdef43a55e96 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md @@ -32,4 +32,4 @@ MySQL > select count(distinct datetime) from log_statis group by datetime; +-------------------------------+ ``` ##keyword -COUNT,COUNT +COUNT diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md index 29c2fdeab64b6f..fe4ecdadc63254 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md @@ -17,4 +17,4 @@ MySQL > select max(scan_rows) from log_statis group by datetime; +------------------+ ``` ##keyword -MAX,MAX +MAX diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md index be805ea192d848..3a6ce810f91f1f 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md @@ -17,4 +17,4 @@ MySQL > select min(scan_rows) from log_statis group by datetime; +------------------+ ``` ##keyword -MIN,MIN +MIN diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md index 62eebf873b3bc8..01a46f5b9a70d0 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md @@ -19,4 +19,4 @@ MySQL > select ndv(query_id) from log_statis group by datetime; +-----------------+ ``` ##keyword -NDV,NDV +NDV diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md index f3db9b673e3cb0..3a84758a2783fc 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md @@ -24,4 +24,4 @@ MySQL > select stddev_pop(scan_rows) from log_statis group by datetime; +-------------------------+ ``` ##keyword -STDDEV,STDDEV_POP,STDDEV,STDDEV,POP +STDDEV,STDDEV_POP,POP diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md index fb71b5154fc421..4d0959af1ddee5 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md @@ -17,4 +17,4 @@ MySQL > select sum(scan_rows) from log_statis group by datetime; +------------------+ ``` ##keyword -SUM,SUM +SUM diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md index 94d9f03270bfec..ac105c257f2447 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md +++ 
b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md @@ -16,4 +16,4 @@ MySQL > select var_samp(scan_rows) from log_statis group by datetime; | 5.6227132145741789 | +-----------------------+ ##keyword -VAR_SAMP,VARIANCE_SAMP,VAR,SAMP,VARIANCE,SAMP +VAR_SAMP,VARIANCE_SAMP,VAR,SAMP,VARIANCE diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md index 7a46a827b5f624..20c3a6fa152152 100755 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md @@ -23,4 +23,4 @@ MySQL > select var_pop(scan_rows) from log_statis group by datetime; | 5.6230744719006163 | +----------------------+ ##keyword -VARIANCE,VAR_POP,VARIANCE_POP,VARIANCE,VAR,POP,VARIANCE,POP +VARIANCE,VAR_POP,VARIANCE_POP,VAR,POP diff --git a/docs/documentation/cn/sql-reference/sql-functions/cast.md b/docs/documentation/cn/sql-reference/sql-functions/cast.md index 4ac9937e0f5bb0..9b08ba6ea6cb04 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/cast.md +++ b/docs/documentation/cn/sql-reference/sql-functions/cast.md @@ -1,6 +1,5 @@ # CAST ## description -## description ### Syntax ``` @@ -9,7 +8,7 @@ cast (input as type) ### BIGINT type -### Syntax +### Syntax(BIGINT) ``` cast (input as BIGINT) ``` @@ -54,4 +53,4 @@ mysql> select cast(cast ("11.2" as double) as bigint); 1 row in set (0.00 sec) ``` ##keyword -CAST,CAST +CAST diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md index 02484bfe5da1aa..67a8fedeba25b3 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/datediff.md @@ -29,4 +29,4 @@ mysql> select datediff(CAST('2010-11-30 23:59:59' AS DATETIME), CAST('2010-12-31 +-----------------------------------------------------------------------------------+ ``` ##keyword -DATEDIFF,DATEDIFF +DATEDIFF diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md index 42f20893b6effe..cd926a4287d54e 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/day.md @@ -19,4 +19,4 @@ mysql> select day('1987-01-31'); | 31 | +----------------------------+ ##keyword -DAY,DAY +DAY diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md index 2d0a08a0c7b566..872daa7cdc0a9c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayname.md @@ -19,4 +19,4 @@ mysql> select dayname('2007-02-03 00:00:00'); | Saturday | +--------------------------------+ ##keyword -DAYNAME,DAYNAME +DAYNAME diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md index efe881d4a93c19..7071b7d20bfda9 100644 --- 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofmonth.md @@ -19,4 +19,4 @@ mysql> select dayofmonth('1987-01-31'); | 31 | +-----------------------------------+ ##keyword -DAYOFMONTH,DAYOFMONTH +DAYOFMONTH diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md index b314fa3ddf5b8d..63f8853e54edd6 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofweek.md @@ -19,4 +19,4 @@ mysql> select dayofweek('2019-06-25'); | 3 | +----------------------------------+ ##keyword -DAYOFWEEK,DAYOFWEEK +DAYOFWEEK diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md index e205909690b9a0..2d0bae42f6fb3c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/dayofyear.md @@ -19,4 +19,4 @@ mysql> select dayofyear('2007-02-03 00:00:00'); | 34 | +----------------------------------+ ##keyword -DAYOFYEAR,DAYOFYEAR +DAYOFYEAR diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md index 82b0d0842afac6..7588b1f63cd8fb 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/month.md @@ -19,4 +19,4 @@ mysql> select month('1987-01-01'); | 1 | +-----------------------------+ ##keyword -MONTH,MONTH +MONTH diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md index fc4c11ad10d7d0..8e0a6e0901666c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/monthname.md @@ -19,4 +19,4 @@ mysql> select monthname('2008-02-03 00:00:00'); | February | +----------------------------------+ ##keyword -MONTHNAME,MONTHNAME +MONTHNAME diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md index 3454d88f4c3bb0..71afb0f0f99be4 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/now.md @@ -17,4 +17,4 @@ mysql> select now(); | 2019-05-27 15:58:25 | +---------------------+ ##keyword -NOW,NOW +NOW diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md index 09ed793f1ba52e..7ab169a3c57f8f 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/timediff.md @@ -43,4 +43,4 @@ mysql> SELECT TIMEDIFF('2019-01-01 00:00:00', NULL); +---------------------------------------+ ``` ##keyword -TIMEDIFF,TIMEDIFF +TIMEDIFF diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md index 6226292752104a..3377af454e0309 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/workofyear.md @@ -20,4 +20,4 @@ mysql> select weekofyear('2008-02-20 00:00:00'); | 8 | +-----------------------------------+ ##keyword -WEEKOFYEAR,WEEKOFYEAR +WEEKOFYEAR diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md index 3662a773977c9e..1ed81559f893dc 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/year.md @@ -19,4 +19,4 @@ mysql> select year('1987-01-01'); | 1987 | +-----------------------------+ ##keyword -YEAR,YEAR +YEAR diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md index c29c3c984b9db9..7e8c47c795d645 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md @@ -18,4 +18,4 @@ mysql> SELECT ST_AsText(ST_Point(24.7, 56.7)); +---------------------------------+ ``` ##keyword -ST_ASTEXT,ST_ASWKT,ST,ASTEXT,ST,ASWKT +ST_ASTEXT,ST_ASWKT,ST,ASTEXT,ASWKT diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md index 9f223c55ffa68f..72dd9d7dfbaa58 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md @@ -18,4 +18,4 @@ mysql> SELECT ST_AsText(ST_GeometryFromText("LINESTRING (1 1, 2 2)")); +---------------------------------------------------------+ ``` ##keyword -ST_GEOMETRYFROMTEXT,ST_GEOMFROMTEXT,ST,GEOMETRYFROMTEXT,ST,GEOMFROMTEXT +ST_GEOMETRYFROMTEXT,ST_GEOMFROMTEXT,ST,GEOMETRYFROMTEXT,GEOMFROMTEXT diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md index 29799f48e1e60c..7f7165cd9f5d65 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md @@ -18,4 +18,4 @@ mysql> SELECT ST_AsText(ST_LineFromText("LINESTRING (1 1, 2 2)")); +---------------------------------------------------------+ ``` ##keyword -ST_LINEFROMTEXT,ST_LINESTRINGFROMTEXT,ST,LINEFROMTEXT,ST,LINESTRINGFROMTEXT +ST_LINEFROMTEXT,ST_LINESTRINGFROMTEXT,ST,LINEFROMTEXT,LINESTRINGFROMTEXT diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md index 35392fd8c9683c..d1b810199e5fd6 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md @@ -18,4 +18,4 @@ mysql> SELECT 
ST_AsText(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))")); +------------------------------------------------------------------+ ``` ##keyword -ST_POLYGON,ST_POLYFROMTEXT,ST_POLYGONFROMTEXT,ST,POLYGON,ST,POLYFROMTEXT,ST,POLYGONFROMTEXT +ST_POLYGON,ST_POLYFROMTEXT,ST_POLYGONFROMTEXT,ST,POLYGON,POLYFROMTEXT,POLYGONFROMTEXT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md index 0b1c2626dad3f2..49f96fa28409b7 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ascii.md @@ -25,4 +25,4 @@ mysql> select ascii('234'); +--------------+ ``` ##keyword -ASCII,ASCII +ASCII diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md index 85056e2c969821..4bcae3df62183c 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/concat.md @@ -32,4 +32,4 @@ mysql> select concat("a", null, "c"); +------------------------+ ``` ##keyword -CONCAT,CONCAT +CONCAT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md index 5abaf02eadb24f..65db339d3b7592 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/instr.md @@ -25,4 +25,4 @@ mysql> select instr("abc", "d"); +-------------------+ ``` ##keyword -INSTR,INSTR +INSTR diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md index 3972cd48c19f4f..7acb6840be8b18 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lcase.md @@ -8,4 +8,4 @@ 与`lower`一致 ##keyword -LCASE,LCASE +LCASE diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md index dc0fa432343d23..705b3f88655146 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/left.md @@ -18,4 +18,4 @@ mysql> select left("Hello doris",5); +------------------------+ ``` ##keyword -LEFT,LEFT +LEFT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md index 1e959f9db0685c..fa852edd50e67d 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/length.md @@ -25,4 +25,4 @@ mysql> select length("中国"); +------------------+ ``` ##keyword -LENGTH,LENGTH +LENGTH diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md index bab569fe64447f..e239904e014287 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md +++ 
b/docs/documentation/cn/sql-reference/sql-functions/string-functions/locate.md @@ -32,4 +32,4 @@ mysql> SELECT LOCATE('bar', 'foobarbar', 5); +-------------------------------+ ``` ##keyword -LOCATE,LOCATE +LOCATE diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md index d0b394388fd471..00f3e20313aa64 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lower.md @@ -18,4 +18,4 @@ mysql> SELECT lower("AbC123"); +-----------------+ ``` ##keyword -LOWER,LOWER +LOWER diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md index 060a784695bed8..41c836a168b047 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/lpad.md @@ -25,4 +25,4 @@ mysql> SELECT lpad("hi", 1, "xy"); +---------------------+ ``` ##keyword -LPAD,LPAD +LPAD diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md index 566c7b07a8dcb0..c3c8f04c383a7a 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/ltrim.md @@ -18,4 +18,4 @@ mysql> SELECT ltrim(' ab d'); +------------------+ ``` ##keyword -LTRIM,LTRIM +LTRIM diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md index ef8a49861ea884..71885baaba7a39 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/repeat.md @@ -25,4 +25,4 @@ mysql> SELECT repeat("a", -1); +-----------------+ ``` ##keyword -REPEAT,REPEAT +REPEAT, diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md index 9931cb594bab2c..e5fb9a16cd7a04 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/right.md @@ -18,4 +18,4 @@ mysql> select right("Hello doris",5); +-------------------------+ ``` ##keyword -RIGHT,RIGHT +RIGHT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md index 102b5786ed240f..c2c4d904a81d91 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strleft.md @@ -18,4 +18,4 @@ mysql> select strleft("Hello doris",5); +------------------------+ ``` ##keyword -STRLEFT,STRLEFT +STRLEFT diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md index 8dcf94dbc39290..7976d9288c4dbd 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/strright.md @@ -18,4 +18,4 @@ mysql> select 
strright("Hello doris",5); +-------------------------+ ``` ##keyword -STRRIGHT,STRRIGHT +STRRIGHT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md index 21eb0e08e3fc23..b25119cedfa1dc 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Account Management/SHOW ROLES.md @@ -1,17 +1,9 @@ # SHOW ROLES -## description ## description 该语句用于展示所有已创建的角色信息,包括角色名称,包含的用户以及权限。 语法: SHOW ROLES; - - 该语句用户删除一个角色 - - 语法: - DROP ROLE role1; - - 删除一个角色,不会影响之前属于该角色的用户的权限。仅相当于将该角色与用户解耦。用户已经从该角色中获取到的权限,不会改变。 ## example @@ -22,12 +14,3 @@ ## keyword SHOW,ROLES -## example - - 1. 删除一个角色 - - DROP ROLE role1; - -## keyword - DROP, ROLE - diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md index 6e197e710596c2..00e2d609170f1d 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/create-function.md @@ -68,4 +68,4 @@ CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES ( ); ``` ##keyword -CREATE,FUNCTION,CREATE,FUNCTION +CREATE,FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md index eac71eb7f74b78..419059f68d070d 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/drop-function.md @@ -25,4 +25,4 @@ DROP FUNCTION function_name DROP FUNCTION my_add(INT, INT) ``` ##keyword -DROP,FUNCTION,DROP,FUNCTION +DROP,FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md index b4f513654408d4..edd64917304286 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Definition/show-function.md @@ -34,4 +34,4 @@ Intermediate Type: NULL 2 rows in set (0.00 sec) ``` ##keyword -SHOW,FUNCTION,SHOW,FUNCTION +SHOW,FUNCTION diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md index 3f83280c16da4d..b6b445a1b3ceeb 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/broker_load.md @@ -363,4 +363,4 @@ WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); ## keyword - BROKER LOAD + BROKER,LOAD diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md index a43d8f39b0612e..14e3cfe9f7751c 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/insert.md @@ -77,4 +77,4 @@ INSERT INTO test (c1, c2) SELECT * from test2 由于Doris之前的导入方式都是异步导入方式,为了兼容旧有的使用习惯,不加 streaming 的 `INSERT` 
语句依旧会返回一个 label,用户需要通过`SHOW LOAD`命令查看此`label`导入作业的状态。 ##keyword -INSERT,INSERT +INSERT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md index 13b0e928f3d807..af2e6bb8296e3b 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BIGINT.md @@ -4,4 +4,4 @@ 8字节有符号整数,范围[-9223372036854775808, 9223372036854775807] ##keyword -BIGINT,BIGINT +BIGINT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md index fdf8117aff75e7..caa44dc17fa96e 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/BOOLEAN.md @@ -4,4 +4,4 @@ 与TINYINT一样,0代表false,1代表true ##keyword -BOOLEAN,BOOLEAN +BOOLEAN diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md index edf060a74af7c6..df645ae99eec48 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/CHAR.md @@ -4,4 +4,4 @@ 定长字符串,M代表的是定长字符串的长度。M的范围是1-255 ##keyword -CHAR,CHAR +CHAR diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md index 978c5ef953d962..d0082d7f674d07 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATE.md @@ -12,4 +12,4 @@ -> '2003-12-31' ##keyword -DATE,DATE +DATE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md index 90332c68ca744c..858e838c633d10 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DATETIME.md @@ -5,4 +5,4 @@ 打印的形式是'YYYY-MM-DD HH:MM:SS' ##keyword -DATETIME,DATETIME +DATETIME diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md index 4807798e46aa22..26e91fe0c42bc0 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DECIMAL.md @@ -5,4 +5,4 @@ M的范围是[1,27], D的范围[1, 9], 另外,M必须要大于等于D的取值。默认的D取值为0 ##keyword -DECIMAL,DECIMAL +DECIMAL diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md index 17136524824525..6334fe5223b8ae 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/DOUBLE.md @@ -4,4 +4,4 @@ 8字节浮点数 ##keyword -DOUBLE,DOUBLE +DOUBLE diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md index 6410033ed8d5d1..fe72e134a44697 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/FLOAT.md @@ -4,4 +4,4 @@ 4字节浮点数 ##keyword 
-FLOAT,FLOAT +FLOAT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md index 9ad2ed887139d6..0d6ded08d88b41 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/HLL(HyperLogLog).md @@ -6,4 +6,4 @@ 并且HLL列只能通过配套的hll_union_agg、hll_raw_agg、hll_cardinality、hll_hash进行查询或使用 ##keyword -HLL(HYPERLOGLOG),HLL(HYPERLOGLOG) +HLL(HYPERLOGLOG),HLL,HYPERLOGLOG diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md index 8943fe20afe6da..3632659b9158f4 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/INT.md @@ -4,4 +4,4 @@ 4字节有符号整数,范围[-2147483648, 2147483647] ##keyword -INT,INT +INT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md index 7cb74ce06ffcda..6fc2e83235af36 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/SMALLINT.md @@ -4,4 +4,4 @@ 2字节有符号整数,范围[-32768, 32767] ##keyword -SMALLINT,SMALLINT +SMALLINT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md index cbdbba67c9f0a0..c9ba5ed88d734c 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/TINYINT.md @@ -4,4 +4,4 @@ 1字节有符号整数,范围[-128, 127] ##keyword -TINYINT,TINYINT +TINYINT diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Types/VARCHAR.md b/docs/documentation/cn/sql-reference/sql-statements/Data Types/VARCHAR.md index 8e8cbb7d2b5a5a..dc3dc5adce2e6a 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Types/VARCHAR.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Types/VARCHAR.md @@ -4,4 +4,4 @@ 变长字符串,M代表的是变长字符串的长度。M的范围是1-65535 ##keyword -VARCHAR,VARCHAR +VARCHAR From c0735a1126a03094237346a1486a16c2addc82df Mon Sep 17 00:00:00 2001 From: xy720 Date: Thu, 29 Aug 2019 10:07:37 +0800 Subject: [PATCH 9/9] Add english documentation --- .../administrator-guide/backup-restore_EN.md | 160 +++++ .../administrator-guide/colocation-join_EN.md | 421 ++++++++++++ .../administrator-guide/export_manual_EN.md | 165 +++++ .../http-actions/fe-get-log-file_EN.md | 52 ++ .../http-actions/index.rst | 9 + .../en/administrator-guide/index.rst | 17 + .../load-data/broker-load-manual_EN.md | 449 ++++++++++++ .../administrator-guide/load-data/index.rst | 12 + .../load-data/insert-into-manual_EN.md | 137 ++++ .../load-data/load-manual_EN.md | 172 +++++ .../load-data/routine-load-manual_EN.md | 220 ++++++ .../load-data/stream-load-manual_EN.md | 259 +++++++ .../administrator-guide/operation/index.rst | 9 + .../operation/metadata-operation_EN.md | 297 ++++++++ .../operation/monitor-alert_EN.md | 283 ++++++++ .../operation/multi-tenant_EN.md | 212 ++++++ .../operation/tablet-meta-tool_EN.md | 86 +++ .../operation/tablet-repair-and-balance_EN.md | 641 ++++++++++++++++++ .../en/administrator-guide/privilege_EN.md | 188 +++++ .../administrator-guide/small-file-mgr_EN.md | 78 +++ 
docs/documentation/en/community/gitter_EN.md | 37 + .../en/community/how-to-contribute_EN.md | 55 ++ docs/documentation/en/community/index.rst | 8 + docs/documentation/en/community/members_EN.md | 31 + .../en/community/pull-request_EN.md | 233 +++++++ .../en/community/release-process_EN.md | 566 ++++++++++++++++ .../en/community/subscribe-mail-list_EN.md | 43 ++ .../en/community/verify-apache-release_EN.md | 100 +++ .../en/extending-doris/doris-on-es_EN.md | 205 ++++++ .../en/extending-doris/index.rst | 8 + .../user-defined-function_EN.md | 92 +++ .../en/getting-started/advance-usage_EN.md | 246 +++++++ .../en/getting-started/basic-usage_EN.md | 355 ++++++++++ .../en/getting-started/best-practice_EN.md | 163 +++++ .../getting-started/data-model-rollup_EN.md | 612 +++++++++++++++++ .../en/getting-started/data-partition_EN.md | 269 ++++++++ .../en/getting-started/hit-the-rollup_EN.md | 268 ++++++++ .../en/getting-started/index.rst | 12 + .../en/installing/compilation.md | 77 +++ .../en/installing/compilation_EN.md | 76 +++ docs/documentation/en/installing/index.rst | 9 + .../en/installing/install-deploy.md | 380 +++++++++++ .../en/installing/install-deploy_EN.md | 391 +++++++++++ docs/documentation/en/installing/upgrade.md | 37 + .../documentation/en/installing/upgrade_EN.md | 37 + .../internal/doris_storage_optimization_EN.md | 206 ++++++ docs/documentation/en/internal/index.rst | 8 + .../en/internal/metadata-design_EN.md | 100 +++ docs/documentation/en/sql-reference/index.rst | 9 + .../aggregate-functions/avg_EN.md | 31 + .../aggregate-functions/count_EN.md | 35 + .../aggregate-functions/count_distinct_EN.md | 21 + .../aggregate-functions/hll_union_agg_EN.md | 26 + .../aggregate-functions/index.rst | 8 + .../aggregate-functions/max_EN.md | 20 + .../aggregate-functions/min_EN.md | 20 + .../aggregate-functions/ndv_EN.md | 22 + .../percentile_approx_EN.md | 21 + .../aggregate-functions/stddev_EN.md | 27 + .../aggregate-functions/stddev_samp_EN.md | 20 + .../aggregate-functions/sum_EN.md | 20 + .../aggregate-functions/var_samp_EN.md | 19 + .../aggregate-functions/variance_EN.md | 26 + .../en/sql-reference/sql-functions/cast_EN.md | 56 ++ .../current_timestamp_EN.md | 20 + .../date-time-functions/date_add_EN.md | 26 + .../date-time-functions/date_format_EN.md | 123 ++++ .../date-time-functions/date_sub_EN.md | 26 + .../date-time-functions/datediff_EN.md | 32 + .../date-time-functions/day_EN.md | 22 + .../date-time-functions/dayname_EN.md | 22 + .../date-time-functions/dayofmonth_EN.md | 22 + .../date-time-functions/dayofweek_EN.md | 22 + .../date-time-functions/dayofyear_EN.md | 22 + .../date-time-functions/from_days_EN.md | 20 + .../date-time-functions/from_unixtime_EN.md | 42 ++ .../date-time-functions/index.rst | 8 + .../date-time-functions/month_EN.md | 22 + .../date-time-functions/monthname_EN.md | 22 + .../date-time-functions/now_EN.md | 20 + .../date-time-functions/str_to_date_EN.md | 29 + .../date-time-functions/timediff_EN.md | 46 ++ .../date-time-functions/to_days_EN.md | 22 + .../date-time-functions/unix_timestamp_EN.md | 31 + .../date-time-functions/utc_timestamp_EN.md | 24 + .../date-time-functions/workofyear_EN.md | 23 + .../date-time-functions/year_EN.md | 22 + .../en/sql-reference/sql-functions/index.rst | 16 + .../sql-functions/spatial-functions/index.rst | 8 + .../spatial-functions/st_astext_EN.md | 21 + .../spatial-functions/st_circle_EN.md | 22 + .../spatial-functions/st_contains_EN.md | 28 + .../st_distance_sphere_EN.md | 21 + .../st_geometryfromtext_EN.md | 21 + 
.../spatial-functions/st_linefromtext_EN.md | 21 + .../spatial-functions/st_point_EN.md | 22 + .../spatial-functions/st_polygon_EN.md | 21 + .../spatial-functions/st_x_EN.md | 21 + .../spatial-functions/st_y_EN.md | 21 + .../string-functions/ascii_EN.md | 28 + .../string-functions/concat_EN.md | 35 + .../string-functions/concat_ws_EN.md | 37 + .../string-functions/find_in_set_EN.md | 21 + .../string-functions/get_json_double_EN.md | 48 ++ .../string-functions/get_json_int_EN.md | 48 ++ .../string-functions/get_json_string_EN.md | 58 ++ .../string-functions/group_concat_EN.md | 37 + .../sql-functions/string-functions/index.rst | 8 + .../string-functions/instr_EN.md | 28 + .../string-functions/lcase_EN.md | 11 + .../sql-functions/string-functions/left_EN.md | 21 + .../string-functions/length_EN.md | 28 + .../string-functions/locate_EN.md | 35 + .../string-functions/lower_EN.md | 21 + .../sql-functions/string-functions/lpad_EN.md | 28 + .../string-functions/ltrim_EN.md | 21 + .../string-functions/money_format_EN.md | 35 + .../string-functions/regexp_extract_EN.md | 28 + .../string-functions/regexp_replace_EN.md | 28 + .../string-functions/repeat_EN.md | 28 + .../string-functions/right_EN.md | 21 + .../string-functions/split_part_EN.md | 43 ++ .../string-functions/strleft_EN.md | 21 + .../string-functions/strright_EN.md | 21 + .../Account Management/CREATE ROLE_EN.md | 19 + .../Account Management/CREATE USER_EN.md | 48 ++ .../Account Management/DROP ROLE_EN.md | 17 + .../Account Management/DROP USER_EN.md | 17 + .../Account Management/GRANT_EN.md | 55 ++ .../Account Management/REVOKE_EN.md | 22 + .../Account Management/SET PASSWORD_EN.md | 29 + .../Account Management/SET PROPERTY_EN.md | 56 ++ .../Account Management/SHOW GRANTS_EN.md | 30 + .../Account Management/SHOW ROLES_EN.md | 15 + .../Administration/ADMIN CANCEL REPAIR_EN.md | 21 + .../Administration/ADMIN REPAIR_EN.md | 26 + .../Administration/ADMIN SET CONFIG_EN.md | 18 + .../Administration/ADMIN SHOW CONFIG_EN.md | 27 + .../ADMIN SHOW REPLICA DISTRIBUTION_EN.md | 25 + .../ADMIN SHOW REPLICA STATUS_EN.md | 38 ++ .../Administration/ALTER CLUSTER_EN.md | 27 + .../Administration/ALTER SYSTEM_EN.md | 93 +++ .../Administration/CANCEL DECOMMISSION_EN.md | 14 + .../Administration/CREATE CLUSTER_EN.md | 35 + .../Administration/CREATE FILE_EN.md | 50 ++ .../Administration/DROP CLUSTER_EN.md | 17 + .../Administration/DROP FILE_EN.md | 25 + .../sql-statements/Administration/ENTER_EN.md | 18 + .../Administration/LINK DATABASE_EN.md | 23 + .../Administration/MIGRATE DATABASE_EN.md | 19 + .../Administration/SHOW BACKENDS_EN.md | 21 + .../Administration/SHOW BROKER_EN.md | 14 + .../Administration/SHOW FILE_EN.md | 26 + .../Administration/SHOW FRONTENDS_EN.md | 17 + .../Administration/SHOW MIGRATIONS_EN.md | 11 + .../Data Definition/ALTER DATABASE_EN.md | 29 + .../Data Definition/ALTER TABLE_EN.md | 240 +++++++ .../Data Definition/BACKUP_EN.md | 39 ++ .../Data Definition/CANCEL ALTER_EN.md | 32 + .../Data Definition/CANCEL BACKUP_EN.md | 13 + .../Data Definition/CANCEL RESTORE_EN.md | 16 + .../Data Definition/CREATE DATABASE_EN.md | 13 + .../Data Definition/CREATE REPOSITORY_EN.md | 49 ++ .../Data Definition/CREATE TABLE_EN.md | 334 +++++++++ .../Data Definition/CREATE VIEW_EN.md | 22 + .../Data Definition/Colocate Join_EN.md | 72 ++ .../Data Definition/DROP DATABASE_EN.md | 16 + .../Data Definition/DROP REPOSITORY_EN.md | 15 + .../Data Definition/DROP TABLE_EN.md | 19 + .../Data Definition/DROP VIEW_EN.md | 14 + .../sql-statements/Data 
Definition/HLL_EN.md | 79 +++ .../Data Definition/RECOVER_EN.md | 28 + .../Data Definition/RESTORE_EN.md | 52 ++ .../Data Definition/TRUNCATE TABLE_EN.md | 26 + .../Data Definition/create-function_EN.md | 71 ++ .../Data Definition/drop-function_EN.md | 28 + .../Data Definition/show-function_EN.md | 37 + .../Data Manipulation/CANCEL DELETE_EN.md | 10 + .../Data Manipulation/CANCEL LABEL_EN.md | 27 + .../Data Manipulation/CANCEL LOAD_EN.md | 19 + .../Data Manipulation/DELETE_EN.md | 36 + .../Data Manipulation/EXPORT_EN.md | 57 ++ .../Data Manipulation/GET LABEL STATE_EN.md | 32 + .../Data Manipulation/LOAD_EN.md | 284 ++++++++ .../Data Manipulation/MINI LOAD_EN.md | 103 +++ .../Data Manipulation/MULTI LOAD_EN.md | 81 +++ .../PAUSE ROUTINE LOAD_EN.md | 9 + .../Data Manipulation/RESTORE TABLET_EN.md | 15 + .../RESUME ROUTINE LOAD_EN.md | 9 + .../Data Manipulation/ROUTINE LOAD_EN.md | 248 +++++++ .../Data Manipulation/SHOW ALTER_EN.md | 25 + .../Data Manipulation/SHOW BACKUP_EN.md | 36 + .../Data Manipulation/SHOW DATABASES_EN.md | 9 + .../Data Manipulation/SHOW DATA_EN.md | 20 + .../Data Manipulation/SHOW DELETE_EN.md | 13 + .../Data Manipulation/SHOW EXPORT_EN.md | 36 + .../Data Manipulation/SHOW LOAD_EN.md | 48 ++ .../Data Manipulation/SHOW PARTITIONS_EN.md | 16 + .../Data Manipulation/SHOW PROPERTY_EN.md | 16 + .../Data Manipulation/SHOW REPOSITORIES_EN.md | 23 + .../Data Manipulation/SHOW RESTORE_EN.md | 41 ++ .../SHOW ROUTINE LOAD TASK_EN.md | 9 + .../Data Manipulation/SHOW ROUTINE LOAD_EN.md | 31 + .../Data Manipulation/SHOW SNAPSHOT_EN.md | 30 + .../Data Manipulation/SHOW TABLES_EN.md | 8 + .../Data Manipulation/SHOW TABLET_EN.md | 16 + .../Data Manipulation/STOP ROUTINE LOAD_EN.md | 9 + .../Data Manipulation/STREAM LOAD_EN.md | 91 +++ .../Data Manipulation/broker_load_EN.md | 366 ++++++++++ .../Data Manipulation/insert_EN.md | 80 +++ .../sql-statements/Data Types/BIGINT_EN.md | 7 + .../sql-statements/Data Types/BOOLEAN_EN.md | 7 + .../sql-statements/Data Types/CHAR_EN.md | 7 + .../sql-statements/Data Types/DATETIME_EN.md | 8 + .../sql-statements/Data Types/DATE_EN.md | 15 + .../sql-statements/Data Types/DECIMAL_EN.md | 8 + .../sql-statements/Data Types/DOUBLE_EN.md | 7 + .../sql-statements/Data Types/FLOAT_EN.md | 7 + .../Data Types/HLL(HyperLogLog)_EN.md | 9 + .../sql-statements/Data Types/INT_EN.md | 7 + .../sql-statements/Data Types/SMALLINT_EN.md | 7 + .../sql-statements/Data Types/TINYINT_EN.md | 7 + .../sql-statements/Data Types/VARCHAR_EN.md | 7 + .../sql-statements/Utility/util_stmt_EN.md | 13 + .../en/sql-reference/sql-statements/index.rst | 8 + 225 files changed, 14615 insertions(+) create mode 100644 docs/documentation/en/administrator-guide/backup-restore_EN.md create mode 100644 docs/documentation/en/administrator-guide/colocation-join_EN.md create mode 100644 docs/documentation/en/administrator-guide/export_manual_EN.md create mode 100644 docs/documentation/en/administrator-guide/http-actions/fe-get-log-file_EN.md create mode 100644 docs/documentation/en/administrator-guide/http-actions/index.rst create mode 100644 docs/documentation/en/administrator-guide/index.rst create mode 100644 docs/documentation/en/administrator-guide/load-data/broker-load-manual_EN.md create mode 100644 docs/documentation/en/administrator-guide/load-data/index.rst create mode 100644 docs/documentation/en/administrator-guide/load-data/insert-into-manual_EN.md create mode 100644 docs/documentation/en/administrator-guide/load-data/load-manual_EN.md create mode 100644 
docs/documentation/en/administrator-guide/load-data/routine-load-manual_EN.md create mode 100644 docs/documentation/en/administrator-guide/load-data/stream-load-manual_EN.md create mode 100644 docs/documentation/en/administrator-guide/operation/index.rst create mode 100644 docs/documentation/en/administrator-guide/operation/metadata-operation_EN.md create mode 100644 docs/documentation/en/administrator-guide/operation/monitor-alert_EN.md create mode 100644 docs/documentation/en/administrator-guide/operation/multi-tenant_EN.md create mode 100644 docs/documentation/en/administrator-guide/operation/tablet-meta-tool_EN.md create mode 100644 docs/documentation/en/administrator-guide/operation/tablet-repair-and-balance_EN.md create mode 100644 docs/documentation/en/administrator-guide/privilege_EN.md create mode 100644 docs/documentation/en/administrator-guide/small-file-mgr_EN.md create mode 100644 docs/documentation/en/community/gitter_EN.md create mode 100644 docs/documentation/en/community/how-to-contribute_EN.md create mode 100644 docs/documentation/en/community/index.rst create mode 100644 docs/documentation/en/community/members_EN.md create mode 100644 docs/documentation/en/community/pull-request_EN.md create mode 100644 docs/documentation/en/community/release-process_EN.md create mode 100644 docs/documentation/en/community/subscribe-mail-list_EN.md create mode 100644 docs/documentation/en/community/verify-apache-release_EN.md create mode 100644 docs/documentation/en/extending-doris/doris-on-es_EN.md create mode 100644 docs/documentation/en/extending-doris/index.rst create mode 100644 docs/documentation/en/extending-doris/user-defined-function_EN.md create mode 100644 docs/documentation/en/getting-started/advance-usage_EN.md create mode 100644 docs/documentation/en/getting-started/basic-usage_EN.md create mode 100644 docs/documentation/en/getting-started/best-practice_EN.md create mode 100644 docs/documentation/en/getting-started/data-model-rollup_EN.md create mode 100644 docs/documentation/en/getting-started/data-partition_EN.md create mode 100644 docs/documentation/en/getting-started/hit-the-rollup_EN.md create mode 100644 docs/documentation/en/getting-started/index.rst create mode 100644 docs/documentation/en/installing/compilation.md create mode 100644 docs/documentation/en/installing/compilation_EN.md create mode 100644 docs/documentation/en/installing/index.rst create mode 100644 docs/documentation/en/installing/install-deploy.md create mode 100644 docs/documentation/en/installing/install-deploy_EN.md create mode 100644 docs/documentation/en/installing/upgrade.md create mode 100644 docs/documentation/en/installing/upgrade_EN.md create mode 100644 docs/documentation/en/internal/doris_storage_optimization_EN.md create mode 100644 docs/documentation/en/internal/index.rst create mode 100644 docs/documentation/en/internal/metadata-design_EN.md create mode 100644 docs/documentation/en/sql-reference/index.rst create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/avg_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_distinct_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/index.rst create mode 100644 
docs/documentation/en/sql-reference/sql-functions/aggregate-functions/max_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/min_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/ndv_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/percentile_approx_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_samp_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/sum_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/var_samp_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/aggregate-functions/variance_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/cast_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/current_timestamp_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_add_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_sub_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/datediff_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/day_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayname_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofmonth_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofweek_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofyear_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_days_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_unixtime_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/index.rst create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/month_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/monthname_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/now_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/timediff_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/to_days_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/utc_timestamp_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/workofyear_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/date-time-functions/year_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/index.rst create mode 100644 
docs/documentation/en/sql-reference/sql-functions/spatial-functions/index.rst create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_astext_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_circle_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_contains_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_linefromtext_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_point_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_polygon_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_x_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_y_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/ascii_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/concat_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/concat_ws_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/find_in_set_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_double_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_int_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_string_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/group_concat_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/index.rst create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/instr_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/lcase_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/left_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/length_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/locate_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/lower_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/lpad_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/ltrim_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/money_format_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_extract_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_replace_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/repeat_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/right_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/split_part_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/strleft_EN.md 
create mode 100644 docs/documentation/en/sql-reference/sql-functions/string-functions/strright_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE ROLE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE USER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/DROP ROLE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/DROP USER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/GRANT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/REVOKE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/SET PASSWORD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/SET PROPERTY_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW GRANTS_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW ROLES_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN REPAIR_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ALTER CLUSTER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ALTER SYSTEM_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/CREATE CLUSTER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/CREATE FILE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/DROP CLUSTER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/DROP FILE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/ENTER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/LINK DATABASE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BACKENDS_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BROKER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FILE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE_EN.md create mode 100644 
docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER TABLE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/BACKUP_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE VIEW_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/Colocate Join_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP DATABASE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP TABLE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP VIEW_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/RECOVER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/RESTORE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/create-function_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/drop-function_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Definition/show-function_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/DELETE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/EXPORT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/ROUTINE 
LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/broker_load_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Manipulation/insert_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/BIGINT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/BOOLEAN_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/CHAR_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/DATETIME_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/DATE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/DECIMAL_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/DOUBLE_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/FLOAT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/HLL(HyperLogLog)_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/INT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/SMALLINT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/TINYINT_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Data Types/VARCHAR_EN.md create mode 100644 docs/documentation/en/sql-reference/sql-statements/Utility/util_stmt_EN.md create mode 100644 
docs/documentation/en/sql-reference/sql-statements/index.rst diff --git a/docs/documentation/en/administrator-guide/backup-restore_EN.md b/docs/documentation/en/administrator-guide/backup-restore_EN.md new file mode 100644 index 00000000000000..e06af4c987edf6 --- /dev/null +++ b/docs/documentation/en/administrator-guide/backup-restore_EN.md @@ -0,0 +1,160 @@ +# Backup and Recovery + +Doris supports backing up its current data, in the form of files, to a remote storage system via broker. The data can later be restored from the remote storage system into any Doris cluster with the restore command. With this feature, Doris supports regular snapshot backups of data; it can also be used to migrate data between clusters. + +This feature requires Doris version 0.8.2+ + +To use this function, brokers corresponding to the remote storage, such as BOS or HDFS, need to be deployed. You can view the currently deployed brokers through `SHOW BROKER;` + +## Brief Principle Description + +### Backup + +The backup operation uploads the data of the specified table or partition, in the form of the files stored by Doris, directly to the remote warehouse. When a user submits a Backup request, the following actions are performed within the system: + +1. Snapshot and snapshot upload + + The snapshot phase takes a snapshot of the data files of the specified table or partition, and all subsequent backup work operates on this snapshot. After the snapshot is taken, changes to tables, imports and other operations no longer affect the result of the backup. A snapshot only produces hard links to the current data files, which takes very little time. Once the snapshots are completed, they are uploaded one by one. Snapshot upload is done concurrently by each Backend. + +2. Metadata preparation and upload + + After the data file snapshots are uploaded, Frontend first writes the corresponding metadata to a local file, and then uploads the local metadata file to the remote warehouse through broker. This completes the backup job. + +### Restore + +A recovery operation needs to specify a backup that already exists in the remote repository; the content of that backup is then restored to the local cluster. When a user submits a Restore request, the following actions are performed within the system: + +1. Create the corresponding metadata locally + + This step first creates structures such as the tables and partitions to be restored in the local cluster. Once created, the tables are visible, but not accessible. + +2. Local snapshot + + This step takes a snapshot of the tables created in the previous step. This is actually an empty snapshot (because the tables just created have no data); its main purpose is to generate the corresponding snapshot directories on the Backends for receiving the snapshot files downloaded from the remote repository later. + +3. Download snapshots + + The snapshot files in the remote warehouse are downloaded to the corresponding snapshot directories generated in the previous step. This step is done concurrently by each Backend. + +4. Make snapshots effective + + When the snapshot download is complete, each snapshot is mapped to the metadata of the corresponding local table. These snapshots are then reloaded to take effect, completing the final recovery operation.
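+The whole backup side can be sketched with the statements below. This is only a hedged sketch: `example_repo`, `hdfs_broker`, `example_db`, `example_tbl`, the HDFS path and the credentials are placeholders, and the exact repository PROPERTIES depend on the broker being used (see `HELP CREATE REPOSITORY;` on your deployment).
+
+```
+-- One-time setup: create a remote repository through the broker.
+CREATE REPOSITORY `example_repo`
+WITH BROKER `hdfs_broker`
+ON LOCATION "hdfs://hdfs_host:port/user/doris/backup_repo"
+PROPERTIES
+(
+    "username" = "hdfs_user",
+    "password" = "hdfs_password"
+);
+
+-- Back up two partitions of a table under a user-defined snapshot label.
+BACKUP SNAPSHOT example_db.snapshot_label1
+TO example_repo
+ON (example_tbl PARTITION (p1, p2));
+
+-- Watch the job; the State column goes through the stages listed later in this document.
+SHOW BACKUP FROM example_db;
+```
+
+Because the snapshot is taken when the job starts, data imported after that point is not included in the backup, which is why the data migration practice below recommends replaying later imports on the target cluster as well.
+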
+## Best Practices + +### Backup + +We currently support full backup at the minimum granularity of a partition (incremental backup may be supported in future versions). If data needs to be backed up regularly, it is first necessary to plan the partitioning and bucketing of tables reasonably, for example by partitioning by time. Then, during subsequent operation, periodic backups are performed at partition granularity. + +### Data migration + +Users can first back up the data to a remote warehouse, and then restore the data from the remote warehouse into another cluster to complete a data migration. Because a data backup is done in the form of a snapshot, data imported after the snapshot phase of the backup job is not included in the backup. Therefore, after the snapshot is completed and until the recovery job is completed, data imported into the original cluster also needs to be imported into the new cluster. + +It is suggested that imports run in parallel on both the new and the old cluster for a period of time after the migration is completed. After data and business correctness have been verified, the business can be switched to the new cluster. + +## Notes + +1. Backup and recovery related operations are currently only allowed for users with ADMIN privileges. +2. Within a database, only one backup or recovery job is allowed to run at a time. +3. Both backup and recovery support operation at the minimum granularity of a partition. When a table has a large amount of data, it is recommended to back it up partition by partition to reduce the cost of failed retries. +4. Because backup and recovery operations work on the actual data files, a table with too many tablets or a tablet with too many small versions may take a long time to back up or restore even if the total amount of data is very small. Users can estimate the job execution time with `SHOW PARTITIONS FROM table_name;` and `SHOW TABLET FROM table_name;`, which show the number of partitions and the number of file versions of each partition. The number of files has a great impact on the execution time of the job, so it is suggested that partitions and buckets be planned reasonably in order to avoid creating too many tablets. +5. When viewing the job status through `SHOW BACKUP` or `SHOW RESTORE`, it is possible to see an error message in the `TaskErrMsg` column. But as long as the `State` column is not +`CANCELLED`, the job is still in progress. These tasks may succeed on retry. Of course, some task errors can also directly lead to job failure. +6. If the recovery operation is an overwrite operation (restoring data to an existing table or partition), then starting from the `COMMIT` phase of the recovery job, the overwritten data on the current cluster may no longer be recoverable. If the recovery job fails or is cancelled at this point, the previous data may be damaged and inaccessible. In this case, the only option is to perform the recovery operation again and wait for the job to complete. Therefore, we recommend not overwriting existing data during recovery unless it is confirmed that the current data is no longer in use. + +## Related Commands + +The commands related to the backup and recovery function are listed below. For each of them, you can use `HELP cmd;` to view detailed help after connecting to Doris through mysql-client. A hedged restore-side sketch is given after this list. + +1. CREATE REPOSITORY + + Create a remote repository path for backup or recovery. + +2. BACKUP + + Perform a backup operation. + +3. SHOW BACKUP + + View the execution of the most recent backup job, including: + + * JobId: ID of this backup job.
+ * SnapshotName: The user-specified name (Label) of this backup job. + * DbName: The database corresponding to the backup job. + * State: The current stage of the backup job: + * PENDING: The initial state of the job. + * SNAPSHOTING: Snapshotting is in progress. + * UPLOAD_SNAPSHOT: Snapshotting is finished and the snapshots are ready to be uploaded. + * UPLOADING: Snapshots are being uploaded. + * SAVE_META: The metadata file is being generated locally. + * UPLOAD_INFO: Uploading the metadata file and the information of this backup job. + * FINISHED: The backup is complete. + * CANCELLED: The backup failed or was cancelled. + * Backup Objs: The list of tables and partitions involved in this backup. + * CreateTime: Job creation time. + * Snapshot Finished Time: Snapshot completion time. + * Upload Finished Time: Snapshot upload completion time. + * FinishedTime: The completion time of this job. + * Unfinished Tasks: In stages such as `SNAPSHOTTING` and `UPLOADING`, multiple sub-tasks run at the same time; the task IDs of the unfinished sub-tasks of the current stage are shown here. + * TaskErrMsg: If a sub-task fails, the error message of that sub-task is displayed here. + * Status: Used to record miscellaneous status information that may appear during the whole job. + * Timeout: The timeout of the job, in seconds. + +4. SHOW SNAPSHOT + + View the backups that already exist in the remote warehouse. + + * Snapshot: The name (Label) of the backup specified at backup time. + * Timestamp: The timestamp of the backup. + * Status: Whether the backup is normal. + + If a WHERE clause is specified after `SHOW SNAPSHOT`, more detailed backup information can be displayed. + + * Database: The database the backup corresponds to. + * Details: The complete data directory structure of the backup. + +5. RESTORE + + Perform a recovery operation. + +6. SHOW RESTORE + + View the execution of the most recent restore job, including: + + * JobId: ID of this restore job. + * Label: The name (Label) of the backup in the user-specified warehouse. + * Timestamp: The timestamp of the backup in the user-specified warehouse. + * DbName: The database corresponding to the restore job. + * State: The current stage of the recovery job: + * PENDING: The initial state of the job. + * SNAPSHOTING: A snapshot of the newly created local tables is in progress. + * DOWNLOAD: The snapshot download tasks are being sent. + * DOWNLOADING: Snapshots are being downloaded. + * COMMIT: Preparing to make the downloaded snapshots take effect. + * COMMITTING: The downloaded snapshots are taking effect. + * FINISHED: Recovery is complete. + * CANCELLED: Recovery failed or was cancelled. + * AllowLoad: Whether imports are allowed during recovery. + * ReplicationNum: The number of replicas specified for the restore. + * Restore Objs: The list of tables and partitions involved in this recovery. + * CreateTime: Job creation time. + * MetaPreparedTime: Completion time of local metadata generation. + * Snapshot Finished Time: Local snapshot completion time. + * Download Finished Time: Completion time of the remote snapshot download. + * FinishedTime: The completion time of this job. + * Unfinished Tasks: In stages such as `SNAPSHOTTING`, `DOWNLOADING` and `COMMITTING`, multiple sub-tasks run at the same time; the task IDs of the unfinished sub-tasks of the current stage are shown here. + * TaskErrMsg: If a sub-task fails, the error message of that sub-task is displayed here.
+ * Status: Used to record miscellaneous status information that may appear during the whole job. + * Timeout: The timeout of the job, in seconds. + +7. CANCEL BACKUP + + Cancel the backup job currently being performed. + +8. CANCEL RESTORE + + Cancel the recovery job currently being performed. + +9. DROP REPOSITORY + + Delete a previously created remote repository. Dropping a repository only deletes the mapping of the repository in Doris; it does not delete the actual data in the repository.
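+Continuing the earlier backup sketch, the restore side on the target cluster could look roughly as follows. Again this is only a hedged sketch: the same repository definition must first be created on the target cluster, `backup_timestamp` has to be the value reported by `SHOW SNAPSHOT`, and all object names and values are placeholders.
+
+```
+-- Find the backup and its timestamp in the remote repository.
+SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "snapshot_label1";
+
+-- Restore the backed-up partitions into this cluster.
+RESTORE SNAPSHOT example_db.snapshot_label1
+FROM example_repo
+ON (example_tbl PARTITION (p1, p2))
+PROPERTIES
+(
+    "backup_timestamp" = "2019-08-29-10-07-37",  -- placeholder, copy it from SHOW SNAPSHOT
+    "replication_num" = "3"                      -- adjust to the target cluster
+);
+
+-- Watch the job until the State column reaches FINISHED.
+SHOW RESTORE FROM example_db;
+```
+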
+    If they are inconsistent, a Tablet replica may end up with no corresponding replica of another table's fragment on the same BE.
+
+Tables in the same CG do not need to be consistent in the number, range, or type of partition columns.
+
+Once the bucket columns and the number of buckets are fixed, the tables in the same CG have the same Buckets Sequence. The replication number determines how many replicas the Tablets in each bucket have and which BEs they are stored on. Suppose the Buckets Sequence is `[0, 1, 2, 3, 4, 5, 6, 7]` and the BE nodes are `[A, B, C, D]`. A possible data distribution is as follows:
+
+```
++---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
+| 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 |
++---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
+| A | | B | | C | | D | | A | | B | | C | | D |
+|   | |   | |   | |   | |   | |   | |   | |   |
+| B | | C | | D | | A | | B | | C | | D | | A |
+|   | |   | |   | |   | |   | |   | |   | |   |
+| C | | D | | A | | B | | C | | D | | A | | B |
++---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
+```
+
+The data of all tables in the CG is distributed uniformly according to the rule above, which guarantees that data with the same bucket column values is on the same BE nodes, so the join can be performed on local data.
+
+## Usage
+
+### Creating tables
+
+When creating a table, you can specify the attribute `"colocate_with" = "group_name"` in `PROPERTIES`, which indicates that the table is a Colocation Join table and belongs to the specified Colocation Group.
+
+Examples:
+
+```
+CREATE TABLE tbl (k1 int, v1 int sum)
+DISTRIBUTED BY HASH(k1)
+BUCKETS 8
+PROPERTIES(
+    "colocate_with" = "group1"
+);
+```
+
+If the specified group does not exist, Doris automatically creates a group that contains only the current table. If the group already exists, Doris checks whether the current table satisfies the Colocation Group Schema. If it does, the table is created and added to the group, and its fragments and replicas are created according to the existing data distribution rules of the group.
+A group belongs to a database, and its name is unique within the database. Internally the full name of a group is stored as `dbId_groupName`, but users only perceive the groupName.
+
+### Deleting tables
+
+When the last table in a group is completely deleted (completely deleted means deleted from the recycle bin; normally, a table deleted with the `DROP TABLE` command stays in the recycle bin for one day by default before being removed), the group is deleted automatically as well.
+
+### Viewing groups
+
+The following command shows the existing group information in the cluster.
+
+```
+SHOW PROC '/colocation_group';
+
++-------------+--------------+--------------+------------+----------------+----------+----------+
+| GroupId     | GroupName    | TableIds     | BucketsNum | ReplicationNum | DistCols | IsStable |
++-------------+--------------+--------------+------------+----------------+----------+----------+
+| 10005.10008 | 10005_group1 | 10007, 10040 | 10         | 3              | int(11)  | true     |
++-------------+--------------+--------------+------------+----------------+----------+----------+
+```
+
+* GroupId: The cluster-wide unique identifier of the group; the first half is the database ID and the second half is the group ID.
+* GroupName: The full name of the group.
+* TableIds: The list of IDs of the tables contained in the group.
+* BucketsNum: The number of buckets.
+* ReplicationNum: The replication number.
+* DistCols: The distribution columns, i.e. the types of the bucket columns.
+* IsStable: Whether the group is stable (for the definition of stability, see the section `Colocation Replica Balancing and Repair`).
+
+You can further view the data distribution of a group with the following command:
+
+```
+SHOW PROC '/colocation_group/10005.10008';
+
++-------------+---------------------+
+| BucketIndex | BackendIds          |
++-------------+---------------------+
+| 0           | 10004, 10002, 10001 |
+| 1           | 10003, 10002, 10004 |
+| 2           | 10002, 10004, 10001 |
+| 3           | 10003, 10002, 10004 |
+| 4           | 10002, 10004, 10003 |
+| 5           | 10003, 10002, 10001 |
+| 6           | 10003, 10004, 10001 |
+| 7           | 10003, 10004, 10002 |
++-------------+---------------------+
+```
+
+* BucketIndex: Index into the bucket sequence.
+* BackendIds: The IDs of the BE nodes on which the data fragments of this bucket are located.
+
+> The commands above require ADMIN privileges. Viewing by ordinary users is not supported at this time.
+
+### Modifying the Colocation Group of a table
+
+You can modify the Colocation Group property of a table that has already been created. Examples:
+
+`ALTER TABLE tbl SET ("colocate_with" = "group2");`
+
+* If the table was not previously assigned to a group, the command checks the schema and adds the table to the group (the group is created first if it does not exist).
+* If the table previously belonged to another group, the command first removes it from that group and then adds it to the new group (which is created if it does not exist).
+
+You can also remove the Colocation attribute of a table with the following command:
+
+`ALTER TABLE tbl SET ("colocate_with" = "");`
+
+### Other related operations
+
+When a partition is added (ADD PARTITION) to, or the replication number is modified for, a table with the Colocation attribute, Doris checks whether the operation violates the Colocation Group Schema and rejects it if it does.
+
+## Colocation Replica Balancing and Repair
+
+The replica distribution of Colocation tables must follow the distribution specified by the group, so their replica repair and balancing differ from those of ordinary fragments.
+
+A group has a Stable attribute. When Stable is true, no fragment of any table in the group is currently being changed and the Colocation feature can be used normally. When Stable is false, some tables in the group are being repaired or migrated; at this time, Colocation Joins of the related tables degrade to ordinary Joins.
+
+### Replica Repair
+
+Replicas can only be stored on the specified BE nodes, so when a BE becomes unavailable (down, decommissioned, etc.) a new BE is needed to replace it. Doris first looks for the BE with the lowest load as the replacement. After the replacement, all data fragments of the bucket on the old BE are repaired. During the migration, the group is marked Unstable.
+
+### Replica Balancing
+
+Doris tries to distribute the fragments of Colocation tables evenly across all BE nodes. For ordinary tables, replica balancing is done at the granularity of a single replica, i.e. it is enough to find a lower-load BE for each replica individually. Balancing of Colocation tables is done at the bucket level: all replicas within a bucket are migrated together. We adopt a simple balancing algorithm that distributes the Buckets Sequence evenly across all BEs, considering only the number of replicas and not their actual sizes. The details of the algorithm can be found in the code comments of `ColocateTableBalancer.java`.
+ +> Note 1: Current Colocation replica balancing and repair algorithms may not work well for heterogeneous deployed Oris clusters. The so-called heterogeneous deployment, that is, the BE node's disk capacity, number, disk type (SSD and HDD) is inconsistent. In the case of heterogeneous deployment, small BE nodes and large BE nodes may store the same number of replicas. +> +> Note 2: When a group is in an Unstable state, the Join of the table in it will degenerate into a normal Join. At this time, the query performance of the cluster may be greatly reduced. If you do not want the system to balance automatically, you can set the FE configuration item `disable_colocate_balance` to prohibit automatic balancing. Then open it at the right time. (See Section `Advanced Operations` for details) + +## Query + +The Colocation table is queried in the same way as ordinary tables, and users do not need to perceive Colocation attributes. If the Group in which the Colocation table is located is in an Unstable state, it will automatically degenerate to a normal Join. + +Examples are given to illustrate: + +Table 1: + +``` +CREATE TABLE `tbl1` ( + `k1` date NOT NULL COMMENT "", + `k2` int(11) NOT NULL COMMENT "", + `v1` int(11) SUM NOT NULL COMMENT "" +) ENGINE=OLAP +AGGREGATE KEY(`k1`, `k2`) +PARTITION BY RANGE(`k1`) +( + PARTITION p1 VALUES LESS THAN ('2019-05-31'), + PARTITION p2 VALUES LESS THAN ('2019-06-30') +) +DISTRIBUTED BY HASH(`k2`) BUCKETS 8 +PROPERTIES ( + "colocate_with" = "group1" +); +``` + +Table 2: + +``` +CREATE TABLE `tbl2` ( + `k1` datetime NOT NULL COMMENT "", + `k2` int(11) NOT NULL COMMENT "", + `v1` double SUM NOT NULL COMMENT "" +) ENGINE=OLAP +AGGREGATE KEY(`k1`, `k2`) +DISTRIBUTED BY HASH(`k2`) BUCKETS 8 +PROPERTIES ( + "colocate_with" = "group1" +); +``` + +View the query plan: + +``` +DESC SELECT * FROM tbl1 INNER JOIN tbl2 ON (tbl1.k2 = tbl2.k2); + ++----------------------------------------------------+ +| Explain String | ++----------------------------------------------------+ +| PLAN FRAGMENT 0 | +| OUTPUT EXPRS:`tbl1`.`k1` | | +| PARTITION: RANDOM | +| | +| RESULT SINK | +| | +| 2:HASH JOIN | +| | join op: INNER JOIN | +| | hash predicates: | +| | colocate: true | +| | `tbl1`.`k2` = `tbl2`.`k2` | +| | tuple ids: 0 1 | +| | | +| |----1:OlapScanNode | +| | TABLE: tbl2 | +| | PREAGGREGATION: OFF. Reason: null | +| | partitions=0/1 | +| | rollup: null | +| | buckets=0/0 | +| | cardinality=-1 | +| | avgRowSize=0.0 | +| | numNodes=0 | +| | tuple ids: 1 | +| | | +| 0:OlapScanNode | +| TABLE: tbl1 | +| PREAGGREGATION: OFF. Reason: No AggregateInfo | +| partitions=0/2 | +| rollup: null | +| buckets=0/0 | +| cardinality=-1 | +| avgRowSize=0.0 | +| numNodes=0 | +| tuple ids: 0 | ++----------------------------------------------------+ +``` +如果 Colocation Join 生效,则 Hash Join 节点会显示 `colocate: true`。 + +If not, the query plan is as follows: + +``` ++----------------------------------------------------+ +| Explain String | ++----------------------------------------------------+ +| PLAN FRAGMENT 0 | +| OUTPUT EXPRS:`tbl1`.`k1` | | +| PARTITION: RANDOM | +| | +| RESULT SINK | +| | +| 2:HASH JOIN | +| | join op: INNER JOIN (BROADCAST) | +| | hash predicates: | +| | colocate: false, reason: group is not stable | +| | `tbl1`.`k2` = `tbl2`.`k2` | +| | tuple ids: 0 1 | +| | | +| |----3:EXCHANGE | +| | tuple ids: 1 | +| | | +| 0:OlapScanNode | +| TABLE: tbl1 | +| PREAGGREGATION: OFF. 
Reason: No AggregateInfo | +| partitions=0/2 | +| rollup: null | +| buckets=0/0 | +| cardinality=-1 | +| avgRowSize=0.0 | +| numNodes=0 | +| tuple ids: 0 | +| | +| PLAN FRAGMENT 1 | +| OUTPUT EXPRS: | +| PARTITION: RANDOM | +| | +| STREAM DATA SINK | +| EXCHANGE ID: 03 | +| UNPARTITIONED | +| | +| 1:OlapScanNode | +| TABLE: tbl2 | +| PREAGGREGATION: OFF. Reason: null | +| partitions=0/1 | +| rollup: null | +| buckets=0/0 | +| cardinality=-1 | +| avgRowSize=0.0 | +| numNodes=0 | +| tuple ids: 1 | ++----------------------------------------------------+ +``` + +The HASH JOIN node displays the corresponding reason: `colocate: false, reason: group is not stable`. At the same time, an EXCHANGE node will be generated. + + +## Advanced Operations + +### FE Configuration Item + +* disable\_colocate\_relocate + +Whether to close Doris's automatic Colocation replica repair. The default is false, i.e. not closed. This parameter only affects the replica repair of the Colocation table, but does not affect the normal table. + +* disable\_colocate\_balance + +Whether to turn off automatic Colocation replica balancing for Doris. The default is false, i.e. not closed. This parameter only affects the replica balance of the Collocation table, but does not affect the common table. + +以上参数可以动态修改,设置方式请参阅 `HELP ADMIN SHOW CONFIG;` 和 `HELP ADMIN SET CONFIG;`。 + +* disable\_colocate\_join + +Whether to turn off the Colocation Join function or not. In 0.10 and previous versions, the default is true, that is, closed. In a later version, it will default to false, that is, open. + +* use\_new\_tablet\_scheduler + +In 0.10 and previous versions, the new replica scheduling logic is incompatible with the Colocation Join function, so in 0.10 and previous versions, if `disable_colocate_join = false`, you need to set `use_new_tablet_scheduler = false`, that is, close the new replica scheduler. In later versions, `use_new_tablet_scheduler` will be equal to true. + +###HTTP Restful API + +Doris provides several HTTP Restful APIs related to Colocation Join for viewing and modifying Colocation Group. + +The API is implemented on the FE side and accessed using `fe_host: fe_http_port`. ADMIN privileges are required. + +1. View all Colocation information for the cluster + + ``` + GET /api/colocate + + 返回以 Json 格式表示内部 Colocation 信息。 + + { + "colocate_meta": { + "groupName2Id": { + "g1": { + "dbId": 10005, + "grpId": 10008 + } + }, + "group2Tables": {}, + "table2Group": { + "10007": { + "dbId": 10005, + "grpId": 10008 + }, + "10040": { + "dbId": 10005, + "grpId": 10008 + } + }, + "group2Schema": { + "10005.10008": { + "groupId": { + "dbId": 10005, + "grpId": 10008 + }, + "distributionColTypes": [{ + "type": "INT", + "len": -1, + "isAssignedStrLenInColDefinition": false, + "precision": 0, + "scale": 0 + }], + "bucketsNum": 10, + "replicationNum": 2 + } + }, + "group2BackendsPerBucketSeq": { + "10005.10008": [ + [10004, 10002], + [10003, 10002], + [10002, 10004], + [10003, 10002], + [10002, 10004], + [10003, 10002], + [10003, 10004], + [10003, 10004], + [10003, 10004], + [10002, 10004] + ] + }, + "unstableGroups": [] + }, + "status": "OK" + } + ``` +2. 将 Group 标记为 Stable 或 Unstable + + * 标记为 Stable + + ``` + POST /api/colocate/group_stable?db_id=10005&group_id=10008 + + 返回:200 + ``` + + * 标记为 Unstable + + ``` + DELETE /api/colocate/group_stable?db_id=10005&group_id=10008 + + 返回:200 + ``` + +3. Setting Data Distribution for Group + + The interface can force the number distribution of a group. 
+ + ``` + POST /api/colocate/bucketseq?db_id=10005&group_id= 10008 + + Body: + [[10004,10002],[10003,10002],[10002,10004],[10003,10002],[10002,10004],[10003,10002],[10003,10004],[10003,10004],[10003,10004],[10002,10004]] + + 返回 200 + ``` + Body is a Buckets Sequence represented by a nested array and the ID of the BE where the fragments are distributed in each Bucket. + + Note that using this command, you may need to set the FE configuration `disable_colocate_relocate` and `disable_colocate_balance` to true. That is to shut down the system for automatic Colocation replica repair and balancing. Otherwise, it may be automatically reset by the system after modification. \ No newline at end of file diff --git a/docs/documentation/en/administrator-guide/export_manual_EN.md b/docs/documentation/en/administrator-guide/export_manual_EN.md new file mode 100644 index 00000000000000..12617b99d3314c --- /dev/null +++ b/docs/documentation/en/administrator-guide/export_manual_EN.md @@ -0,0 +1,165 @@ +# Data export + +Export is a function provided by Doris to export data. This function can export user-specified table or partition data in text format to remote storage through Broker process, such as HDFS/BOS. + +This document mainly introduces the basic principles, usage, best practices and precautions of Export. + +## Noun Interpretation + +* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. +* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. +* Broker: Doris can manipulate files for remote storage through the Broker process. +* Tablet: Data fragmentation. A table is divided into multiple data fragments. + +## Principle + +After the user submits an Export job. Doris counts all Tablets involved in this job. These tablets are then grouped to generate a special query plan for each group. The query plan reads the data on the included tablet and then writes the data to the specified path of the remote storage through Broker. + +The overall mode of dispatch is as follows: + +``` ++--------+ +| Client | ++---+----+ + | 1. Submit Job + | ++---v--------------------+ +| FE | +| | +| +-------------------+ | +| | ExportPendingTask | | +| +-------------------+ | +| | 2. Generate Tasks +| +--------------------+ | +| | ExportExporingTask | | +| +--------------------+ | +| | +| +-----------+ | +----+ +------+ +---------+ +| | QueryPlan +----------------> BE +--->Broker+---> | +| +-----------+ | +----+ +------+ | Remote | +| +-----------+ | +----+ +------+ | Storage | +| | QueryPlan +----------------> BE +--->Broker+---> | +| +-----------+ | +----+ +------+ +---------+ ++------------------------+ 3. Execute Tasks + +``` + +1. The user submits an Export job to FE. +2. FE's Export scheduler performs an Export job in two stages: + 1. PENDING: FE generates Export Pending Task, sends snapshot command to BE, and takes a snapshot of all Tablets involved. And generate multiple query plans. + 2. EXPORTING: FE generates Export ExporingTask and starts executing the query plan. + +### query plan splitting + +The Export job generates multiple query plans, each of which scans a portion of the Tablet. The number of Tablets scanned by each query plan is specified by the FE configuration parameter `export_tablet_num_per_task`, which defaults to 5. That is, assuming a total of 100 Tablets, 20 query plans will be generated. Users can also specify this number by the job attribute `tablet_num_per_task`, when submitting a job. 
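+
+For example, this attribute can be set in `PROPERTIES` when the job is submitted. A minimal sketch with placeholder table name and export path (the full EXPORT syntax appears in the usage example below):
+
+```
+EXPORT TABLE db1.tbl1
+TO "hdfs://host:port/tmp/export/"
+PROPERTIES
+(
+    "tablet_num_per_task" = "20"
+)
+WITH BROKER "hdfs"
+(
+    "username" = "user",
+    "password" = "passwd"
+);
+```
+
+With this setting, a table of 100 Tablets would be split into 5 query plans instead of the default 20.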
+ +Multiple query plans for a job are executed sequentially. + +### Query Plan Execution + +A query plan scans multiple fragments, organizes read data in rows, batches every 1024 actions, and writes Broker to remote storage. + +The query plan will automatically retry three times if it encounters errors. If a query plan fails three retries, the entire job fails. + +Doris will first create a temporary directory named `doris_export_tmp_12345` (where `12345` is the job id) in the specified remote storage path. The exported data is first written to this temporary directory. Each query plan generates a file with an example file name: + +`export-data-c69fcf2b6db5420f-a96b94c1ff8bccef-1561453713822` + +Among them, `c69fcf2b6db5420f-a96b94c1ff8bccef` is the query ID of the query plan. ` 1561453713822` Timestamp generated for the file. + +When all data is exported, Doris will rename these files to the user-specified path. + +## Use examples + +Export's detailed commands can be passed through `HELP EXPORT;` Examples are as follows: + +``` +EXPORT TABLE db1.tbl1 +PARTITION (p1,p2) +TO "bos://bj-test-cmy/export/" +PROPERTIES +( + "column_separator"=",", + "exec_mem_limit"="2147483648", + "timeout" = "3600" +) +WITH BROKER "hdfs" +( + "username" = "user", + "password" = "passwd", +); +``` + +* `column_separator`: Column separator. The default is `\t`. +* `line_delimiter`: Line separator. The default is `\n`. +* `exec_mem_limit`: Represents the memory usage limitation of a query plan on a single BE in an Export job. Default 2GB. Unit bytes. +* `timeout`: homework timeout. Default 2 hours. Unit seconds. +* `tablet_num_per_task`: The maximum number of fragments allocated per query plan. The default is 5. + +After submitting a job, the job status can be imported by querying the `SHOW EXPORT'command. The results are as follows: + +``` + JobId: 14008 + State: FINISHED + Progress: 100% + TaskInfo: {"partitions":["*"],"exec mem limit":2147483648,"column separator":",","line delimiter":"\n","tablet num":1,"broker":"hdfs","coord num":1,"db":"default_cluster:db1","tbl":"tbl3"} + Path: bos://bj-test-cmy/export/ +CreateTime: 2019-06-25 17:08:24 + StartTime: 2019-06-25 17:08:28 +FinishTime: 2019-06-25 17:08:34 + Timeout: 3600 + ErrorMsg: N/A +``` + + +* JobId: The unique ID of the job +* State: Job status: + * PENDING: Jobs to be Scheduled + * EXPORING: Data Export + * FINISHED: Operation Successful + * CANCELLED: Job Failure +* Progress: Work progress. The schedule is based on the query plan. Assuming a total of 10 query plans have been completed, the progress will be 30%. +* TaskInfo: Job information in Json format: + * db: database name + * tbl: Table name + * partitions: Specify the exported partition. `*` Represents all partitions. + * exec MEM limit: query plan memory usage limit. Unit bytes. + * column separator: The column separator for the exported file. + * line delimiter: The line separator for the exported file. + * tablet num: The total number of tablets involved. + * Broker: The name of the broker used. + * Coord num: Number of query plans. +* Path: Export path on remote storage. +* CreateTime/StartTime/FinishTime: Creation time, start scheduling time and end time of jobs. +* Timeout: Job timeout. The unit is seconds. This time is calculated from CreateTime. +* Error Msg: If there is an error in the job, the cause of the error is shown here. 
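+
+Because Export is asynchronous, the job is usually polled until State becomes FINISHED or CANCELLED. A minimal sketch, assuming `SHOW EXPORT` accepts the same WHERE / ORDER BY / LIMIT clauses used by the `SHOW LOAD` examples elsewhere in these documents (verify the exact clauses with the built-in help):
+
+```
+-- Most recently created export job
+SHOW EXPORT ORDER BY CreateTime DESC LIMIT 1;
+
+-- Only the jobs that are still exporting (assuming a STATE filter is accepted)
+SHOW EXPORT WHERE STATE = "EXPORTING";
+```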
+ +## Best Practices + +### Splitting Query Plans + +How many query plans need to be executed for an Export job depends on the total number of Tablets and how many Tablets can be allocated for a query plan at most. Since multiple query plans are executed serially, the execution time of jobs can be reduced if more fragments are processed by one query plan. However, if the query plan fails (e.g., the RPC fails to call Broker, the remote storage jitters, etc.), too many tablets can lead to a higher retry cost of a query plan. Therefore, it is necessary to arrange the number of query plans and the number of fragments to be scanned for each query plan in order to balance the execution time and the success rate of execution. It is generally recommended that the amount of data scanned by a query plan be within 3-5 GB (the size and number of tables in a table can be viewed by `SHOW TABLET FROM tbl_name;`statement. + +### exec\_mem\_limit + +Usually, a query plan for an Export job has only two parts `scan`- `export`, and does not involve computing logic that requires too much memory. So usually the default memory limit of 2GB can satisfy the requirement. But in some scenarios, such as a query plan, too many Tablets need to be scanned on the same BE, or too many data versions of Tablets, may lead to insufficient memory. At this point, larger memory needs to be set through this parameter, such as 4 GB, 8 GB, etc. + +## Notes + +* It is not recommended to export large amounts of data at one time. The maximum amount of exported data recommended by an Export job is tens of GB. Excessive export results in more junk files and higher retry costs. +* If the amount of table data is too large, it is recommended to export it by partition. +* During the operation of the Export job, if FE restarts or cuts the master, the Export job will fail, requiring the user to resubmit. +* If the Export job fails, the `__doris_export_tmp_xxx` temporary directory generated in the remote storage and the generated files will not be deleted, requiring the user to delete them manually. +* If the Export job runs successfully, the `__doris_export_tmp_xxx` directory generated in the remote storage may be retained or cleared according to the file system semantics of the remote storage. For example, in Baidu Object Storage (BOS), after removing the last file in a directory through rename operation, the directory will also be deleted. If the directory is not cleared, the user can clear it manually. +* When the Export runs successfully or fails, the FE reboots or cuts, then some information of the jobs displayed by `SHOW EXPORT` will be lost and can not be viewed. +* Export jobs only export data from Base tables, not Rollup Index. +* Export jobs scan data and occupy IO resources, which may affect the query latency of the system. + +## Relevant configuration + +### FE + +* `expo_checker_interval_second`: Scheduling interval of Export job scheduler, default is 5 seconds. Setting this parameter requires restarting FE. +* `export_running_job_num_limit `: Limit on the number of Export jobs running. If exceeded, the job will wait and be in PENDING state. The default is 5, which can be adjusted at run time. +* `Export_task_default_timeout_second`: Export job default timeout time. The default is 2 hours. It can be adjusted at run time. +* `export_tablet_num_per_task`: The maximum number of fragments that a query plan is responsible for. The default is 5. 
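+
+The items described above as adjustable at run time could presumably be changed through the `ADMIN SET CONFIG` statement referenced elsewhere in these documents; a minimal, hypothetical sketch (check `HELP ADMIN SET CONFIG` for the exact syntax):
+
+```
+-- Hypothetical example: let each export query plan scan up to 10 tablets
+ADMIN SET FRONTEND CONFIG ("export_tablet_num_per_task" = "10");
+```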
diff --git a/docs/documentation/en/administrator-guide/http-actions/fe-get-log-file_EN.md b/docs/documentation/en/administrator-guide/http-actions/fe-get-log-file_EN.md new file mode 100644 index 00000000000000..f4042df14f378a --- /dev/null +++ b/docs/documentation/en/administrator-guide/http-actions/fe-get-log-file_EN.md @@ -0,0 +1,52 @@ +# get\_log\_file + +Users can access FE log files through the HTTP interface. + +## Log type + +The following types of FE logs are supported: + +1. fe.audit.log (audit log) + + The audit log records information that has been requested by all request statements for the corresponding FE node. The file naming rules for audit logs are as follows: + + ``` + fe.audit.log # Current Latest Log + fe.audit.log.20190603.1 # The audit log of the corresponding date generates a serial suffix when the log size of the corresponding date exceeds 1GB. The smaller the serial number, the newer the content. + fe.audit.log.20190603.2 + fe.audit.log.20190602.1 + ... + ``` + +## Interface examples + +1. Get a list of log files of the corresponding type + + Examples: + + `curl -X HEAD -uuser:passwd http://fe_host:http_port/api/get_log_file?type=fe.audit.log` + + Result: + + ``` + HTTP/1.1 200 OK + file_infos: {"fe.audit.log":24759,"fe.audit.log.20190528.1":132934} + content-type: text/html + connection: keep-alive + ``` + In the returned header, the `file_infos'field displays the list of files in JSON format and the corresponding file size (in bytes) + +2. Download log files + + Examples: + + ``` + curl -X GET -uuser:passwd http://fe_host:http_port/api/get_log_file?type=fe.audit.log\&file=fe.audit.log.20190528.1 + ``` + Result: + + Download the specified file as a file. + +## Interface description + +The interface requires admin privileges. diff --git a/docs/documentation/en/administrator-guide/http-actions/index.rst b/docs/documentation/en/administrator-guide/http-actions/index.rst new file mode 100644 index 00000000000000..268e99b5a8fd78 --- /dev/null +++ b/docs/documentation/en/administrator-guide/http-actions/index.rst @@ -0,0 +1,9 @@ +============= +HTTP API +============= + +.. toctree:: + :maxdepth: 1 + :glob: + + * diff --git a/docs/documentation/en/administrator-guide/index.rst b/docs/documentation/en/administrator-guide/index.rst new file mode 100644 index 00000000000000..cb0f29fa9aff83 --- /dev/null +++ b/docs/documentation/en/administrator-guide/index.rst @@ -0,0 +1,17 @@ +============= +操作手册 +============= + +.. toctree:: + :hidden: + + load-data/index + http-actions/index + operation/index + +.. toctree:: + :maxdepth: 1 + :glob: + + * + diff --git a/docs/documentation/en/administrator-guide/load-data/broker-load-manual_EN.md b/docs/documentation/en/administrator-guide/load-data/broker-load-manual_EN.md new file mode 100644 index 00000000000000..5149ca60090f93 --- /dev/null +++ b/docs/documentation/en/administrator-guide/load-data/broker-load-manual_EN.md @@ -0,0 +1,449 @@ +# Broker Load + +Broker load is an asynchronous import method, and the data source supported depends on the data source supported by the Broker process. + +Users need to create Broker load imports through MySQL protocol and check the import results by viewing the import commands. + +## Applicable scenarios + +* Source data in Broker accessible storage systems, such as HDFS. +* Data volumes range from tens to hundreds of GB. + +## Noun Interpretation + +1. Frontend (FE): Metadata and scheduling nodes of Doris system. 
In the import process, it is mainly responsible for the generation of import plan and the scheduling of import tasks. +2. Backend (BE): The computing and storage nodes of Doris system. In the import process, it is mainly responsible for ETL and storage of data. +3. Broker: Broker is an independent stateless process. It encapsulates the file system interface and provides Doris with the ability to read files in the remote storage system. +4. Plan: Import the execution plan, and BE executes the import execution plan to import data into Doris system. + +## Basic Principles + +After the user submits the import task, FE generates the corresponding plan and distributes the plan to several BEs according to the number of BEs and the size of the file. Each BE performs part of the import data. + +BE pulls data from Broker and imports it into the system after transforming the data. All BEs complete the import, and the FE decides whether the import is successful or not. + +``` + + + | 1. user create broker load + v + +----+----+ + | | + | FE | + | | + +----+----+ + | + | 2. BE etl and load the data + +--------------------------+ + | | | ++---v---+ +--v----+ +---v---+ +| | | | | | +| BE | | BE | | BE | +| | | | | | ++---+-^-+ +---+-^-+ +--+-^--+ + | | | | | | + | | | | | | 3. pull data from broker ++---v-+-+ +---v-+-+ +--v-+--+ +| | | | | | +|Broker | |Broker | |Broker | +| | | | | | ++---+-^-+ +---+-^-+ +---+-^-+ + | | | | | | ++---v-+-----------v-+----------v-+-+ +| HDFS/BOS/AFS cluster | +| | ++----------------------------------+ + +``` + +## Basic operations + +### Create a load + +Broker load 创建导入语句 + +Grammar: + +``` +LOAD LABEL db_name.label_name +(data_desc, ...) +WITH BROKER broker_name broker_properties +[PROPERTIES (key1=value1, ... )] + +* data_desc: + + DATA INFILE ('file_path', ...) + [NEGATIVE] + INTO TABLE tbl_name + [PARTITION (p1, p2)] + [COLUMNS TERMINATED BY separator ] + [(col1, ...)] + [SET (k1=f1(xx), k2=f2(xx))] + +* broker_properties: + + (key1=value1, ...) +``` +Examples: + +``` +LOAD LABEL db1.label1 +( + DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1") + INTO TABLE tbl1 + COLUMNS TERMINATED BY "," + (tmp_c1,tmp_c2) + SET + ( + id=tmp_c2, + name=tmp_c1) + ), + DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2") + INTO TABLE tbl2 + COLUMNS TERMINATED BY "," + (col1, col2) +) +WITH BROKER 'broker' +( + "username"="user", + "password"="pass" +) +PROPERTIES +( + "timeout" = "3600" +); + +``` + +Create the imported detailed grammar execution ``HELP BROKER LOAD `` View grammar help. This paper mainly introduces the parametric meaning and points for attention in Broker load's creation import grammar. + +#### Label + +Identity of import task. Each import task has a unique Label within a single database. Label is a user-defined name in the import command. With this Label, users can view the execution of the corresponding import task. + +Another function of Label is to prevent users from repeatedly importing the same data. **It is strongly recommended that users use the same label for the same batch of data. Thus, repeated requests for the same batch of data can only be accepted once, guaranteeing at-Most-One semantics** + +When the corresponding import job status of Label is CANCELLED, it can be used again to submit the import job. + +#### Data Description Class Parameters + +Data description class parameters mainly refer to the parameters belonging to ``data_desc`` in Broker load creating import statements. 
Each group of ```data_desc``` mainly describes the data source address, ETL function, target table and partition information involved in this import. + +The following is a detailed explanation of some parameters of the data description class: + ++ Multi-table import + + Broker load supports a single import task involving multiple tables, and each Broker load import task can implement multiple tables import by declaring multiple tables in multiple ``data_desc``. Each individual ```data_desc``` can also specify the data source address belonging to the table. Broker load guarantees atomic success or failure between multiple tables imported at a single time. + ++ negative + + ```data_desc``` can also set up data fetching and anti-importing. This function is mainly used when aggregated columns in data tables are of SUM type. If you want to revoke a batch of imported data. The `negative'parameter can be used as a batch of data. Doris automatically retrieves this batch of data on aggregated columns to eliminate the same batch of data. + ++ partition + + In `data_desc`, you can specify the partition information of the table to be imported, but it will not be imported if the data to be imported does not belong to the specified partition. At the same time, data that does not specify a Partition is considered error data. + +#### Import job parameters + +Import job parameters mainly refer to the parameters in Broker load creating import statement that belong to ``opt_properties``. Import operation parameters act on the whole import operation. + +The following is a detailed explanation of some parameters of the import operation parameters: + ++ time out + + The time-out of the import job (in seconds) allows the user to set the time-out of each import by himself in ``opt_properties``. If the import task is not completed within the set timeout time, it will be cancelled by the system and become CANCELLED. The default import timeout for Broker load is 4 hours. + + Usually, the user does not need to manually set the timeout of the import task. When the import cannot be completed within the default timeout time, the task timeout can be set manually. + + > Recommended timeout + > + > Total File Size (MB) / Slowest Import Speed (MB/s) > timeout >((MB) * Number of tables to be imported and related Roll up tables) / (10 * Number of concurrent imports) + + > The concurrency of imports can be seen in the final configuration of the import system in the document. The current import speed limit is 10MB/s in 10 of the formulas. + + > For example, a 1G data to be imported contains three Rollup tables, and the current concurrency of imports is 3. The minimum value of timeout is ```(1 * 1024 * 3) / (10 * 3) = 102 seconds.``` + + Because the machine environment of each Doris cluster is different and the concurrent query tasks of the cluster are different, the slowest import speed of the user Doris cluster requires the user to guess the import task speed according to the history. + ++ max\_filter\_ratio + + The maximum tolerance rate of the import task is 0 by default, and the range of values is 0-1. When the import error rate exceeds this value, the import fails. + + If the user wishes to ignore the wrong row, the import can be successful by setting this parameter greater than 0. + + The calculation formula is as follows: + + ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ``` + + ``` dpp.abnorm.ALL``` denotes the number of rows whose data quality is not up to standard. 
Such as type mismatch, column mismatch, length mismatch and so on. + + ``` dpp.norm.ALL ``` refers to the number of correct data in the import process. The correct amount of data for the import task can be queried by the ``SHOW LOAD`` command. + + The number of rows in the original file = `dpp.abnorm.ALL + dpp.norm.ALL` + +* exec\_mem\_limit + + The upper limit of memory usage for import tasks. When the memory used by the import task exceeds the set upper limit, the import task will be CANCEL. The default is 2G in bytes. + + When `Memory exceed limit` error occurs in the import, this parameter can be adjusted appropriately, such as 4G, 8G, etc. + ++ strict\_mode + + Broker load 导入可以开启 strict mode 模式。开启方式为 ```properties ("strict_mode" = "true")``` 。默认的 strict mode 为开启。 + + The strict mode means that the column type conversion in the import process is strictly filtered. The strategy of strict filtering is as follows: + + 1. For column type conversion, if strict mode is true, the wrong data will be filtered. Error data here refers to the kind of data that the original data is not null and the result is null after participating in column type conversion. + + 2. Strict mode does not affect the imported column when it is generated by a function transformation. + + 3. For a column type imported that contains scope restrictions, strict mode does not affect it if the original data can normally pass type conversion, but can not pass scope restrictions. For example, if the type is decimal (1,0) and the original data is 10, it falls within the scope of type conversion but not column declaration. This data strict has no effect on it. + +#### Import Relation between strict mode source data + +Here's an example of a column type TinyInt + +> Note: When columns in a table allow null values to be imported + +|source data | source data example | string to int | strict_mode | result| +|------------|---------------------|-----------------|--------------------|---------| +|空值 | \N | N/A | true or false | NULL| +|not null | aaa or 2000 | NULL | true | invalid data(filtered)| +|not null | aaa | NULL | false | NULL| +|not null | 1 | 1 | true or false | correct data| + +Here's an example of column type Decimal (1,0) + +> Note: When columns in a table allow null values to be imported + +|source data | source data example | string to int | strict_mode | result| +|------------|---------------------|-----------------|--------------------|--------| +|空值 | \N | N/A | true or false | NULL| +|not null | aaa | NULL | true | invalid data(filtered)| +|not null | aaa | NULL | false | NULL| +|not null | 1 or 10 | 1 | true or false | correct data| + +> Note: Although 10 is a value beyond the range, strict mode does not affect it because its type meets the requirements of decimal. 10 will eventually be filtered in other ETL processes. But it will not be filtered by strict mode. + +### View load + +Broker load import mode is asynchronous, so the user must create the imported Label record and use Label in the **view Import command to view the import result**. View import commands are common in all import modes. The specific syntax can be `HELP SHOW LOAD`. + +Examples: + +``` +mysql> show load order by createtime desc limit 1\G +*************************** 1. 
row *************************** + JobId: 76391 + Label: label1 + State: FINISHED + Progress: ETL:N/A; LOAD:100% + Type: BROKER + EtlInfo: dpp.abnorm.ALL=15; dpp.norm.ALL=28133376 + TaskInfo: cluster:N/A; timeout(s):10800; max_filter_ratio:5.0E-5 + ErrorMsg: N/A + CreateTime: 2019-07-27 11:46:42 + EtlStartTime: 2019-07-27 11:46:44 + EtlFinishTime: 2019-07-27 11:46:44 + LoadStartTime: 2019-07-27 11:46:44 +LoadFinishTime: 2019-07-27 11:50:16 + URL: http://192.168.1.1:8040/api/_load_error_log?file=__shard_4/error_log_insert_stmt_4bb00753932c491a-a6da6e2725415317_4bb00753932c491a_a6da6e2725415317 +``` + +The following is mainly about the significance of viewing the parameters in the return result set of the import command: + ++ JobId + + The unique ID of the import task is different for each import task, which is automatically generated by the system. Unlike Label, JobId will never be the same, while Label can be reused after the import task fails. + ++ Label + + Identity of import task. + ++ State + + Import the current phase of the task. In the Broker load import process, PENDING and LOADING are the two main import states. If the Broker load is in the PENDING state, it indicates that the current import task is waiting to be executed; the LOADING state indicates that it is executing. + + There are two final stages of the import task: CANCELLED and FINISHED. When Load job is in these two stages, the import is completed. CANCELLED is the import failure, FINISHED is the import success. + ++ Progress + + Import the progress description of the task. There are two kinds of progress: ETL and LOAD, which correspond to the two stages of the import process, ETL and LOADING. At present, Broker load only has the LOADING stage, so ETL will always be displayed as `N/A`. + + The progress range of LOAD is 0-100%. + + ``` LOAD Progress = Number of tables currently completed / Number of tables designed for this import task * 100%``` + + **If all import tables complete the import, then the progress of LOAD is 99%** import enters the final effective stage, and the progress of LOAD will only be changed to 100% after the entire import is completed. + + Import progress is not linear. So if there is no change in progress over a period of time, it does not mean that the import is not being implemented. + ++ Type + + Types of import tasks. The type value of Broker load is only BROKER. ++ Etlinfo + + It mainly shows the imported data quantity indicators `dpp.norm.ALL` and `dpp.abnorm.ALL`. Users can verify that the error rate of the current import task exceeds max\_filter\_ratio based on these two indicators. + ++ TaskInfo + + It mainly shows the current import task parameters, that is, the user-specified import task parameters when creating the Broker load import task, including `cluster`, `timeout`, and `max_filter_ratio`. + ++ ErrorMsg + + When the import task status is CANCELLED, the reason for the failure is displayed in two parts: type and msg. If the import task succeeds, the `N/A` is displayed. 
+ + The value meaning of type: + + ``` + USER_CANCEL: User Canceled Tasks + ETL_RUN_FAIL:Import tasks that failed in the ETL phase + ETL_QUALITY_UNSATISFIED:Data quality is not up to standard, that is, the error rate exceedsmax_filter_ratio + LOAD_RUN_FAIL:Import tasks that failed in the LOADING phase + TIMEOUT:Import task not completed in overtime + UNKNOWN:Unknown import error + ``` + ++ CreateTime /EtlStartTime /EtlFinishTime /LoadStartTime /LoadFinishTime + + These values represent the creation time of the import, the beginning time of the ETL phase, the completion time of the ETL phase, the beginning time of the Loading phase and the completion time of the entire import task, respectively. + + Broker load import has no ETL stage, so its EtlStartTime, EtlFinishTime, LoadStartTime are set to the same value. + + Import tasks stay in CreateTime for a long time, while LoadStartTime is N/A, which indicates that import tasks are heavily stacked at present. Users can reduce the frequency of import submissions. + + ``` + LoadFinishTime - CreateTime = Time consumed by the entire import task + LoadFinishTime - LoadStartTime = The entire Broker load import task execution time = the time consumed by the entire import task - the time the import task waits + ``` + ++ URL + + The error data sample of the import task can be obtained by accessing the URL address. When there is no error data in this import, the URL field is N/A. + +### Cancel load + +When the Broker load job status is not CANCELLED or FINISHED, it can be manually cancelled by the user. When canceling, you need to specify a Label for the import task to be cancelled. Canceling Import command syntax can perform `HELP CANCEL LOAD` view. + +## Relevant System Configuration + +### FE configuration + +The following configurations belong to the Broker load system-level configuration, which acts on all Broker load import tasks. Configuration values are adjusted mainly by modifying `fe.conf`. + ++ min\_bytes\_per\_broker\_scanner/max\_bytes\_per\_broker\_scanner/max\_broker\_concurrency + + The first two configurations limit the minimum and maximum amount of data processed by a single BE. The third configuration limits the maximum number of concurrent imports for a job. The minimum amount of data processed, the maximum number of concurrencies, the size of source files and the number of BEs in the current cluster **together determine the concurrency of this import**. + + ``` + The number of concurrent imports = Math. min (source file size / minimum throughput, maximum concurrency, current number of BE nodes) + Processing capacity of this import of a single BE = source file size / concurrency of this import + ``` + + Usually the maximum amount of data supported by an import job is `max_bytes_per_broker_scanner * number of BE nodes`. If you need to import a larger amount of data, you need to adjust the size of the `max_bytes_per_broker_scanner` parameter appropriately. + +Default configuration: + + ``` + Parameter name: min_bytes_per_broker_scanner, default 64MB, unit bytes. + Parameter name: max_broker_concurrency, default 10. + Parameter name: max_bytes_per_broker_scanner, default 3G, unit bytes. + ``` + +## Best Practices + +### Application scenarios + +The most appropriate scenario to use Broker load is the scenario of raw data in a file system (HDFS, BOS, AFS). 
In addition, since Broker load is the only asynchronous method among the single-statement import modes, it is also a good choice when users need to import large files asynchronously.
+
+### Data volume
+
+Only the case of a single BE is discussed here. If the cluster has multiple BEs, the data sizes below should be multiplied by the number of BEs. For example, with three BEs, "3G or below" becomes "9G or below".
+
++ 3G or below
+
+    The user can submit the Broker load import request directly.
+
++ Above 3G
+
+    Since the maximum amount of data a single BE processes for one import is 3G by default, files larger than 3G can only be imported after adjusting the Broker load import parameters.
+
+    1. Adjust the maximum scan size and the maximum concurrency of a single BE according to the current number of BEs and the size of the original file:
+
+        ```
+        Modify the following configuration in fe.conf:
+
+        max_broker_concurrency = number of BE nodes
+        Amount of data processed by a single BE for this import = original file size / max_broker_concurrency
+        max_bytes_per_broker_scanner >= amount of data processed by a single BE for this import
+
+        For example, for a 100G file with 10 BEs in the cluster:
+        max_broker_concurrency = 10
+        max_bytes_per_broker_scanner >= 10G (= 100G / 10)
+        ```
+
+        After the modification, all BEs process the import task concurrently, each BE handling part of the original file.
+
+        *Note: Both of the above FE configurations are system configurations; that is, modifying them affects all Broker load tasks.*
+
+    2. Customize the timeout of the import task when creating it:
+
+        ```
+        Data processed by a single BE for this import / slowest import speed of the user's Doris cluster (MB/s) >= timeout of this import >= data processed by a single BE for this import / 10M/s
+
+        For example, for a 100G file with 10 BEs in the cluster:
+        timeout >= 1000s = 10G / 10M/s
+        ```
+
+    3. When the timeout calculated in the second step exceeds the system's default maximum import timeout of 4 hours:
+
+        It is not recommended to simply increase the maximum import timeout. If a single import would exceed the default maximum timeout of 4 hours, it is better to split the file to be imported and import it in several batches, mainly because if a single import takes more than 4 hours, the cost of retrying after a failure becomes very high.
+
+        The maximum amount of file data a Doris cluster is expected to import can be estimated with the following formula:
+
+        ```
+        Expected maximum amount of imported file data = 14400s * 10M/s * number of BE nodes
+        For example, with 10 BEs in the cluster:
+        Expected maximum amount of imported file data = 14400s * 10M/s * 10 = 1440000M (about 1440G)
+
+        Note: An average environment may not reach a speed of 10M/s, so it is recommended that files larger than 500G be split and imported in batches.
+        ```
+
+### Complete examples
+
+Data situation: the user's data is in HDFS at hdfs://abc.com:8888/store_sales, the HDFS authentication username is root, the password is password, and the data size is about 30G. It is to be imported into the table store_sales in database bj_sales.
+
+Cluster situation: the cluster has about 3 BEs, and the Broker name is broker.
+ ++ Step 1: After the calculation of the above method, the single BE import quantity is 10G, then the configuration of FE needs to be modified first, and the maximum amount of single BE import is changed to: + + ``` + max_bytes_per_broker_scanner = 10737418240 + + ``` + ++ Step 2: Calculated, the import time is about 1000s, which does not exceed the default timeout time. No custom timeout time for import can be configured. + ++ Step 3: Create import statements + + ``` + LOAD LABEL bj_sales.store_sales_broker_load_01 + ( + DATA INFILE("hdfs://abc.com:8888/store_sales") + INTO TABLE store_sales + ) + WITH BROKER 'broker' + ("username"="root", "password"="password"); + ``` + +## Common Questions + +* 导入报错:`Scan bytes per broker scanner exceed limit:xxx` + + Refer to the Best Practices section of the document to modify the FE configuration items `max_bytes_per_broker_scanner` and `max_broker_concurrency'.` + +* 导入报错:`failed to send batch` 或 `TabletWriter add batch with unknown id` + + Refer to **General System Configuration** in **BE Configuration** in the Import Manual (./load-manual.md), and modify `tablet_writer_rpc_timeout_sec` and `streaming_load_rpc_max_alive_time_sec` appropriately. diff --git a/docs/documentation/en/administrator-guide/load-data/index.rst b/docs/documentation/en/administrator-guide/load-data/index.rst new file mode 100644 index 00000000000000..f4807d1df86b61 --- /dev/null +++ b/docs/documentation/en/administrator-guide/load-data/index.rst @@ -0,0 +1,12 @@ +============= +数据导入 +============= + +.. toctree:: + :maxdepth: 2 + + load-manual.md + broker-load-manual.md + stream-load-manual.md + routine-load-manual.md + insert-into-manual.md diff --git a/docs/documentation/en/administrator-guide/load-data/insert-into-manual_EN.md b/docs/documentation/en/administrator-guide/load-data/insert-into-manual_EN.md new file mode 100644 index 00000000000000..8e8c46a40e7c98 --- /dev/null +++ b/docs/documentation/en/administrator-guide/load-data/insert-into-manual_EN.md @@ -0,0 +1,137 @@ +# Insert Into + +The use of Insert Into statements is similar to that of Insert Into statements in databases such as MySQL. But in Doris, all data writing is a separate import job. So Insert Into is also introduced here as an import method. + +The main Insert Into command contains the following two kinds; + +* INSERT INTO tbl SELECT ... +* INSERT INTO tbl (col1, col2, ...) VALUES (1, 2, ...), (1,3, ...); + +The second command is for Demo only, not in a test or production environment. + +## Basic operations + +### Create a Load + +The Insert Into command needs to be submitted through MySQL protocol. Creating an import request returns the import result synchronously. + +Grammar: + +``` +INSERT INTO table_name [partition_info] [col_list] [query_stmt] [VALUES]; +``` + +Examples: + +``` +INSERT INTO tbl2 SELECT * FROM tbl3; +INSERT INTO tbl1 VALUES ("qweasdzxcqweasdzxc"), ("a"); +``` + +The following is a brief introduction to the parameters used in creating import statements: + ++ partition\_info + + Import the target partition of the table. If the target partition is specified, only the data that matches the target partition will be imported. If not specified, the default value is all partitions of the table. + ++ col\_list + + The target column of the import table can exist in any order. If no target column is specified, the default value is all columns in this table. If a column in the table does not exist in the target column, the column needs a default value, otherwise Insert Into will fail. 
+ + If the result column type of the query statement is inconsistent with the type of the target column, an implicit type conversion is invoked. If the conversion is not possible, the Insert Into statement will report a parsing error. + ++ query\_stmt + + Through a query statement, the results of the query statement are imported into other tables in Doris system. Query statements support any SQL query syntax supported by Doris. + ++ VALUES + + Users can insert one or more data through VALUES grammar. + + *Note: VALUES is only suitable for importing several pieces of data as DEMO. It is totally unsuitable for any test and production environment. Doris system itself is not suitable for single data import scenarios. It is recommended to use INSERT INTO SELECT for batch import.* + +### Load results + +Insert Into itself is an SQL command, so the return behavior is the same as the return behavior of the SQL command. + +If the import fails, the return statement fails to execute. If the import succeeds, the return statement executes successfully and a Label field is appended. + +Label is the identifier of the Insert Into import job. Each import job has a unique Label inside a single database. Insert Into's Label is generated by the system. Users can use the Label to asynchronously obtain the import status by querying the import command. + +## Relevant System Configuration + +### FE configuration + ++ time out + + The timeout time of the import task (in seconds) will be cancelled by the system if the import task is not completed within the set timeout time, and will become CANCELLED. + + At present, Insert Into does not support custom import timeout time. All Insert Into imports have a uniform timeout time. The default timeout time is 1 hour. If the imported source file cannot complete the import within the specified time, the parameter ``insert_load_default_timeout_second`` of FE needs to be adjusted. + + At the same time, the Insert Into statement receives the restriction of the Session variable `query_timeout`. You can increase the timeout time by `SET query_timeout = xxx;` in seconds. + +### Session 变量 + ++ enable\_insert\_strict + + The Insert Into import itself cannot control the tolerable error rate of the import. Users can only use the Session parameter `enable_insert_strict`. When this parameter is set to false, it indicates that at least one data has been imported correctly, and then it returns successfully. When this parameter is set to false, the import fails if there is a data error. The default is false. It can be set by `SET enable_insert_strict = true;`. + ++ query u timeout + + Insert Into itself is also an SQL command, so the Insert Into statement is also restricted by the Session variable `query_timeout`. You can increase the timeout time by `SET query_timeout = xxx;` in seconds. + +## Best Practices + +### Application scenarios +1. Users want to import only a few false data to verify the functionality of Doris system. The grammar of INSERT INTO VALUS is suitable at this time. +2. Users want to convert the data already in the Doris table into ETL and import it into a new Doris table, which is suitable for using INSERT INTO SELECT grammar. +3. Users can create an external table, such as MySQL external table mapping a table in MySQL system. Or create Broker external tables to map data files on HDFS. Then the data from the external table is imported into the Doris table for storage through the INSERT INTO SELECT grammar. 
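+
+For scenario 2, the `partition_info` and `col_list` described in the grammar above can be combined in a single statement; a minimal sketch with hypothetical table, partition, and column names:
+
+```
+-- Hypothetical schema: tbl1 has a partition p1 and columns (col1, col2, col3);
+-- col3 is omitted from the target column list, so it must have a default value.
+INSERT INTO tbl1 PARTITION (p1) (col1, col2)
+SELECT col1, col2 FROM tbl2 WHERE col1 > 100;
+```
+
+Rows produced by the SELECT that do not fall in partition p1 are not imported, as described for `partition_info` above.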
+ +### Data volume +Insert Into has no limitation on the amount of data, and large data imports can also be supported. However, Insert Into has a default timeout time, and the amount of imported data estimated by users is too large, so it is necessary to modify the system's Insert Into import timeout time. + +``` +Import data volume = 36G or less than 3600s*10M/s +Among them, 10M/s is the maximum import speed limit. Users need to calculate the average import speed according to the current cluster situation to replace 10M/s in the formula. +``` + +### Complete examples + +Users have a table store sales in the database sales. Users create a table called bj store sales in the database sales. Users want to import the data recorded in the store sales into the new table bj store sales. The amount of data imported is about 10G. + +``` +large sales scheme +(id, total, user_id, sale_timestamp, region) + +Order large sales schedule: +(id, total, user_id, sale_timestamp) + +``` + +Cluster situation: The average import speed of current user cluster is about 5M/s + ++ Step1: Determine whether you want to modify the default timeout of Insert Into + + ``` + Calculate the approximate time of import + 10G / 5M /s = 2000s + + Modify FE configuration + insert_load_default_timeout_second = 2000 + ``` + ++ Step2: Create Import Tasks + + Since users want to ETL data from a table and import it into the target table, they should use the Insert in query\\stmt mode to import it. + + ``` + INSERT INTO bj_store_sales SELECT id, total, user_id, sale_timestamp FROM store_sales where region = "bj"; + ``` + +## Common Questions + +* View the wrong line + + Because Insert Into can't control the error rate, it can only tolerate or ignore the error data completely by `enable_insert_strict`. So if `enable_insert_strict` is set to true, Insert Into may fail. If `enable_insert_strict` is set to false, then only some qualified data may be imported. However, in either case, Doris is currently unable to provide the ability to view substandard data rows. Therefore, the user cannot view the specific import error through the Insert Into statement. + + The causes of errors are usually: source data column length exceeds destination data column length, column type mismatch, partition mismatch, column order mismatch, etc. When it's still impossible to check for problems. At present, it is only recommended that the SELECT command in the Insert Into statement be run to export the data to a file, and then import the file through Stream load to see the specific errors. diff --git a/docs/documentation/en/administrator-guide/load-data/load-manual_EN.md b/docs/documentation/en/administrator-guide/load-data/load-manual_EN.md new file mode 100644 index 00000000000000..4c7f43de5cf2a6 --- /dev/null +++ b/docs/documentation/en/administrator-guide/load-data/load-manual_EN.md @@ -0,0 +1,172 @@ +# Introduction Overview + +The Load function is to import the user's raw data into Doris. After successful import, users can query data through Mysql client. + +Doris supports multiple imports. It is recommended to read this document in full first, and then to view the detailed documents of their respective import modes according to the selected import mode. + +## Basic concepts + +1. Frontend (FE): Metadata and scheduling nodes of Doris system. In the import process, it is mainly responsible for the generation of import planning and the scheduling of import tasks. +2. Backend (BE): The computing and storage nodes of Doris system. 
In the import process, it is mainly responsible for ETL and storage of data. +3. Broker: Broker is an independent stateless process. It encapsulates the file system interface and provides Doris with the ability to read files in the remote storage system. +4. Load job: The import job reads the source data submitted by the user, transforms or cleans it, and imports the data into the Doris system. After the import is completed, the data can be queried by the user. +5. Label: All import jobs have a Label. Label is unique in a database and can be specified by the user or automatically generated by the system to identify an import job. The same Label can only be used for a successful import job. +6. MySQL Protocol/HTTP Protocol: Doris provides two kinds of access protocol interfaces. MySQL protocol and HTTP protocol. Part of the import mode uses MySQL protocol interface to submit jobs, and part of the import mode uses HTTP protocol interface to submit jobs. + +## Load mode + +To adapt to different data import requirements, Doris system provides five different import methods. Each import mode supports different data sources and has different usage modes (asynchronous, synchronous). + +All import methods support CSV data format. Broker load also supports parquet data format. + +For instructions on each import mode, please refer to the operation manual for a single import mode. + +* Broker load + + Access and read external data sources (such as HDFS) through the Broker process and import them into Doris. The user submits the import job through Mysql protocol and executes it asynchronously. View the import results through the `SHOW LOAD` command. + +* Stream load + + Users submit requests through HTTP protocol and create imports with raw data. It is mainly used to quickly import data from local files or data streams into Doris. The Import command returns the import result synchronously. + +* Insert + + Similar to the Insert statement in MySQL, Doris provides `INSERT INTO tbl SELECT ...;`reading data from Doris's table and importing it into another table. Or by `INSERT INTO tbl VALUES (...);` Insert a single piece of data. + +* Multi load + + Users submit multiple import jobs through HTTP protocol. Multi Load guarantees the atomic validity of multiple import jobs. + +* Routine load + + Users submit routine import jobs through MySQL protocol, generate a resident thread, read and import data from data sources (such as Kafka) uninterruptedly into Doris. + +## Basic Principles + +### Import execution process + + +``` ++---------+ +---------+ +----------+ +-----------+ +| | | | | | | | +| PENDING +----->+ ETL +----->+ LOADING +----->+ FINISHED | +| | | | | | | | ++---------+ +---+-----+ +----+-----+ +-----------+ + | | | + | | | + | | | + | | | +-----------+ + | | | | | + +---------------+-----------------+------------> CANCELLED | + | | + +-----------+ + +``` + +As shown above, an import operation mainly goes through the four stages above. + ++ PENDING (not required): Only Broker Load has this stage. Broker Load is submitted by the user and stays at this stage for a short time until it is scheduled by Scheduler in FE. Scheduler's schedule interval is 5 seconds. + ++ ETL (not required): This stage exists before version 0.10.0 (included), mainly for transforming raw data according to user declaration and filtering raw data that does not meet the requirements. In the version after 0.10.0, the ETL phase no longer exists, and the work of data transformation is merged into the LOADING phase. 
+
++ LOADING: Before version 0.10.0 (inclusive), this stage mainly pushes the transformed data to the corresponding BE storage. In versions after 0.10.0, the data is cleaned and transformed first, and then sent to BE storage. After all data has been imported, the job enters the phase of waiting for the data to take effect, and at this point the Load job is still in the LOADING state.
+
++ FINISHED: After all the data involved in the Load job takes effect, the state of the Load job becomes FINISHED. After FINISHED, the imported data can be queried.
+
++ CANCELLED: Before the job becomes FINISHED, it may be cancelled and enter the CANCELLED state, for example because the user cancels it manually or an import error occurs. CANCELLED is also a final state of a Load job and the job cannot be executed again.
+
+Among the above stages, except for the transition from PENDING to LOADING, which is scheduled by the Scheduler, the transitions between the other stages are implemented by a callback mechanism.
+
+### Label and Atomicity
+
+Doris provides atomicity guarantees for all import methods: the data within the same import job takes effect atomically. There will be no case where only part of the data is imported.
+
+At the same time, each import job has a Label specified by the user or automatically generated by the system. The Label is unique within a database. After the import job corresponding to a Label succeeds, that Label cannot be used to submit an import job again. If the import job corresponding to a Label fails, the Label can be reused.
+
+Users can use the Label mechanism to ensure that the data corresponding to a Label is imported at most once, i.e. At-Most-Once semantics.
+
+
+## Synchronization and asynchronization
+
+Doris's current import methods fall into two categories, synchronous and asynchronous. If an external program accesses Doris's import function, it is necessary to determine which type of import mode is used and then determine the access logic.
+
+### Synchronization
+
+Synchronous import means that the user creates an import task, Doris executes the import synchronously, and returns the import result after execution. The user can directly determine whether the import is successful from the result returned by the create-import command.
+
+The import methods of synchronous type are: **Stream load**, **Insert**.
+
+Operation steps:
+
+1. Users (external systems) create import tasks.
+2. Doris returns the import result.
+3. The user (external system) judges the import result and can submit the import task again if it fails.
+
+*Note: For synchronous import modes, if the amount of imported data is too large, it may take a long time for the create-import request to return its result.*
+
+### Asynchronism
+Asynchronous import means that after the user creates an import task, Doris directly returns a creation success message. **Successful creation does not mean that the data has been imported**. The import task will be executed asynchronously. After a successful creation, users need to send polling commands to check the status of the import job. If the creation fails, users can judge whether it needs to be created again based on the failure information.
+
+The import methods of asynchronous type are: **Broker load**, **Multi load**.
+
+Operation steps:
+
+1. Users (external systems) create import tasks.
+2. Doris returns the import creation result.
+3. The user (external system) judges the result of the import creation. If it succeeds, go to step 4; if it fails, retry creating the import, returning to step 1.
+4. The user (external system) polls the import task until the status changes to FINISHED or CANCELLED.
+
+### Notes
+Neither asynchronous nor synchronous imports should be retried endlessly after Doris returns an import failure or an import creation failure. **After a limited number of retries all fail, the external system should retain the failure information. Most retry failures are caused by problems with the usage method or the data itself.**
+
+## Best Practices
+
+When users access Doris imports, they usually do so programmatically, so that data is imported into Doris on a regular basis. Below is a brief description of the best practices for programmatic access to Doris.
+
+1. Choose the appropriate import mode: choose the import mode according to the location of the data source. For example, if the raw data is stored on HDFS, use Broker load to import it.
+2. Determine the protocol of the import mode: if Broker load is selected, the external system needs to be able to periodically submit and view import jobs using the MySQL protocol.
+3. Determine the type of the import mode: synchronous or asynchronous. For example, Broker load is an asynchronous import mode, so after submitting the import creation, the external system must call the check-import command and determine whether the import is successful based on its results.
+4. Label generation strategy: the Label generation strategy must ensure that each batch of data has a unique and fixed Label. Doris can then guarantee At-Most-Once.
+5. The program itself guarantees At-Least-Once: the external system needs to guarantee its own At-Least-Once, so that Exactly-Once of the import process can be guaranteed.
+
+## General System Configuration
+
+The following sections explain several system-level configurations that are common to all imports.
+
+### FE configuration
+
+The following configurations belong to the FE system configuration and can be modified in the FE configuration file ``fe.conf``.
+
++ max\_load\_timeout\_second and min\_load\_timeout\_second
+
+    The two configurations are the maximum import timeout and the minimum import timeout, in seconds. The default maximum timeout is 3 days and the default minimum timeout is 1 second. User-defined import timeouts should not exceed this range. These parameters are applicable to all import modes.
+
++ desired\_max\_waiting\_jobs
+
+    The maximum number of import tasks in the waiting queue, 100 by default. New import requests are rejected when the number of imports in the PENDING state (i.e. waiting for execution) in the FE exceeds this value.
+
+    This configuration is only valid for asynchronously executed imports. When the number of imports waiting for asynchronous execution exceeds this value, subsequent import creation requests will be rejected.
+
++ max\_running\_txn\_num\_per\_db
+
+    This configuration means the maximum number of imports running in each database (without distinguishing import types; counted uniformly). When the number of imports running in the current database exceeds the maximum, subsequent imports will not be executed. If it is a synchronous import job, the import will be rejected. If it is an asynchronous import job, the job will wait in the queue.
+
+### BE configuration
+
+The following configurations belong to the BE system configuration and can be modified in the BE configuration file `be.conf`.
+
++ push\_write\_mbytes\_per\_sec
+
+    Writing speed limit for a single Tablet on BE. The default is 10, i.e. 10MB/s. Usually the maximum write speed of BE to a single Tablet is between 10 and 30 MB/s, depending on the Schema and the system. This parameter can be adjusted appropriately to control the import speed.
+
++ write\_buffer\_size
+
+    The imported data will first be written to a memtable on BE, and the memtable will not be written back to disk until it reaches this threshold. The default size is 100MB. A threshold that is too small may result in a large number of small files on BE. This threshold can be increased appropriately to reduce the number of files. However, an excessively large threshold can lead to RPC timeouts, as explained in the configuration below.
+
++ tablet\_writer\_rpc\_timeout\_sec
+
+    The RPC timeout for sending a batch (1024 rows) during the import process. The default is 600 seconds. Because the RPC may involve multiple memtable writes, it may cause RPC timeouts; this parameter can be adjusted appropriately to reduce timeout errors (such as `send batch fail`). At the same time, if the `write_buffer_size` configuration is increased, this parameter needs to be adjusted appropriately as well.
+
++ streaming\_load\_rpc\_max\_alive\_time\_sec
+
+    During the import process, Doris opens a Writer for each Tablet to receive and write data. This parameter specifies the Writer's waiting timeout. If the Writer does not receive any data within this time, it will be destroyed automatically. When the system processing speed is slow, the Writer may not receive the next batch of data for a long time, resulting in the import error: `Tablet Writer add batch with unknown id`. This configuration can be increased appropriately at that point. The default is 600 seconds.
diff --git a/docs/documentation/en/administrator-guide/load-data/routine-load-manual_EN.md b/docs/documentation/en/administrator-guide/load-data/routine-load-manual_EN.md
new file mode 100644
index 00000000000000..e6aa10549f2e77
--- /dev/null
+++ b/docs/documentation/en/administrator-guide/load-data/routine-load-manual_EN.md
@@ -0,0 +1,220 @@
+# Routine Load
+
+Routine Load provides users with a function to automatically import data from a specified data source.
+
+This document mainly introduces the implementation principle, usage and best practices of this function.
+
+## Glossary
+
+* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
+* BE: Backend, the back-end node of Doris. Responsible for query execution and data storage.
+* Routine Load Job: A routine import job submitted by the user.
+* Job Scheduler: The routine load job scheduler, used for scheduling and splitting a Routine Load Job into multiple Tasks.
+* Task: A subtask split from a Routine Load Job by the Job Scheduler according to certain rules.
+* Task Scheduler: The task scheduler, used to schedule the execution of Tasks.
+
+## Principle
+
+```
+         +---------+
+         |  Client |
+         +----+----+
+              |
++-----------------------------+
+| FE          |               |
+| +-----------v------------+  |
+| |                        |  |
+| |   Routine Load Job     |  |
+| |                        |  |
+| +---+--------+--------+--+  |
+|     |        |        |     |
+| +---v--+ +---v--+ +---v--+  |
+| | task | | task | | task |  |
+| +--+---+ +---+--+ +---+--+  |
+|    |         |        |     |
++-----------------------------+
+     |         |        |
+     v         v        v
+ +---+--+  +---+--+  +---+--+
+ |  BE  |  |  BE  |  |  BE  |
+ +------+  +------+  +------+
+
+```
+
+As shown above, the Client submits a routine load job to FE.
+
+FE splits an import job into several Tasks through the Job Scheduler. Each Task is responsible for importing a specified portion of the data. Tasks are assigned by the Task Scheduler to execute on specified BEs.
+
+On BE, a Task is treated as a common import task and is imported through the Stream Load import mechanism. After the import, the result is reported to FE.
+
+The Job Scheduler in FE continues to generate subsequent new Tasks based on the reported results, or retries failed Tasks.
+
+The whole routine load job completes uninterrupted data import by continuously generating new Tasks.
+
+## Kafka routine load
+
+Currently we only support routine imports from Kafka systems. This section describes Kafka routine load usage and best practices in detail.
+
+### Use restrictions
+
+1. Unauthenticated Kafka access and SSL-authenticated Kafka clusters are supported.
+2. The supported message format is CSV text format. Each message is one line, and the end of the line **does not contain** a newline character.
+3. Only Kafka 0.10.0.0 (inclusive) and later versions are supported.
+
+### Create routine import tasks
+
+For the detailed syntax for creating a routine load task, connect to Doris and execute `HELP CREATE ROUTINE LOAD;` to view the syntax help. The following is a detailed description of the points to note when creating a job (a consolidated creation example is sketched after this list).
+
+* columns_mapping
+
+    `columns_mapping` is mainly used to specify the mapping relationship between the table structure and the columns in the message, as well as the transformation of some columns. If it is not specified, Doris assumes by default that the columns in the message and the columns in the table structure correspond one to one in order. Although under normal circumstances, if the source data columns correspond exactly one to one, normal data import can be carried out without specifying it, we still strongly recommend that users **explicitly specify the column mapping relationship**. In this way, when the table structure changes (such as adding a nullable column) or the source file changes (such as adding a column), the import task can continue. Otherwise, when the above changes occur, the import will report an error because the column mapping relationship no longer corresponds one to one.
+
+    In `columns_mapping`, we can also use some built-in functions to convert columns. But you need to pay attention to the actual column types corresponding to the function parameters. Examples are given to illustrate:
+
+    Assume that the user needs to import a table containing only a `k1` column of `int` type, and the null values in the source file need to be converted to 0. This function can be implemented through the `ifnull` function. The correct way to use it is as follows:
+
+    `COLUMNS (xx, k1=ifnull(xx, "3"))`
+
+    Note that we use `"3"` instead of `3`, although the type of `k1` is `int`. Because the column types of the source data are `varchar` for the import task, the virtual column `xx` here is also of type `varchar`. So we need to use `"3"` for matching; otherwise the `ifnull` function cannot find a function signature with parameters `(varchar, int)`, and an error will occur.
+
+    Another example: assume that the user needs to import a table containing only a `k1` column of `int` type, and the corresponding column in the source file needs to be processed: negative numbers are converted to positive, and positive numbers are multiplied by 100. This function can be implemented by the `case when` function, which should be correctly written as follows:
+
+    `COLUMNS (xx, case when xx < 0 then cast(-xx as varchar) else cast((xx + '100') as varchar) end)`
+
+    Note that we need to convert all the parameters in `case when` to `varchar` in order to get the desired results.
+
+* where_predicates
+
+    The column types in `where_predicates` are already the actual column types, so there is no need to force a cast to varchar as in `columns_mapping`. Write them according to the actual column types.
+
+* desired\_concurrent\_number
+
+    `desired_concurrent_number` is used to specify the expected concurrency of a routine job, that is, at most how many tasks are executed simultaneously for one job. For Kafka imports, the current actual concurrency is calculated as follows:
+
+    ```
+    Min(partition num, desired_concurrent_number, alive_backend_num, Config.max_routine_load_task_concurrent_num)
+    ```
+
+    Where `Config.max_routine_load_task_concurrent_num` is a default maximum concurrency limit of the system. This is an FE configuration and can be adjusted by changing the configuration. The default is 5.
+
+    The partition num refers to the number of partitions of the subscribed Kafka topic. `alive_backend_num` is the current number of normal BE nodes.
+
+* max\_batch\_interval/max\_batch\_rows/max\_batch\_size
+
+    These three parameters are used to control the execution time of a single task. If any of these thresholds is reached, the task ends. `max_batch_rows` is used to record the number of data rows read from Kafka. `max_batch_size` is used to record the amount of data read from Kafka, in bytes. At present, the consumption rate of a task is about 5-10MB/s.
+
+    Assume a row of data is 500B and the user wants a task every 100MB or every 10 seconds. The expected processing time of 100MB is 10-20 seconds, and the corresponding number of rows is about 200,000. A reasonable configuration is:
+
+    ```
+    "max_batch_interval" = "10",
+    "max_batch_rows" = "200000",
+    "max_batch_size" = "104857600"
+    ```
+
+    The parameters in the example above are also the default values of these configurations.
+
+* max\_error\_number
+
+    `max_error_number` is used to control the error rate. When the error rate is too high, the job will be automatically paused. Because the whole job is oriented to a data stream, and because of the unbounded nature of the data stream, we cannot calculate the error rate through an error ratio as other import tasks do. Therefore, a new calculation method is provided to compute the error ratio in the data stream.
+
+    We set up a sampling window. The size of the window is `max_batch_rows * 10`. Within one sampling window, if the number of error rows exceeds `max_error_number`, the job is paused. If it does not exceed it, the next window restarts the count of error rows.
+
+    Assume `max_batch_rows` is 200,000, so the window size is 2,000,000. Let `max_error_number` be 20,000; that is, the user expects at most 20,000 error rows per 2,000,000 rows, i.e. an error rate of 1%. But because not every batch of tasks consumes exactly 200,000 rows, the actual range of the window is [2000000, 2200000], that is, a 10% statistical error.
+
+    Error rows do not include rows filtered out by where conditions, but do include rows for which there is no corresponding partition in the Doris table.
+
+* data\_source\_properties
+
+    `data_source_properties` can specify the specific Kafka partitions to be consumed. If not specified, all partitions of the subscribed topic are consumed by default.
+
+    Note that when partitions are explicitly specified, the import job will no longer dynamically detect changes of Kafka partitions. If not specified, the partitions to consume will be dynamically adjusted according to changes of Kafka partitions.
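+
+To tie the parameters above together, a creation statement might look like the following sketch. The database, table, topic and column names (`example_db`, `example_tbl`, `example_topic`, `k1`, `k2`, `v1`) are hypothetical and only illustrate where each parameter goes; see `HELP CREATE ROUTINE LOAD;` for the authoritative syntax.
+
+```
+CREATE ROUTINE LOAD example_db.example_job ON example_tbl
+COLUMNS TERMINATED BY ",",
+COLUMNS (k1, k2, v1 = ifnull(k2, "0")),
+WHERE k1 > 100
+PROPERTIES
+(
+    "desired_concurrent_number" = "3",
+    "max_batch_interval" = "10",
+    "max_batch_rows" = "200000",
+    "max_batch_size" = "104857600",
+    "max_error_number" = "10000"
+)
+FROM KAFKA
+(
+    "kafka_broker_list" = "broker1:9092,broker2:9092",
+    "kafka_topic" = "example_topic",
+    "kafka_partitions" = "0,1,2",
+    "kafka_offsets" = "0,0,0"
+);
+```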
+
+#### Accessing an SSL-authenticated Kafka cluster
+
+Accessing an SSL-authenticated Kafka cluster requires the user to provide the certificate file (ca.pem) used to authenticate the Kafka Broker public key. If the Kafka cluster also enables client authentication, the client's public key (client.pem), the key file (client.key), and the key password are also required. The required files need to be uploaded to Doris with the `CREATE FILE` command, **and the catalog name must be `kafka`**. The specific help for the `CREATE FILE` command can be found in `HELP CREATE FILE;`. An example is given here:
+
+1. Upload files
+
+    ```
+    CREATE FILE "ca.pem" PROPERTIES("url" = "https://example_url/kafka-key/ca.pem", "catalog" = "kafka");
+    CREATE FILE "client.key" PROPERTIES("url" = "https://example_url/kafka-key/client.key", "catalog" = "kafka");
+    CREATE FILE "client.pem" PROPERTIES("url" = "https://example_url/kafka-key/client.pem", "catalog" = "kafka");
+    ```
+
+2. Create a routine load job
+
+    ```
+    CREATE ROUTINE LOAD db1.job1 on tbl1
+    PROPERTIES
+    (
+        "desired_concurrent_number"="1"
+    )
+    FROM KAFKA
+    (
+        "kafka_broker_list"= "broker1:9091,broker2:9091",
+        "kafka_topic" = "my_topic",
+        "property.security.protocol" = "ssl",
+        "property.ssl.ca.location" = "FILE:ca.pem",
+        "property.ssl.certificate.location" = "FILE:client.pem",
+        "property.ssl.key.location" = "FILE:client.key",
+        "property.ssl.key.password" = "abcdefg"
+    );
+    ```
+
+> Doris accesses the Kafka cluster through Kafka's C++ API `librdkafka`. The parameters supported by `librdkafka` can be found at
+>
+> `https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md`
+
+
+### View the status of import jobs
+
+Specific commands and examples for viewing the status of **jobs** can be found via the `HELP SHOW ROUTINE LOAD;` command.
+
+Specific commands and examples for viewing the running status of **tasks** can be found via the `HELP SHOW ROUTINE LOAD TASK;` command.
+
+You can only view tasks that are currently running. Tasks that have ended or have not started cannot be viewed.
+
+### Job Control
+
+Users can control the stop, pause and resume of jobs through the three commands `STOP/PAUSE/RESUME`. Help and examples can be viewed through the `HELP STOP ROUTINE LOAD;`, `HELP PAUSE ROUTINE LOAD;` and `HELP RESUME ROUTINE LOAD;` commands.
+
+## Other notes
+
+1. The relationship between routine load jobs and ALTER TABLE operations
+
+    * Routine load does not block SCHEMA CHANGE and ROLLUP operations. But note that if the column mapping relationship no longer matches after the SCHEMA CHANGE is completed, the job's error data will increase sharply and the job will eventually be paused. It is recommended to reduce such problems by explicitly specifying the column mapping relationship in the routine load job and by adding Nullable columns or columns with Default values.
+    * Deleting a Partition of the table may cause the imported data to fail to find the corresponding Partition and the job to be paused.
+
+2. The relationship between routine load jobs and other import jobs (LOAD, DELETE, INSERT)
+
+    * Routine load does not conflict with other LOAD and INSERT operations.
+    * When performing a DELETE operation, the corresponding table partition cannot have any import task being executed. Therefore, before performing a DELETE operation, it may be necessary to pause the routine load job and wait for the tasks already dispatched to complete before DELETE can be executed.
+
+3. 
The relationship between routine import jobs and DROP DATABASE/TABLE operations + + When the corresponding database or table is deleted from the routine import, the job will automatically CANCEL. + +4. The relationship between Kafka type routine import and Kafka topic + + When the `kafka_topic'of the user creating the routine import declaration does not exist in the Kafka cluster. + + * If the broker of user Kafka cluster sets `auto.create.topics.enable = true`, then `kafka_topic` will be created automatically first. The number of partitions created automatically is determined by the broker configuration `num.partitions` in **user's Kafka cluster**. Routine jobs will read the topic data regularly and continuously. + * If the broker of the user Kafka cluster sets `auto.create.topics.enable = false`, topic will not be created automatically, and routine jobs will be suspended before any data is read, in the state of `PAUSED`. + + So, if users want to be created automatically by routine jobs when Kafka topic does not exist, they just need to set `auto.create.topics.enable = true` to the broker in **user's Kafka cluster**. + +## Relevant parameters + +Some system configuration parameters affect the use of routine imports. + +1. max\_routine\_load\_task\_concurrent\_num + + FE configuration item, default 5, can be modified at run time. This parameter limits the maximum number of subtasks concurrently imported routinely. It is recommended that the default values be maintained. Setting too large may lead to too many concurrent tasks and occupy cluster resources. + +2. max\_consumer\_num\_per\_group + + BE configuration item, default 3. This parameter represents the maximum number of consumers generated in a sub-task for data consumption. For Kafka data sources, a consumer may consume one or more Kafka partitions. Assuming that a task needs to consume six Kafka partitions, three consumers will be generated, and each consumer consumes two partitions. If there are only two partitions, only two consumers will be generated, and each consumer consumes one partition. + +3. push\_write\_mbytes\_per\_sec + + BE configuration item. The default is 10, or 10MB/s. This parameter is a general import parameter and is not limited to routine import operations. This parameter limits the speed at which imported data is written to disk. For high performance storage devices such as SSD, this speed limit can be increased appropriately. diff --git a/docs/documentation/en/administrator-guide/load-data/stream-load-manual_EN.md b/docs/documentation/en/administrator-guide/load-data/stream-load-manual_EN.md new file mode 100644 index 00000000000000..ebd692adb95903 --- /dev/null +++ b/docs/documentation/en/administrator-guide/load-data/stream-load-manual_EN.md @@ -0,0 +1,259 @@ +# Stream load + +Stream load is a synchronous way of importing. Users import local files or data streams into Doris by sending HTTP protocol requests. Stream load synchronously executes the import and returns the import result. Users can directly determine whether the import is successful by the return body of the request. + +Stream load is mainly suitable for importing local files or data from data streams through procedures. + +## Basic Principles + +The following figure shows the main flow of Stream load, omitting some import details. + +``` + ^ + + | | + | | 1A. User submit load to FE + | | + | +--v-----------+ + | | FE | +5. Return result to user | +--+-----------+ + | | + | | 2. 
Redirect to BE + | | + | +--v-----------+ + +---+Coordinator BE| 1B. User submit load to BE + +-+-----+----+-+ + | | | + +-----+ | +-----+ + | | | 3. Distrbute data + | | | + +-v-+ +-v-+ +-v-+ + |BE | |BE | |BE | + +---+ +---+ +---+ +``` + +In Stream load, Doris selects a node as the Coordinator node. This node is responsible for receiving data and distributing data to other data nodes. + +Users submit import commands through HTTP protocol. If submitted to FE, FE forwards the request to a BE via the HTTP redirect instruction. Users can also submit import commands directly to a specified BE. + +The final result of the import is returned to the user by Coordinator BE. + +## Basic operations +### Create a Load + +Stream load submits and transfers data through HTTP protocol. Here, the `curl` command shows how to submit an import. + +Users can also operate through other HTTP clients. + +``` +curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load + +The following attributes are supported in Header: +label, column_separator, columns, where, max_filter_ratio, partitions +The format is: - H "key1: value1" +``` + +Examples: + +``` +curl --location-trusted -u root -T date -H "label:123" http://abc.com:8030/api/test/date/_stream_load +``` +The detailed syntax for creating imports helps to execute ``HELP STREAM LOAD`` view. The following section focuses on the significance of creating some parameters of Stream load. + +#### Signature parameters + ++ user/passwd + + Stream load uses the HTTP protocol to create the imported protocol and signs it through the Basic Access authentication. The Doris system verifies user identity and import permissions based on signatures. + +#### Import Task Parameters + +Stream load uses HTTP protocol, so all parameters related to import tasks are set in the header. The significance of some parameters of the import task parameters of Stream load is mainly introduced below. + ++ label + + Identity of import task. Each import task has a unique label inside a single database. Label is a user-defined name in the import command. With this label, users can view the execution of the corresponding import task. + + Another function of label is to prevent users from importing the same data repeatedly. **It is strongly recommended that users use the same label for the same batch of data. This way, repeated requests for the same batch of data will only be accepted once, guaranteeing at-Most-Once** + + When the corresponding import operation state of label is CANCELLED, the label can be used again. + ++ max\_filter\_ratio + + The maximum tolerance rate of the import task is 0 by default, and the range of values is 0-1. When the import error rate exceeds this value, the import fails. + + If the user wishes to ignore the wrong row, the import can be successful by setting this parameter greater than 0. + + The calculation formula is as follows: + + ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ``` + + ``` dpp.abnorm.ALL``` denotes the number of rows whose data quality is not up to standard. Such as type mismatch, column mismatch, length mismatch and so on. + + ``` dpp.norm.ALL ``` refers to the number of correct data in the import process. The correct amount of data for the import task can be queried by the ``SHOW LOAD` command. + +The number of rows in the original file = `dpp.abnorm.ALL + dpp.norm.ALL` + ++ where + + Import the filter conditions specified by the task. 
Stream load supports specifying a where statement to filter the raw data. The filtered data will neither be imported nor counted in the calculation of the filter ratio, but it will be counted in `num_rows_unselected`.
+
++ partition
+
+    The Partition information of the table to be imported. If the data to be imported does not belong to the specified Partition, it will not be imported. These data will be counted in `dpp.abnorm.ALL`.
+
++ columns
+
+    The function transformation configuration of the data to be imported. It includes column order changes and expression transformations, where the expression transformation method is consistent with that of query statements.
+
+    ```
+    Example of column order transformation: the original data has two columns, and the target table currently also has two columns (c1, c2). But the first column of the original file corresponds to the c2 column of the target table, while the second column of the original file corresponds to the c1 column of the target table, which is written as follows:
+    columns: c2,c1
+
+    Example of expression transformation: there are two columns in the original file and two columns in the target table (c1, c2). However, both columns in the original file need to be transformed by functions to correspond to the two columns in the target table.
+    columns: tmp_c1, tmp_c2, c1 = year(tmp_c1), c2 = month(tmp_c2)
+    tmp_* is a placeholder, representing the two original columns in the original file.
+    ```
+
+### Return results
+
+Since Stream load is a synchronous import method, the result of the import is directly returned to the user in the return value of the create-import request.
+
+Examples:
+
+```
+{
+    "TxnId": 1003,
+    "Label": "b6f3bc78-0d2c-45d9-9e4c-faa0a0149bee",
+    "Status": "Success",
+    "Message": "OK",
+    "NumberTotalRows": 1000000,
+    "NumberLoadedRows": 1000000,
+    "NumberFilteredRows": 1,
+    "NumberUnselectedRows": 0,
+    "LoadBytes": 40888898,
+    "LoadTimeMs": 2144,
+    "ErrorURL": "http://192.168.1.1:8042/api/_load_error_log?file=__shard_0/error_log_insert_stmt_db18266d4d9b4ee5-abb00ddd64bdf005_db18266d4d9b4ee5_abb00ddd64bdf005"
+}
+```
+
+The following are the main explanations of the Stream load import result parameters:
+
++ TxnId: The transaction ID of the import. Users generally do not need to care about it.
+
++ Label: The import Label, specified by the user or automatically generated by the system.
+
++ Status: The import completion status.
+
+    "Success": Indicates that the import was successful.
+
+    "Publish Timeout": This state also indicates that the import has been completed, except that the data visibility may be delayed; there is no need to retry.
+
+    "Label Already Exists": The Label is duplicated; a different Label needs to be used.
+
+    "Fail": The import failed.
+
++ Message: Import error message.
+
++ NumberTotalRows: The total number of rows processed by the import.
+
++ NumberLoadedRows: The number of rows successfully imported.
+
++ NumberFilteredRows: The number of rows that did not meet data quality requirements.
+
++ NumberUnselectedRows: The number of rows filtered out by the where condition.
+
++ LoadBytes: The number of bytes imported.
+
++ LoadTimeMs: Import completion time, in milliseconds.
+
++ ErrorURL: If there are data quality problems, visit this URL to see the specific error rows.
+
+> Note: Since Stream load is a synchronous import mode, import information will not be recorded in the Doris system, and users cannot asynchronously check Stream load by viewing import commands. You need to listen for the return value of the create-import request to get the import result.
+
+### Cancel Load
+
+Users cannot cancel a Stream load manually. A Stream load will be cancelled automatically by the system after a timeout or an import error.
+
+## Relevant System Configuration
+
+### FE configuration
+
++ stream\_load\_default\_timeout\_second
+
+    The timeout of the import task, in seconds. If the import task is not completed within the set timeout, it will be cancelled by the system and become CANCELLED.
+
+    At present, Stream load does not support a custom import timeout. All Stream load imports use a uniform timeout, and the default timeout is 300 seconds. If the imported source file cannot complete the import within the specified time, the FE parameter ```stream_load_default_timeout_second``` needs to be adjusted.
+
+### BE configuration
+
++ streaming\_load\_max\_mb
+
+    The maximum import size of Stream load, 10G by default, in MB. If the user's original file exceeds this value, the BE parameter ```streaming_load_max_mb``` needs to be adjusted.
+
+## Best Practices
+
+### Application scenarios
+
+The most appropriate scenario for using Stream load is when the original file is in memory or on local disk. Secondly, since Stream load is a synchronous import method, users can also use this import mode if they want to obtain the import result synchronously.
+
+### Data volume
+
+Since the principle of Stream load is that the import is initiated and the data is distributed by a BE, the recommended amount of imported data is between 1G and 10G. Since the default maximum Stream load import data volume is 10G, the BE configuration ```streaming_load_max_mb``` needs to be modified if files exceeding 10G are to be imported.
+
+```
+For example, the size of the file to be imported is 15G
+Modify the BE configuration streaming_load_max_mb to 16000
+```
+
+The default timeout of Stream load is 300 seconds. Given Doris's current maximum import speed limit, importing a file larger than about 3G requires modifying the default timeout of the import task.
+
+```
+Import task timeout = import data volume / 10M/s (the specific average import speed needs to be calculated by the user based on the cluster situation)
+For example, to import a 10G file
+Timeout = 1000s, i.e. 10G / 10M/s
+```
+
+### Complete examples
+Data situation: the data to be imported is in the local disk path /home/store_sales of the host that sends the import request. The data size is about 15G, and it is to be imported into the table store\_sales of the database bj_sales.
+
+Cluster situation: The concurrency of Stream load is not affected by cluster size.
+
++ Step 1: Does the import file size exceed the default maximum import size of 10G?
+
+    ```
+    BE conf
+    streaming_load_max_mb = 16000
+    ```
+
++ Step 2: Calculate whether the approximate import time exceeds the default timeout value
+
+    ```
+    Import time = 15000 / 10 = 1500s
+    This exceeds the default timeout, so the FE configuration needs to be modified
+    stream_load_default_timeout_second = 1500
+    ```
+
++ Step 3: Create the import task
+
+    ```
+    curl --location-trusted -u user:password -T /home/store_sales -H "label:abc" http://abc.com:8000/api/bj_sales/store_sales/_stream_load
+    ```
+
+## Common Questions
+
+* Label Already Exists
+
+    The steps to troubleshoot a duplicate Label of Stream load are as follows:
+
+    1. Does the Label conflict with a Label already used by another import method?
+
+        Because import Labels in the Doris system do not distinguish between import methods, there is a possibility that other import methods have used the same Label.
+
+        Use `SHOW LOAD WHERE LABEL = "xxx"`, where xxx is the duplicate Label string, to check whether there is already a FINISHED import whose Label is the same as the one the user is creating.
+
+    2. Is the same Stream load job submitted repeatedly?
+
+        Since Stream load creates import tasks via the HTTP protocol, HTTP clients in various languages usually have their own request retry logic. After receiving the first request, the Doris system has already started to process the Stream load, but because the result is not returned to the client in time, the client may retry and create the same request. At this point, the Doris system is already processing the first request, so the second request will report Label Already Exists.
+
+        To check for the situation above: search the Master FE's log with the Label to see whether there are two `redirect load action to destination=xxx` entries for the same Label. If so, the request was submitted repeatedly by the client.
+
+        It is suggested that users calculate the approximate import time based on the amount of data in the current request, and increase the client-side request timeout beyond the import timeout, so as to avoid the request being submitted multiple times by the client.
diff --git a/docs/documentation/en/administrator-guide/operation/index.rst b/docs/documentation/en/administrator-guide/operation/index.rst
new file mode 100644
index 00000000000000..978ac3d83f8503
--- /dev/null
+++ b/docs/documentation/en/administrator-guide/operation/index.rst
@@ -0,0 +1,9 @@
+===========================
+Operation and Maintenance
+===========================
+
+.. toctree::
+    :maxdepth: 2
+    :glob:
+
+    *
diff --git a/docs/documentation/en/administrator-guide/operation/metadata-operation_EN.md b/docs/documentation/en/administrator-guide/operation/metadata-operation_EN.md
new file mode 100644
index 00000000000000..443a5cc475501b
--- /dev/null
+++ b/docs/documentation/en/administrator-guide/operation/metadata-operation_EN.md
@@ -0,0 +1,297 @@
+# Metadata Operations and Maintenance
+
+This document focuses on how to manage Doris metadata in a real production environment. It includes the recommended deployment of FE nodes, some commonly used operation methods, and common error resolution methods.
+
+Please first read the [Doris metadata design document](../../internal/metadata-design.md) to understand how Doris metadata works.
+
+## Important tips
+
+* The current metadata design is not backward compatible. That is, if the new version has a new metadata structure change (you can check whether there is a new VERSION in the `FeMetaVersion.java` file in the FE code), it is usually impossible to roll back to the old version after upgrading to the new version. Therefore, before upgrading FE, be sure to test metadata compatibility according to the operations in the [Upgrade Document](../../installing/upgrade.md).
+
+## Metadata directory structure
+
+Let's assume that the path of `meta_dir` specified in fe.conf is `path/to/palo-meta`. In a normal Doris cluster, the directory structure of the metadata should be as follows:
+
+```
+/path/to/palo-meta/
+            |-- bdb/
+            |   |-- 00000000.jdb
+            |   |-- je.config.csv
+            |   |-- je.info.0
+            |   |-- je.info.0.lck
+            |   |-- je.lck
+            |   `-- je.stat.csv
+            `-- image/
+                |-- ROLE
+                |-- VERSION
+                `-- image.xxxx
+```
+
+1. bdb
+
+    We use [bdbje](https://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html) as a distributed key-value system to store the metadata journal.
This BDB directory is equivalent to the "data directory" of bdbje. + + The `.jdb` suffix is the data file of bdbje. These data files will increase with the increasing number of metadata journals. When Doris regularly completes the image, the old log is deleted. So normally, the total size of these data files varies from several MB to several GB (depending on how Doris is used, such as import frequency). When the total size of the data file is larger than 10GB, you may need to wonder whether the image failed or the historical journals that failed to distribute the image could not be deleted. + + ` je.info.0 ` is the running log of bdbje. The time in this log is UTC + 0 time zone. We may fix this in a later version. From this log, you can also see how some bdbje works. + +2. image directory + + The image directory is used to store metadata mirrors generated regularly by Doris. Usually, you will see a `image.xxxxx` mirror file. Where `xxxxx` is a number. This number indicates that the image contains all metadata journal before `xxxx`. And the generation time of this file (viewed through `ls -al`) is usually the generation time of the mirror. + + You may also see a `image.ckpt` file. This is a metadata mirror being generated. The `du -sh` command should show that the file size is increasing, indicating that the mirror content is being written to the file. When the mirror is written, it automatically renames itself to a new `image.xxxxx` and replaces the old image file. + + Only FE with a Master role will actively generate image files on a regular basis. After each generation, FE is pushed to other non-Master roles. When it is confirmed that all other FEs have received this image, Master FE deletes the metadata journal in bdbje. Therefore, if image generation fails or image push fails to other FEs, data in bdbje will accumulate. + + `ROLE` file records the type of FE (FOLLOWER or OBSERVER), which is a text file. + + `VERSION` file records the cluster ID of the Doris cluster and the token used to access authentication between nodes, which is also a text file. + + `ROLE` file and `VERSION` file may only exist at the same time, or they may not exist at the same time (e.g. at the first startup). + +## Basic operations + +### Start single node FE + +Single node FE is the most basic deployment mode. A complete Doris cluster requires at least one FE node. When there is only one FE node, the type of the node is Follower and the role is Master. + +1. First start-up + + 1. Suppose the path of `meta_dir` specified in fe.conf is `path/to/palo-meta`. + 2. Ensure that `path/to/palo-meta` already exists, that the permissions are correct and that the directory is empty. + 3. Start directly through `sh bin/start_fe.sh`. + 4. After booting, you should be able to see the following log in fe.log: + + * Palo FE starting... + * image does not exist: /path/to/palo-meta/image/image.0 + * transfer from INIT to UNKNOWN + * transfer from UNKNOWN to MASTER + * the very first time to open bdb, dbname is 1 + * start fencing, epoch number is 1 + * finish replay in xxx msec + * QE service start + * thrift server started + + The above logs are not necessarily strictly in this order, but they are basically similar. + + 5. The first start-up of a single-node FE usually does not encounter problems. If you haven't seen the above logs, generally speaking, you haven't followed the document steps carefully, please read the relevant wiki carefully. + +2. Restart + + 1. Stopped FE nodes can be restarted by using `sh bin/start_fe.sh`. 
+    2. After restarting, you should be able to see the following logs in fe.log:
+
+        * Palo FE starting...
+        * finished to get cluster id: xxxx, role: FOLLOWER and node name: xxxx
+        * If no image has been generated before the restart, you will see:
+            * image does not exist: /path/to/palo-meta/image/image.0
+
+        * If an image was generated before the restart, you will see:
+            * start load image from /path/to/palo-meta/image/image.xxx. is ckpt: false
+            * finished load image in xxx ms
+
+        * transfer from INIT to UNKNOWN
+        * replayed journal id is xxxx, replay to journal id is yyyy
+        * transfer from UNKNOWN to MASTER
+        * finish replay in xxx msec
+        * master finish replay journal, can write now.
+        * begin to generate new image: image.xxxx
+        * start save image to /path/to/palo-meta/image/image.ckpt. is ckpt: true
+        * finished save image /path/to/palo-meta/image/image.ckpt in xxx ms. checksum is xxxx
+        * push image.xxx to other nodes. totally xx nodes, push successed xx nodes
+        * QE service start
+        * thrift server started
+
+        The above logs are not necessarily strictly in this order, but they are basically similar.
+
+    3. Common problems
+
+        For the deployment of a single-node FE, starting and stopping usually does not encounter any problems. If you have any questions, please refer to the relevant wiki and check your operation steps carefully.
+
+### Add FE
+
+The process of adding FEs is described in detail in the [Deployment and Upgrade Documents](https://github.com/apache/incubator-doris/wiki/Doris-Deploy-%26-Upgrade) and will not be repeated here. Below are some points for attention, as well as common problems.
+
+1. Notes
+
+    * Before adding a new FE, make sure that the current Master FE runs properly (the connection is normal, the JVM is normal, image generation is normal, the bdbje data directory is not too large, etc.)
+    * The first time you start a new FE, you must make sure that the `-helper` parameter is added to point to the Master FE. There is no need to add `-helper` when restarting. (If `-helper` is specified, the FE will directly ask the helper node for its role. If not, the FE will try to obtain the information from the `ROLE` and `VERSION` files in the `palo-meta/image/` directory.)
+    * The first time you start a new FE, you must make sure that the `meta_dir` of this FE is created, has correct permissions and is empty.
+    * There is no required order between starting a new FE and executing the `ALTER SYSTEM ADD FOLLOWER/OBSERVER` statement that adds the FE to the metadata. If a new FE is started first and the statement has not been executed yet, `current node is not added to the group. Please add it first.` will appear in the new FE's log. When the statement is executed, it enters the normal process.
+    * Make sure that one FE is added successfully before adding the next one.
+    * It is recommended to connect directly to the MASTER FE to execute the `ALTER SYSTEM ADD FOLLOWER/OBSERVER` statement.
+
+2. Common problems
+
+    1. this node is DETACHED
+
+        When you start an FE to be added for the first time, if the data in palo-meta/bdb on the Master FE is large, you may see the words `this node is DETACHED` in the log of the FE to be added. At this point, bdbje is copying data, and you can see that the `bdb/` directory of the FE to be added is growing. This process usually takes several minutes (depending on the amount of data in bdbje). Later, there may be some bdbje-related error stack information in fe.log. If `QE service start` and `thrift server start` are displayed in the final log, the start is usually successful. You can try to connect to this FE via mysql-client. If these words do not appear, it may be a problem of bdbje replication log timeout.
At this point, restarting the FE directly will usually solve the problem. + + 2. Failure to add due to various reasons + + * If OBSERVER is added, because OBSERVER-type FE does not participate in the majority of metadata writing, it can theoretically start and stop at will. Therefore, for the case of adding OBSERVER failure. The process of OBSERVER FE can be killed directly. After clearing the metadata directory of OBSERVER, add the process again. + + * If FOLLOWER is added, because FOLLOWER is mostly written by participating metadata. So it is possible that FOLLOWER has joined the bdbje electoral team. If there are only two FOLLOWER nodes (including MASTER), then stopping one FE may cause another FE to quit because it cannot write most of the time. At this point, we should first delete the newly added FOLLOWER node from the metadata through the `ALTER SYSTEM DROP FOLLOWER` command, then kill the FOLLOWER process, empty the metadata and re-add the process. + + +### Delete FE + +The corresponding type of FE can be deleted by the `ALTER SYSTEM DROP FOLLOWER/OBSERVER` command. The following points should be noted: + +* For OBSERVER type FE, direct DROP is enough, without risk. + +* For FOLLOWER type FE. First, you should make sure that you start deleting an odd number of FOLLOWERs (three or more). + + 1. If the FE of non-MASTER role is deleted, it is recommended to connect to MASTER FE, execute DROP command, and then kill the process. + 2. If you want to delete MASTER FE, first confirm that there are odd FOLLOWER FE and it works properly. Then kill the MASTER FE process first. At this point, a FE will be elected MASTER. After confirming that the remaining FE is working properly, connect to the new MASTER FE and execute the DROP command to delete the old MASTER FE. + +## Advanced Operations + +### Failure recovery + +FE may fail to start bdbje and synchronize between FEs for some reasons. Phenomena include the inability to write metadata, the absence of MASTER, and so on. At this point, we need to manually restore the FE. The general principle of manual recovery of FE is to start a new MASTER through metadata in the current `meta_dir`, and then add other FEs one by one. Please follow the following steps strictly: + +1. First, stop all FE processes and all business access. Make sure that during metadata recovery, external access will not lead to other unexpected problems. + +2. Identify which FE node's metadata is up-to-date: + + * First of all, **be sure to back up all FE's `meta_dir` directories first.** + * Usually, Master FE's metadata is up to date. You can see the suffix of image.xxxx file in the `meta_dir/image` directory. The larger the number, the newer the metadata. + * Usually, by comparing all FOLLOWER FE image files, you can find the latest metadata. + * After that, we use the FE node with the latest metadata to recover. + * If using metadata of OBSERVER node to recover will be more troublesome, it is recommended to choose FOLLOWER node as far as possible. + +3. The following operations are performed on the FE nodes selected in step 2. + + 1. If the node is an OBSERVER, first change the `role=OBSERVER` in the `meta_dir/image/ROLE` file to `role=FOLLOWER`. (Recovery from the OBSERVER node will be more cumbersome, first follow the steps here, followed by a separate description) + 2. Add configuration in fe.conf: `metadata_failure_recovery=true`. + 3. Run `sh bin/start_fe.sh` to start the FE + 4. 
If normal, the FE will start in the role of MASTER, similar to the description in the previous section `Start a single node FE`. You should see the words `transfer from XXXX to MASTER` in fe.log. + 5. After the start-up is completed, connect to the FE first, and execute some query imports to check whether normal access is possible. If the operation is not normal, it may be wrong. It is recommended to read the above steps carefully and try again with the metadata previously backed up. If not, the problem may be more serious. + 6. If successful, through the `show frontends;` command, you should see all the FEs you added before, and the current FE is master. + 7. Delete the `metadata_failure_recovery=true` configuration item in fe.conf, or set it to `false`, and restart the FE (**Important**). + + + > If you are recovering metadata from an OBSERVER node, after completing the above steps, you will find that the current FE role is OBSERVER, but `IsMaster` appears as `true`. This is because the "OBSERVER" seen here is recorded in Doris's metadata, but whether it is master or not, is recorded in bdbje's metadata. Because we recovered from an OBSERVER node, there was inconsistency. Please take the following steps to fix this problem (we will fix it in a later version): + + > 1. First, all FE nodes except this "OBSERVER" are DROPed out. + > 2. A new FOLLOWER FE is added through the `ADD FOLLOWER` command, assuming that it is on hostA. + > 3. Start a new FE on hostA and join the cluster by `helper`. + > 4. After successful startup, you should see two FEs through the `show frontends;` statement, one is the previous OBSERVER, the other is the newly added FOLLOWER, and the OBSERVER is the master. + > 5. After confirming that the new FOLLOWER is working properly, the new FOLLOWER metadata is used to perform a failure recovery operation again. + > 6. The purpose of the above steps is to manufacture a metadata of FOLLOWER node artificially, and then use this metadata to restart fault recovery. This avoids inconsistencies in recovering metadata from OBSERVER. + + >The meaning of `metadata_failure_recovery = true` is to empty the metadata of `bdbje`. In this way, bdbje will not contact other FEs before, but start as a separate FE. This parameter needs to be set to true only when restoring startup. After recovery, it must be set to false. Otherwise, once restarted, the metadata of bdbje will be emptied again, which will make other FEs unable to work properly. + +4. After the successful execution of step 3, we delete the previous FEs from the metadata by using the `ALTER SYSTEM DROP FOLLOWER/OBSERVER` command and add them again by adding new FEs. + +5. If the above operation is normal, it will be restored. + +### FE type change + +If you need to change the existing FOLLOWER/OBSERVER type FE to OBSERVER/FOLLOWER type, please delete FE in the way described above, and then add the corresponding type FE. + +### FE Migration + +If you need to migrate one FE from the current node to another, there are several scenarios. + +1. FOLLOWER, or OBSERVER migration for non-MASTER nodes + + After adding a new FOLLOWER / OBSERVER directly, delete the old FOLLOWER / OBSERVER. + +2. Single-node MASTER migration + + When there is only one FE, refer to the `Failure Recovery` section. Copy the palo-meta directory of FE to the new node and start the new MASTER in Step 3 of the `Failure Recovery` section + +3. 
A set of FOLLOWERs migrates from one set of nodes to another set of new nodes
+
+    Deploy FE on the new nodes and first add them through ADD FOLLOWER. Then DROP the old nodes one by one. During the one-by-one DROP process, the MASTER will automatically be elected among the new FOLLOWER nodes.
+
+### Replacement of FE ports
+
+FE currently has the following ports:
+
+* edit_log_port: bdbje's communication port
+* http_port: http port, also used to push the image
+* rpc_port: FE's thrift server port
+* query_port: MySQL connection port
+
+1. edit_log_port
+
+    If this port needs to be replaced, it needs to be restored with reference to the operations in the `Failure Recovery` section. This is because the port has been persisted into bdbje's own metadata (and also recorded in Doris's own metadata), so it is necessary to clear bdbje's metadata by setting `metadata_failure_recovery=true`.
+
+2. http_port
+
+    All FE http_ports must be consistent. So if you want to modify this port, all FEs need to be modified and restarted. Modifying this port is more complex in the case of multiple FOLLOWER deployments (it involves a chicken-and-egg problem...), so this operation is not recommended. If it is necessary, follow the operations in the `Failure Recovery` section directly.
+
+3. rpc_port
+
+    After modifying the configuration, restart the FE directly. The Master FE informs the BEs of the new port through the heartbeat. Only this port of the Master FE will be used. However, it is still recommended that the ports of all FEs be kept consistent.
+
+4. query_port
+
+    After modifying the configuration, restart the FE directly. This only affects MySQL's connection target.
+
+
+## Best Practices
+
+The deployment recommendations for FE are described in the Installation and [Deployment Document](../../installing/install-deploy.md). Here are some supplements.
+
+* **If you don't know the operation logic of FE metadata very well, or you don't have enough experience in the operation and maintenance of FE metadata, we strongly recommend that in practice only one FOLLOWER-type FE be deployed as MASTER, and that the other FEs be OBSERVERs. This can reduce many complex operation and maintenance problems.** Don't worry too much about the single point of failure of the MASTER for metadata writing. First, if you configure it properly, it is very unlikely for the FE, as a Java process, to crash. Second, if the MASTER disk is damaged (the probability is very low), we can also use the metadata on an OBSERVER to recover manually through the `Failure Recovery` procedure.
+
+* The JVM of the FE process must have sufficient memory. We **strongly recommend** that the FE's JVM memory be at least 10GB, and 32GB to 64GB is recommended. Also deploy monitoring to watch JVM memory usage. Because if OOM occurs in the FE, metadata writing may fail, resulting in failures that **cannot be recovered**!
+
+* FE nodes should have enough disk space to prevent excessive metadata from causing insufficient disk space. At the same time, FE logs also take up more than a dozen gigabytes of disk space.
+
+## Other common problems
+
+1. The message `meta out of date. current time: xxx, synchronized time: xxx, has log: xxx, fe type: xxx` keeps scrolling in fe.log
+
+    This is usually because the FE cannot elect a Master. For example, if three FOLLOWERs are configured but only one FOLLOWER is started, this FOLLOWER will report this problem. Usually, just start the remaining FOLLOWERs. If the problem has not been solved after they are started, manual recovery may be required in accordance with the `Failure Recovery` section.
+
+2. `Clock delta: xxxx ms.
+
+## Other common problems
+
+1. fe.log keeps rolling `meta out of date. current time: xxx, synchronized time: xxx, has log: xxx, fe type: xxx`
+
+   This is usually because the FEs cannot elect a Master. For example, if three FOLLOWERs are configured but only one is started, that FOLLOWER will report this problem. Usually, just start the remaining FOLLOWERs. If the problem is still not solved after they start, manual recovery may be required following the `Failure Recovery` section.
+
+2. `Clock delta: xxxx ms. between Feeder: xxxx and this Replica exceeds max permissible delta: xxxx ms.`
+
+   bdbje requires that the clock error between nodes not exceed a certain threshold. If it is exceeded, the node exits abnormally. The default threshold is 5000ms, controlled by the FE parameter `max_bdbje_clock_delta_ms`, and can be modified as appropriate. But we suggest using NTP or another clock synchronization method to keep the clocks of the Doris cluster hosts in sync.
+
+
+3. Mirror files in the `image/` directory have not been updated for a long time
+
+   By default, the Master FE generates an image file for every 50,000 metadata journal entries. In a frequently used cluster, a new image file is usually generated every half day to every few days. If you find that the image file has not been updated for a long time (for example, more than a week), you can investigate the causes in the following order:
+
+   1. Search for `memory is not enough to do checkpoint. Committed memroy XXXX Bytes, used memory XXXX Bytes. ` in the fe.log of the Master FE. If found, it indicates that the current FE's JVM memory is insufficient for image generation (usually we need to reserve half of the FE memory for image generation). You then need to increase the JVM memory and restart the FE before observing again. Each time the Master FE restarts, a new image is generated directly; this restart method can also be used to actively generate a new image. Note that if there are multiple FOLLOWER deployments, then when you restart the current Master FE another FOLLOWER FE will become MASTER, and subsequent image generation will be the responsibility of the new Master. Therefore, you may need to modify the JVM memory configuration of all FOLLOWER FEs.
+
+   2. Search for `begin to generate new image: image.xxxx` in the fe.log of the Master FE. If it is found, image generation has started. Check the subsequent log of this thread: if `checkpoint finished save image.xxxx` appears, the image was written successfully; if `Exception when generating new image file` occurs, the generation failed and the specific error messages need to be examined.
+
+
+4. The size of the `bdb/` directory is very large, reaching several GB or more
+
+   After ruling out the error that a new image cannot be generated, the bdb directory may still stay large for a while. This may be because the Master FE failed to push the image to other nodes. You can search for `push image.XXXX to other nodes. totally XX nodes, push successed YY nodes` in the fe.log of the Master FE. If YY is smaller than XX, some FEs did not receive the push successfully. You can find the specific error `Exception when pushing image file.url = xxx` in the fe.log.
+
+   At the same time, you can add the configuration `edit_log_roll_num = xxxx` to the FE configuration file. This parameter sets how many metadata journal entries trigger one image; the default is 50000. It can be reduced appropriately to generate images more frequently, thus speeding up the deletion of old journals.
+
+5. FOLLOWER FEs hang up one after another
+
+   Because Doris's metadata adopts a majority-write strategy, a metadata journal entry must be written to a majority of the FOLLOWER FEs (for example, with three FOLLOWERs, at least two must succeed) before it is considered successful. If the write fails, the FE process exits on its own initiative. So suppose there are three FOLLOWERs A, B and C: if C hangs up first and then B hangs up, A will also hang up.
+   So, as described in the `Best Practices` section, if you do not have extensive experience in metadata operation and maintenance, it is not recommended to deploy multiple FOLLOWERs.
+
+6. `get exception when try to close previously opened bdb database. ignore it` appears in fe.log
+
+   If the words `ignore it` follow the message, there is usually no need to deal with it. If you are interested, you can search for this error in `BDBEnvironment.java` and read the comments there.
+
+7. In `show frontends;`, the `Join` column of an FE is `true`, but the FE is actually abnormal
+
+   In the output of `show frontends;`, if the `Join` column is `true`, it only means that the FE **has joined** the cluster at some point. It does not mean that it is still working normally in the cluster. If it is `false`, the FE **has never joined** the cluster.
+
+8. Configuration of FE `master_sync_policy`, `replica_sync_policy`, and `txn_rollback_limit`
+
+   `master_sync_policy` specifies whether the Leader FE calls fsync() when writing the metadata log, and `replica_sync_policy` specifies whether the other Follower FEs call fsync() when synchronizing metadata in an FE HA deployment. In earlier versions of Doris, these two parameters defaulted to `WRITE_NO_SYNC`, i.e. fsync() was not called. In the latest version of Doris, the default has been changed to `SYNC`, that is, fsync() is called. Calling fsync() significantly reduces the efficiency of metadata disk writes. In some environments, IOPS may drop to several hundred and the latency may increase to 2-3ms (which is still enough for Doris metadata operations). Therefore, we recommend the following configuration:
+
+   1. For a single-Follower FE deployment, set `master_sync_policy` to `SYNC`, which prevents the loss of metadata if the FE host goes down.
+   2. For a multi-Follower FE deployment, `master_sync_policy` and `replica_sync_policy` can be set to `WRITE_NO_SYNC`, because we consider the probability of multiple hosts going down simultaneously to be very low.
+
+   If `master_sync_policy` is set to `WRITE_NO_SYNC` in a single-Follower FE deployment, an outage of the FE host may result in loss of metadata. At this point, if the other Observer FEs attempt to restart, they may report an error:
+
+   ```
+   Node xxx must rollback xx total commits(numPassedDurableCommits of which were durable) to the earliest point indicated by transaction xxxx in order to rejoin the replication group, but the transaction rollback limit of xxx prohibits this.
+   ```
+
+This means that some transactions that have already been persisted need to be rolled back, but the number of entries exceeds the upper limit. Our default upper limit here is 100; it can be changed by setting `txn_rollback_limit`. This operation is only used to try to start the FE normally; the lost metadata cannot be recovered.
diff --git a/docs/documentation/en/administrator-guide/operation/monitor-alert_EN.md b/docs/documentation/en/administrator-guide/operation/monitor-alert_EN.md
new file mode 100644
index 00000000000000..784ece8d76cfc5
--- /dev/null
+++ b/docs/documentation/en/administrator-guide/operation/monitor-alert_EN.md
@@ -0,0 +1,283 @@
+# Monitoring and alarming
+
+This document mainly introduces Doris's monitoring items and how to collect and display them, as well as how to configure alarms (TODO).
+
+[Dashboard template: click to download](https://grafana.com/dashboards/9734/revisions)
+
+> Note: Before 0.9.0 (exclusive), please use revision 1. For version 0.9.x, use revision 2.
For version 0.10.x, use revision 3. + +Dashboard templates are updated from time to time. The way to update the template is shown in the last section. + +Welcome to provide better dashboard. + +## Components + +Doris uses [Prometheus] (https://prometheus.io/) and [Grafana] (https://grafana.com/) to collect and display input monitoring items. + +![](../../../../resources/images/dashboard_overview.png) + +1. Prometheus + + Prometheus is an open source system monitoring and alarm suite. It can collect monitored items by Pull or Push and store them in its own time series database. And through the rich multi-dimensional data query language, to meet the different data display needs of users. + +2. Grafana + + Grafana is an open source data analysis and display platform. Support multiple mainstream temporal database sources including Prometheus. Through the corresponding database query statements, the display data is obtained from the data source. With flexible and configurable dashboard, these data can be quickly presented to users in the form of graphs. + +> Note: This document only provides a way to collect and display Doris monitoring data using Prometheus and Grafana. In principle, these components are not developed or maintained. For more details on these components, please step through the corresponding official documents. + +## Monitoring data + +Doris's monitoring data is exposed through the HTTP interface of Frontend and Backend. Monitoring data is presented in the form of key-value text. Each Key may also be distinguished by different Labels. When the user has built Doris, the monitoring data of the node can be accessed in the browser through the following interfaces: + +* Frontend: `fe_host:fe_http_port/metrics` +* Backend: `be_host:be_web_server_port/metrics` +* Broker: Not available for now + +Users will see the following monitoring item results (for example, FE partial monitoring items): + + ``` + # HELP jvm_heap_size_bytes jvm heap stat + # TYPE jvm_heap_size_bytes gauge + jvm_heap_size_bytes{type="max"} 41661235200 + jvm_heap_size_bytes{type="committed"} 19785285632 + jvm_heap_size_bytes{type="used"} 10113221064 + # HELP jvm_non_heap_size_bytes jvm non heap stat + # TYPE jvm_non_heap_size_bytes gauge + jvm_non_heap_size_bytes{type="committed"} 105295872 + jvm_non_heap_size_bytes{type="used"} 103184784 + # HELP jvm_young_size_bytes jvm young mem pool stat + # TYPE jvm_young_size_bytes gauge + jvm_young_size_bytes{type="used"} 6505306808 + jvm_young_size_bytes{type="peak_used"} 10308026368 + jvm_young_size_bytes{type="max"} 10308026368 + # HELP jvm_old_size_bytes jvm old mem pool stat + # TYPE jvm_old_size_bytes gauge + jvm_old_size_bytes{type="used"} 3522435544 + jvm_old_size_bytes{type="peak_used"} 6561017832 + jvm_old_size_bytes{type="max"} 30064771072 + # HELP jvm_direct_buffer_pool_size_bytes jvm direct buffer pool stat + # TYPE jvm_direct_buffer_pool_size_bytes gauge + jvm_direct_buffer_pool_size_bytes{type="count"} 91 + jvm_direct_buffer_pool_size_bytes{type="used"} 226135222 + jvm_direct_buffer_pool_size_bytes{type="capacity"} 226135221 + # HELP jvm_young_gc jvm young gc stat + # TYPE jvm_young_gc gauge + jvm_young_gc{type="count"} 2186 + jvm_young_gc{type="time"} 93650 + # HELP jvm_old_gc jvm old gc stat + # TYPE jvm_old_gc gauge + jvm_old_gc{type="count"} 21 + jvm_old_gc{type="time"} 58268 + # HELP jvm_thread jvm thread stat + # TYPE jvm_thread gauge + jvm_thread{type="count"} 767 + jvm_thread{type="peak_count"} 831 + ... 
+ ``` + +This is a monitoring data presented in [Promethus Format] (https://prometheus.io/docs/practices/naming/). We take one of these monitoring items as an example to illustrate: + +``` +# HELP jvm_heap_size_bytes jvm heap stat +# TYPE jvm_heap_size_bytes gauge +jvm_heap_size_bytes{type="max"} 41661235200 +jvm_heap_size_bytes{type="committed"} 19785285632 +jvm_heap_size_bytes{type="used"} 10113221064 +``` + +1. Behavior commentary line at the beginning of "#". HELP is the description of the monitored item; TYPE represents the data type of the monitored item, and Gauge is the scalar data in the example. There are also Counter, Histogram and other data types. Specifically, you can see [Prometheus Official Document] (https://prometheus.io/docs/practices/instrumentation/#counter-vs.-gauge,-summary-vs.-histogram). +2. `jvm_heap_size_bytes` is the name of the monitored item (Key); `type= "max"` is a label named `type`, with a value of `max`. A monitoring item can have multiple Labels. +3. The final number, such as `41661235200`, is the monitored value. + +## Monitoring Architecture + +The entire monitoring architecture is shown in the following figure: + +![](../../../../resources/images/monitor_arch.png) + +1. The yellow part is Prometheus related components. Prometheus Server is the main process of Prometheus. At present, Prometheus accesses the monitoring interface of Doris node by Pull, and then stores the time series data in the time series database TSDB (TSDB is included in the Prometheus process, and need not be deployed separately). Prometheus also supports building [Push Gateway] (https://github.com/prometheus/pushgateway) to allow monitored data to be pushed to Push Gateway by Push by monitoring system, and then data from Push Gateway by Prometheus Server through Pull. +2. [Alert Manager] (https://github.com/prometheus/alertmanager) is a Prometheus alarm component, which needs to be deployed separately (no solution is provided yet, but can be built by referring to official documents). Through Alert Manager, users can configure alarm strategy, receive mail, short messages and other alarms. +3. The green part is Grafana related components. Grafana Server is the main process of Grafana. After startup, users can configure Grafana through Web pages, including data source settings, user settings, Dashboard drawing, etc. This is also where end users view monitoring data. + + +## Start building + +Please start building the monitoring system after you have completed the deployment of Doris. + +Prometheus + +1. Download the latest version of Proetheus on the [Prometheus Website] (https://prometheus.io/download/). Here we take version 2.3.2-linux-amd64 as an example. +2. Unzip the downloaded tar file on the machine that is ready to run the monitoring service. +3. Open the configuration file promethues.yml. Here we provide an example configuration and explain it (the configuration file is in YML format, pay attention to uniform indentation and spaces): + + Here we use the simplest way of static files to monitor configuration. Prometheus supports a variety of [service discovery] (https://prometheus.io/docs/prometheus/latest/configuration/configuration/), which can dynamically sense the addition and deletion of nodes. 
+ + ``` + # my global config + global: + scrape_interval: 15s # Global acquisition interval, default 1 m, set to 15s + evaluation_interval: 15s # Global rule trigger interval, default 1 m, set 15s here + + # Alertmanager configuration + alerting: + alertmanagers: + - static_configs: + - targets: + # - alertmanager:9093 + + # A scrape configuration containing exactly one endpoint to scrape: + # Here it's Prometheus itself. + scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'PALO_CLUSTER' # Each Doris cluster, we call it a job. Job can be given a name here as the name of Doris cluster in the monitoring system. + metrics_path: '/metrics' # Here you specify the restful API to get the monitors. With host: port in the following targets, Prometheus will eventually collect monitoring items through host: port/metrics_path. + static_configs: # Here we begin to configure the target addresses of FE and BE, respectively. All FE and BE are written into their respective groups. + - targets: ['fe_host1:8030', 'fe_host2:8030', 'fe_host3:8030'] + labels: + group: fe # Here configure the group of fe, which contains three Frontends + + - targets: ['be_host1:8040', 'be_host2:8040', 'be_host3:8040'] + labels: + group: be # Here configure the group of be, which contains three Backends + + - job_name: 'PALO_CLUSTER_2' # We can monitor multiple Doris clusters in a Prometheus, where we begin the configuration of another Doris cluster. Configuration is the same as above, the following is outlined. + metrics_path: '/metrics' + static_configs: + - targets: ['fe_host1:8030', 'fe_host2:8030', 'fe_host3:8030'] + labels: + group: fe + + - targets: ['be_host1:8040', 'be_host2:8040', 'be_host3:8040'] + labels: + group: be + + ``` + +4. start Promethues + + Start Promethues with the following command: + + `nohup ./prometheus --web.listen-address="0.0.0.0:8181" &` + + This command will run Prometheus in the background and specify its Web port as 8181. After startup, data is collected and stored in the data directory. + +5. stop Promethues + + At present, there is no formal way to stop the process, kill - 9 directly. Of course, Prometheus can also be set as a service to start and stop in a service way. + +6. access Prometheus + + Prometheus can be easily accessed through web pages. The page of Prometheus can be accessed by opening port 8181 through browser. Click on the navigation bar, `Status` -> `Targets`, and you can see all the monitoring host nodes of the grouped Jobs. Normally, all nodes should be `UP`, indicating that data acquisition is normal. Click on an `Endpoint` to see the current monitoring value. If the node state is not UP, you can first access Doris's metrics interface (see previous article) to check whether it is accessible, or query Prometheus related documents to try to resolve. + +7. So far, a simple Prometheus has been built and configured. For more advanced usage, see [Official Documents] (https://prometheus.io/docs/introduction/overview/) + +### Grafana + +1. Download the latest version of Grafana on [Grafana's official website] (https://grafana.com/grafana/download). Here we take version 5.2.1.linux-amd64 as an example. + +2. Unzip the downloaded tar file on the machine that is ready to run the monitoring service. + +3. Open the configuration file conf/defaults.ini. Here we only list the configuration items that need to be changed, and the other configurations can be used by default. 
+ + ``` + # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) + data = data + + # Directory where grafana can store logs + logs = data/log + + # Protocol (http, https, socket) + protocal = http + + # The ip address to bind to, empty will bind to all interfaces + http_addr = + + # The http port to use + http_port = 8182 + ``` + +4. start Grafana + + Start Grafana with the following command + + `nohuo ./bin/grafana-server &` + + This command runs Grafana in the background, and the access port is 8182 configured above. + +5. stop Grafana + + At present, there is no formal way to stop the process, kill - 9 directly. Of course, you can also set Grafana as a service to start and stop as a service. + +6. access Grafana + + Through the browser, open port 8182, you can start accessing the Grafana page. The default username password is admin. + +7. Configure Grafana + + For the first landing, you need to set up the data source according to the prompt. Our data source here is Proetheus, which was configured in the previous step. + + The Setting page of the data source configuration is described as follows: + + 1. Name: Name of the data source, customized, such as doris_monitor_data_source + 2. Type: Select Prometheus + 3. URL: Fill in the web address of Prometheus, such as http://host:8181 + 4. Access: Here we choose the Server mode, which is to access Prometheus through the server where the Grafana process is located. + 5. The other options are available by default. + 6. Click `Save & Test` at the bottom. If `Data source is working`, it means that the data source is available. + 7. After confirming that the data source is available, click on the + number in the left navigation bar and start adding Dashboard. Here we have prepared Doris's dashboard template (at the beginning of this document). When the download is complete, click `New dashboard` -> `Import dashboard` -> `Upload.json File` above to import the downloaded JSON file. + 8. After importing, you can name Dashboard by default `Doris Overview`. At the same time, you need to select the data source, where you select the `doris_monitor_data_source` you created earlier. + 9. Click `Import` to complete the import. Later, you can see Doris's dashboard display. + +8. So far, a simple Grafana has been built and configured. For more advanced usage, see [Official Documents] (http://docs.grafana.org/) + + +## Dashboard + +Here we briefly introduce Doris Dashboard. The content of Dashboard may change with the upgrade of version. This document is not guaranteed to be the latest Dashboard description. + +1. Top Bar + + ![](../../../../resources/images/dashboard_navibar.png) + + * The upper left corner is the name of Dashboard. + * The upper right corner shows the current monitoring time range. You can choose different time ranges by dropping down. You can also specify a regular refresh page interval. + * Cluster name: Each job name in the Prometheus configuration file represents a Doris cluster. Select a different cluster, and the chart below shows the monitoring information for the corresponding cluster. + * fe_master: The Master Frontend node corresponding to the cluster. + * fe_instance: All Frontend nodes corresponding to the cluster. Select a different Frontend, and the chart below shows the monitoring information for the Frontend. + * be_instance: All Backend nodes corresponding to the cluster. Select a different Backend, and the chart below shows the monitoring information for the Backend. 
+ * Interval: Some charts show rate-related monitoring items, where you can choose how much interval to sample and calculate the rate (Note: 15s interval may cause some charts to be unable to display). + +2. Row. + + ![](../../../../resources/images/dashboard_row.png) + + In Grafana, the concept of Row is a set of graphs. As shown in the figure above, Overview and Cluster Overview are two different Rows. Row can be folded by clicking Row. Currently Dashboard has the following Rows (in continuous updates): + + 1. Overview: A summary display of all Doris clusters. + 2. Cluster Overview: A summary display of selected clusters. + 3. Query Statistic: Query-related monitoring of selected clusters. + 4. FE JVM: Select Frontend's JVM monitoring. + 5. BE: A summary display of the backends of the selected cluster. + 6. BE Task: Display of Backends Task Information for Selected Clusters. + +3. Charts + + ![](../../../../resources/images/dashboard_panel.png) + + A typical icon is divided into the following parts: + + 1. Hover the I icon in the upper left corner of the mouse to see the description of the chart. + 2. Click on the illustration below to view a monitoring item separately. Click again to display all. + 3. Dragging in the chart can select the time range. + 4. The selected cluster name is displayed in [] of the title. + 5. Some values correspond to the Y-axis on the left and some to the right, which can be distinguished by the `-right` at the end of the legend. + 6. Click on the name of the chart -> `Edit` to edit the chart. + +## Dashboard Update + +1. Click on `+` in the left column of Grafana and `Dashboard`. +2. Click `New dashboard` in the upper left corner, and `Import dashboard` appears on the right. +3. Click `Upload .json File` to select the latest template file. +4. Selecting Data Sources +5. Click on `Import (Overwrite)` to complete the template update. diff --git a/docs/documentation/en/administrator-guide/operation/multi-tenant_EN.md b/docs/documentation/en/administrator-guide/operation/multi-tenant_EN.md new file mode 100644 index 00000000000000..9583c2ddc51375 --- /dev/null +++ b/docs/documentation/en/administrator-guide/operation/multi-tenant_EN.md @@ -0,0 +1,212 @@ +# Multi-tenancy(Exprimental) + +This function is experimental and is not recommended for use in production environment. + +## Background +Doris, as a PB-level online report and multi-dimensional analysis database, provides cloud-based database services through open cloud, and deploys a physical cluster for each client in the cloud. Internally, a physical cluster deploys multiple services, and separately builds clusters for services with high isolation requirements. In view of the above problems: + +- Deployment of multiple physical cluster maintenance costs a lot (upgrade, functionality on-line, bug repair). +- A user's query or a bug caused by a query often affects other users. +- In the actual production environment, only one BE process can be deployed on a single machine. Multiple BEs can better solve the problem of fat nodes. And for join, aggregation operations can provide higher concurrency. + +Together with the above three points, Doris needs a new multi-tenant scheme, which not only can achieve better resource isolation and fault isolation, but also can reduce the cost of maintenance to meet the needs of common and private clouds. 
+ +## Design Principles + +- Easy to use +- Low Development Cost +- Convenient migration of existing clusters + +## Noun Interpretation + +- FE: Frontend, the module for metadata management or query planning in Doris. +- BE: Backend, the module used to store and query data in Doris. +- Master: A role for FE. A Doris cluster has only one Master and the other FE is Observer or Follower. +- instance: A BE process is an instance in time. +- host: a single physical machine +- Cluster: A cluster consisting of multiple instances. +- Tenant: A cluster belongs to a tenant. Cluster is a one-to-one relationship with tenants. +- database: A user-created database + +## Main Ideas + +- Deploy instances of multiple BEs on a host to isolate resources at the process level. +- Multiple instances form a cluster, and a cluster is assigned to a business-independent tenant. +- FE adds cluster level and is responsible for cluster management. +- CPU, IO, memory and other resources are segregated by cgroup. + +## Design scheme + +In order to achieve isolation, the concept of **virtual cluster** is introduced. + +1. Cluster represents a virtual cluster consisting of instances of multiple BEs. Multiple clusters share FE. +2. Multiple instances can be started on a host. When a cluster is created, an arbitrary number of instances are selected to form a cluster. +3. While creating a cluster, an account named superuser is created, which belongs to the cluster. Super user can manage clusters, create databases, assign privileges, and so on. +4. After Doris starts, the sink creates a default cluster: default_cluster. If the user does not want to use the function of multi-cluster, the default cluster is provided and other operational details of multi-cluster are hidden. + +The concrete structure is as follows: +![](../../../../resources/images/multi_tenant_arch.png) + +## SQL interface + +- Login + + Default cluster login name: user_name@default_cluster or user_name + + Custom cluster login name: user_name@cluster_name + + `mysqlclient -h host -P port -u user_name@cluster_name -p password` + +- Add, delete, decommission and cancel BE + + `ALTER SYSTEM ADD BACKEND "host:port"` + `ALTER SYSTEM DROP BACKEND "host:port"` + `ALTER SYSTEM DECOMMISSION BACKEND "host:port"` + `CANCEL DECOMMISSION BACKEND "host:port"` + + It is strongly recommended to use DECOMMISSION instead of DROP to delete BACKEND. The DECOMMISSION operation will first need to copy data from the offline node to other instances in the cluster. After that, they will be offline. + +- Create a cluster and specify the password for the superuser account + + `CREATE CLUSTER cluster_name PROPERTIES ("instance_num" = "10") identified by "password"` + +- Enter a cluster + + `ENTER cluster name` + +- Cluster Expansion and Shrinkage + + `ALTER CLUSTER cluster_name PROPERTIES ("instance_num" = "10")` + + When the number of instances specified is more than the number of existing be in cluster, it is expanded and if less than it is condensed. + +- Link, migrate DB + + `LINK DATABASE src_cluster_name.db_name dest_cluster_name.db_name` + + Soft-chain dB of one cluster to dB of another cluster can be used by users who need temporary access to dB of other clusters but do not need actual data migration. + + `MIGRATE DATABASE src_cluster_name.db_name dest_cluster_name.db_name` + + If you need to migrate dB across clusters, after linking, migrate the actual migration of data. + + Migration does not affect the query, import and other operations of the current two dbs. 
This is an asynchronous operation. You can see the progress of the migration through `SHOW MIGRATIONS`.
+
+- Delete clusters
+
+  `DROP CLUSTER cluster_name`
+
+  Deleting a cluster requires that all databases in it be deleted manually first.
+
+- Others
+
+  `SHOW CLUSTERS`
+
+  Shows the clusters that have been created in the system. Only the root user has this permission.
+
+  `SHOW BACKENDS`
+
+  Views the BE instances in the cluster.
+
+  `SHOW MIGRATIONS`
+
+  Shows the current DB migration tasks. After initiating a DB migration, you can view its progress through this command.
+
+## Detailed design
+
+1. Namespace isolation
+
+   In order to introduce multi-tenancy, the namespaces of the clusters in the system need to be isolated from each other.
+
+   Doris's existing metadata is image + journal based (the metadata design is described in the related documents). Doris records each metadata operation as a journal entry, periodically writes an image in the layout of **Fig. 1**, and on loading reads it back in the order in which it was written. This brings a problem: the format that has already been written is not easy to modify. For example, the metadata that records data distribution is nested as database + table + tablet + replica. If we isolated the namespaces between clusters in the old way, we would need to add a cluster layer above the database level in the persisted metadata, changing it to cluster + database + table + tablet + replica, as shown in **Figure 2**. But adding this layer brings the following problems:
+
+   - The metadata change brought by adding a layer is incompatible. Metadata would need to be written at the cluster + db + table + tablet + replica level as in Figure 2, which changes the previous metadata organization and makes upgrading from old versions troublesome. The ideal way is to keep the existing metadata format and write the cluster information in the order of Figure 3.
+
+   - All the DB and user references in the code would need an extra cluster layer. The workload is large and the changes go deep: most of the code acquires a db, so almost all existing functions would need to be changed, and a layer of cluster locks would have to be nested on top of the DB locks.
+
+   ![](../../../../resources/images/palo_meta.png)
+
+   To sum up, we prefix DB and user names with the cluster name to solve the conflicts of DB and user names between clusters.
+
+   As shown below, any SQL that involves a db name or user name must spell out the full name of the DB and user according to its cluster.
+
+   ![](../../../../resources/images/cluster_namaspace.png)
+
+   In this way, the above two problems no longer exist, and the metadata is organized in a relatively simple way, that is, recording the dbs, users and nodes belonging to each cluster in the form of **Figure 3**.
+
+2. BE node management
+
+   Each cluster has its own set of instances, which can be viewed through `SHOW BACKENDS`. In order to distinguish which cluster an instance belongs to and how it is used, BE introduces several states:
+
+   - Free: when a BE node is added to the system and does not belong to any cluster, it is free (idle).
+   - In use: once selected into a cluster when creating or expanding a cluster, it is in use.
+   - Cluster decommission: if a shrink operation is performed, the BE currently being removed is in this state. Afterwards, the BE becomes free again.
+   - System decommission: the BE is being taken offline. When this completes, the BE is permanently deleted.
+
+   Only the root user can check whether each BE in the system is in use, through the cluster item in `SHOW PROC "/backends"`: a free BE is idle, otherwise it is in use. `SHOW BACKENDS` can only see the nodes in the current cluster. A hedged example of both commands follows.
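+
+   As a minimal sketch (fe_host, the 9030 query_port, cluster_name and the user names are placeholders; adjust them to your deployment):
+
+   ```
+   # As root, list every BE in the system together with its cluster item.
+   mysql -h fe_host -P 9030 -u root -p -e 'SHOW PROC "/backends";'
+
+   # As a cluster's superuser, list only the BEs belonging to that cluster.
+   mysql -h fe_host -P 9030 -u superuser@cluster_name -p -e 'SHOW BACKENDS;'
+   ```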
+
+   The following is a schematic diagram of the state changes of BE nodes.
+
+   ![](../../../../resources/images/backend_state.png)
+
+3. Creating Clusters
+
+   Only the root user can create a cluster, specifying any number of BE instances.
+
+   Selecting multiple instances on the same machine is supported. The general principle of instance selection is to pick BEs on different machines as much as possible, and to keep the number of BEs used on each machine as even as possible.
+
+   For usage, every user and db belongs to a cluster (except root). To create users and dbs, you first need to enter a cluster. When a cluster is created, the system creates the cluster's manager by default: the superuser account. The superuser has the right to create dbs and users and to view the number of BE nodes in its own cluster. All non-root user logins must specify a cluster, namely `user_name@cluster_name`.
+
+   Only the root user can view all clusters in the system through `SHOW CLUSTERS`, and can enter different clusters by using @ with different cluster names. Clusters are invisible to all users except root.
+
+   In order to be compatible with older versions of Doris, a cluster named default_cluster is built in; this name cannot be used when creating a cluster.
+
+   ![](../../../../resources/images/user_authority.png)
+
+4. Cluster Expansion
+
+   The process of cluster expansion is the same as that of cluster creation. BE instances on hosts that are not yet in the cluster are preferred. The selection principles are the same as for creating a cluster.
+
+5. Cluster Shrinkage (CLUSTER DECOMMISSION)
+
+   Users can shrink a cluster by setting the cluster's `instance_num`.
+
+   Cluster shrinkage gives priority to decommissioning instances on the hosts with the largest number of BE instances.
+
+   Users can also directly use `ALTER CLUSTER DECOMMISSION BACKEND` to specify BEs for cluster shrinkage.
+
+![](../../../../resources/images/replica_recover.png)
+
+6. Create table
+
+   To ensure high availability, the replicas of each tablet must be on different machines. So when creating a table, the strategy for choosing the BEs that host the replicas is to randomly select one BE on each host, and then randomly select the required number of replica BEs from these. Overall, this distributes replicas evenly across the machines.
+
+   Therefore, if a tablet needs 3 replicas, then even if the cluster contains three or more instances but only two or fewer hosts, the tablet still cannot be created.
+
+7. Load Balancing
+
+   The granularity of load balancing is the cluster level; there is no load balancing between clusters. However, the load is calculated at the host level, and a host may carry BE instances of different clusters. Within a cluster, the load is calculated from the number of tablets on each host and the storage utilization, and then tablets on high-load machines are copied to low-load machines (see the load balancing documents for details).
+
+8. LINK DATABASE (Soft Link)
+
+   Multiple clusters can access each other's data through soft links. The granularity of a link is a db of another cluster.
+
+   A db in another cluster is accessed by adding, in one cluster, the information of the other cluster's db that needs to be accessed.
+
+   When querying a linked db, the computing and storage resources used are those of the cluster where the source db is located.
+
+   A db that is soft-linked cannot be deleted in the source cluster; only after the linked db is deleted can the source db be deleted. Deleting the linked db does not delete the source db.
+
+9. MIGRATE DATABASE
+
+   A db can be physically migrated between clusters.
+
+   To migrate a db, you must first link it. After migration, the data is moved to the cluster where the linked db is located; the source db is then deleted and the link is disconnected.
+
+   Data migration reuses the data-replication process of load balancing and replica recovery (see the load balancing related documents for details). Specifically, after the `MIGRATE` command is executed, Doris modifies, in the metadata, the cluster of all replicas of the source db to the destination cluster.
+
+   Doris regularly checks whether the machines in a cluster are balanced, whether replicas are complete, and whether there are redundant replicas. The migration of a db borrows this process: while checking whether replicas are complete, it also checks whether the BE holding each replica belongs to the target cluster; if not, the replica is recorded as needing to be restored. When redundant replicas need to be deleted, replicas outside the cluster are deleted first, and then the existing strategy is followed: replicas on a down BE -> replicas being cloned -> replicas with an outdated version -> replicas on high-load hosts, until no replica is redundant.
+
+![](../../../../resources/images/cluster_link_and_migrate_db.png)
+
+10. BE process isolation
+
+   In order to actually isolate CPU, IO and memory between BE processes, we need to rely on how the BEs are deployed. When deploying, you need to configure cgroups on the host and write all the BE processes to be deployed into the cgroup. For physical IO isolation, the data storage paths configured for each BE need to be on different disks; this is not covered further here.
diff --git a/docs/documentation/en/administrator-guide/operation/tablet-meta-tool_EN.md b/docs/documentation/en/administrator-guide/operation/tablet-meta-tool_EN.md
new file mode 100644
index 00000000000000..a82de78072ab03
--- /dev/null
+++ b/docs/documentation/en/administrator-guide/operation/tablet-meta-tool_EN.md
@@ -0,0 +1,86 @@
+# Tablet metadata management tool
+
+## Background
+
+In the latest version of the code, we introduced RocksDB in BE to store the meta-information of tablets, in order to solve various functional and performance problems caused by storing meta-information in header files. Currently, each data directory (root path) has a corresponding RocksDB instance, in which all tablets on that root path are stored in key-value form.
+
+To facilitate the maintenance of this metadata, we provide an online HTTP interface and an offline meta tool to complete the related management operations.
+
+The HTTP interface is only used to view tablet metadata online, and can be used while the BE process is running.
+
+The meta tool, however, is only used for offline metadata management operations, and the BE must be stopped before it can be used.
+
+The meta tool is shipped in the `lib/` directory of BE.
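+
+As a quick, hedged illustration of the two access paths (the host, port, tablet id and schema hash below are the example values used in the next section; substitute your own):
+
+```
+# Online: fetch a tablet header over the BE HTTP interface while the BE is running.
+curl "http://be_host:8040/api/meta/header/14156/2458238340"
+
+# Offline: run meta_tool from the BE lib/ directory after the BE process has been stopped.
+./lib/meta_tool --root_path=/path/to/root_path --operation=get_header \
+    --tablet_id=14156 --schema_hash=2458238340
+```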
+ +## Operation + +### View Tablet Meta + +Viewing Tablet Meta information can be divided into online and offline methods + +#### On-line + +Access BE's HTTP interface to obtain the corresponding Tablet Meta information: + +api: + +`http://{host}:{port}/api/meta/header/{tablet_id}/{schema_hash}` + + +> Host: be Hostname +> +> port: BE's HTTP port +> +> tablet id: tablet id +> +> schema hash: tablet schema hash + +Give an example: + +`http://be_host:8040/api/meta/header/14156/2458238340` + +If the final query is successful, the Tablet Meta will be returned as json. + +#### Offline + +Get Tablet Meta on a disk based on the meta\ tool tool. + +Order: + +``` +./lib/meta_tool --root_path=/path/to/root_path --operation=get_header --tablet_id=xxx --schema_hash=xxx +``` + +> root_path: The corresponding root_path path path configured in be.conf. + +The result is also a presentation of Tablet Meta in JSON format. + +### Load header + +The function of loading header is provided to realize manual migration of tablet. This function is based on Tablet Meta in JSON format, so if changes in the shard field and version information are involved, they can be changed directly in the JSON content of Tablet Meta. Then use the following commands to load. + +Order: + +``` +./lib/meta_tool --operation=load_header --root_path=/path/to/root_path --json_header_path=path +``` + +### Delete header + +In order to realize the function of deleting a tablet from a disk of a be. + +Order: + +``` +./lib/meta_tool --operation=delete_header --root_path=/path/to/root_path --tablet_id=xxx --schema_hash=xxx` +``` + +### TabletMeta in Pb format + +This command is to view the old file-based management PB format Tablet Meta, and to display Tablet Meta in JSON format. + +Order: + +``` +./lib/meta_tool --operation=show_header --root_path=/path/to/root_path --pb_header_path=path +``` diff --git a/docs/documentation/en/administrator-guide/operation/tablet-repair-and-balance_EN.md b/docs/documentation/en/administrator-guide/operation/tablet-repair-and-balance_EN.md new file mode 100644 index 00000000000000..804f0c006f2e8e --- /dev/null +++ b/docs/documentation/en/administrator-guide/operation/tablet-repair-and-balance_EN.md @@ -0,0 +1,641 @@ +# Data replica management + +Beginning with version 0.9.0, Doris introduced an optimized replica management strategy and supported a richer replica status viewing tool. This document focuses on Doris data replica balancing, repair scheduling strategies, and replica management operations and maintenance methods. Help users to more easily master and manage the replica status in the cluster. + +> Repairing and balancing copies of tables with Collocation attributes can be referred to `docs/documentation/cn/administrator-guide/colocation-join.md'.` + +## Noun Interpretation + +1. Tablet: The logical fragmentation of a Doris table, where a table has multiple fragmentations. +2. Replica: A sliced copy, defaulting to three copies of a slice. +3. Healthy Replica: A healthy copy that survives at Backend and has a complete version. +4. Tablet Checker (TC): A resident background thread that scans all Tablets regularly, checks the status of these Tablets, and decides whether to send them to Tablet Scheduler based on the results. +5. Tablet Scheduler (TS): A resident background thread that handles Tablets sent by Tablet Checker that need to be repaired. At the same time, cluster replica balancing will be carried out. +6. Tablet SchedCtx (TSC): is a tablet encapsulation. 
When TC chooses a tablet, it encapsulates it as a TSC and sends it to TS. +7. Storage Medium: Storage medium. Doris supports specifying different storage media for partition granularity, including SSD and HDD. The replica scheduling strategy is also scheduled for different storage media. + +``` + + +--------+ +-----------+ + | Meta | | Backends | + +---^----+ +------^----+ + | | | 3. Send clone tasks + 1. Check tablets | | | + +--------v------+ +-----------------+ + | TabletChecker +--------> TabletScheduler | + +---------------+ +-----------------+ + 2. Waiting to be scheduled + + +``` +The figure above is a simplified workflow. + + +## Duplicate status + +Multiple copies of a Tablet may cause state inconsistencies due to certain circumstances. Doris will attempt to automatically fix the inconsistent copies of these states so that the cluster can recover from the wrong state as soon as possible. + +**The health status of a Replica is as follows:** + +1. BAD + + That is, the copy is damaged. Includes, but is not limited to, the irrecoverable damaged status of copies caused by disk failures, BUGs, etc. + +2. VERSION\_MISSING + + Version missing. Each batch of imports in Doris corresponds to a data version. A copy of the data consists of several consecutive versions. However, due to import errors, delays and other reasons, the data version of some copies may be incomplete. + +3. HEALTHY + + Health copy. That is, a copy of the normal data, and the BE node where the copy is located is in a normal state (heartbeat is normal and not in the offline process). + +**The health status of a Tablet is determined by the status of all its copies. There are the following categories:** + +1. REPLICA\_MISSING + + The copy is missing. That is, the number of surviving copies is less than the expected number of copies. + +2. VERSION\_INCOMPLETE + + The number of surviving copies is greater than or equal to the number of expected copies, but the number of healthy copies is less than the number of expected copies. + +3. REPLICA\_RELOCATING + + Have a full number of live copies of the replication num version, but the BE nodes where some copies are located are in unavailable state (such as decommission) + +4. REPLICA\_MISSING\_IN\_CLUSTER + + When using multi-cluster, the number of healthy replicas is greater than or equal to the expected number of replicas, but the number of replicas in the corresponding cluster is less than the expected number of replicas. + +5. REDUNDANT + + Duplicate redundancy. Healthy replicas are in the corresponding cluster, but the number of replicas is larger than the expected number. Or there's a spare copy of unavailable. + +6. FORCE\_REDUNDANT + + This is a special state. It only occurs when the number of expected replicas is greater than or equal to the number of available nodes, and when the Tablet is in the state of replica missing. In this case, you need to delete a copy first to ensure that there are available nodes for creating a new copy. + +7. COLOCATE\_MISMATCH + + Fragmentation status of tables for Collocation attributes. Represents that the distribution of fragmented copies is inconsistent with the specified distribution of Colocation Group. + +8. COLOCATE\_REDUNDANT + + Fragmentation status of tables for Collocation attributes. Represents the fragmented copy redundancy of the Colocation table. + +8. HEALTHY + + Healthy fragmentation, that is, conditions [1-5] are not satisfied. 
+ +## Replica Repair + +As a resident background process, Tablet Checker regularly checks the status of all fragments. For unhealthy fragmentation, it will be sent to Tablet Scheduler for scheduling and repair. The actual operation of repair is accomplished by clone task on BE. FE is only responsible for generating these clone tasks. + +> Note 1: The main idea of replica repair is to make the number of fragmented replicas reach the desired value by creating or completing them first. Then delete the redundant copy. +> +> Note 2: A clone task is to complete the process of copying specified data from a specified remote end to a specified destination. + +For different states, we adopt different repair methods: + +1. REPLICA\_MISSING/REPLICA\_RELOCATING + + Select a low-load, available BE node as the destination. Choose a healthy copy as the source. Clone tasks copy a complete copy from the source to the destination. For replica completion, we will directly select an available BE node, regardless of the storage medium. + +2. VERSION\_INCOMPLETE + + Select a relatively complete copy as the destination. Choose a healthy copy as the source. The clone task attempts to copy the missing version from the source to the destination. + +3. REPLICA\_MISSING\_IN\_CLUSTER + + This state processing method is the same as REPLICAMISSING. + +4. REDUNDANT + + Usually, after repair, there will be redundant copies in fragmentation. We select a redundant copy to delete it. The selection of redundant copies follows the following priorities: + 1. The BE where the copy is located has been offline. + 2. The copy is damaged + 3. The copy is lost in BE or offline + 4. The replica is in the CLONE state (which is an intermediate state during clone task execution) + 5. The copy has version missing + 6. The cluster where the copy is located is incorrect + 7. The BE node where the replica is located has a high load + +5. FORCE\_REDUNDANT + + Unlike REDUNDANT, because at this point Tablet has a copy missing, because there are no additional available nodes for creating new copies. So at this point, a copy must be deleted to free up a available node for creating a new copy. + The order of deleting copies is the same as REDUNDANT. + +6. COLOCATE\_MISMATCH + + Select one of the replica distribution BE nodes specified in Colocation Group as the destination node for replica completion. + +7. COLOCATE\_REDUNDANT + + Delete a copy on a BE node that is distributed by a copy specified in a non-Colocation Group. + + Doris does not deploy a copy of the same Tablet on a different BE of the same host when selecting a replica node. It ensures that even if all BEs on the same host are deactivated, all copies will not be lost. + +### Scheduling priority + +Waiting for the scheduled fragments in Tablet Scheduler gives different priorities depending on the status. High priority fragments will be scheduled first. There are currently several priorities. + +1. VERY\_HIGH + + * REDUNDANT. For slices with duplicate redundancy, we give priority to them. Logically, duplicate redundancy is the least urgent, but because it is the fastest to handle and can quickly release resources (such as disk space, etc.), we give priority to it. + * FORCE\_REDUNDANT. Ditto. + +2. HIGH + + * REPLICA\_MISSING and most copies are missing (for example, 2 copies are missing in 3 copies) + * VERSION\_INCOMPLETE and most copies are missing + * COLOCATE\_MISMATCH We hope that the fragmentation related to the Collocation table can be repaired as soon as possible. 
+ * COLOCATE\_REDUNDANT + +3. NORMAL + + * REPLICA\_MISSING, but most survive (for example, three copies lost one) + * VERSION\_INCOMPLETE, but most copies are complete + * REPLICA\_RELOCATING and relocate is required for most replicas (e.g. 3 replicas with 2 replicas) + +4. LOW + + * REPLICA\_MISSING\_IN\_CLUSTER + * REPLICA\_RELOCATING most copies stable + +### Manual priority + +The system will automatically determine the scheduling priority. Sometimes, however, users want the fragmentation of some tables or partitions to be repaired faster. So we provide a command that the user can specify that a slice of a table or partition is repaired first: + +`ADMIN REPAIR TABLE tbl [PARTITION (p1, p2, ...)];` + +This command tells TC to give VERY HIGH priority to the problematic tables or partitions that need to be repaired first when scanning Tablets. + +> Note: This command is only a hint, which does not guarantee that the repair will be successful, and the priority will change with the scheduling of TS. And when Master FE switches or restarts, this information will be lost. + +Priority can be cancelled by the following commands: + +`ADMIN CANCEL REPAIR TABLE tbl [PARTITION (p1, p2, ...)];` + +### Priority scheduling + +Priority ensures that severely damaged fragments can be repaired first, and improves system availability. But if the high priority repair task fails all the time, the low priority task will never be scheduled. Therefore, we will dynamically adjust the priority of tasks according to the running status of tasks, so as to ensure that all tasks have the opportunity to be scheduled. + +* If the scheduling fails for five consecutive times (e.g., no resources can be obtained, no suitable source or destination can be found, etc.), the priority will be lowered. +* If not scheduled for 30 minutes, priority will be raised. +* The priority of the same tablet task is adjusted at least five minutes apart. + +At the same time, in order to ensure the weight of the initial priority, we stipulate that the initial priority is VERY HIGH, and the lowest is lowered to NORMAL. When the initial priority is LOW, it is raised to HIGH at most. The priority adjustment here also adjusts the priority set manually by the user. + +## Duplicate Equilibrium + +Doris automatically balances replicas within the cluster. The main idea of balancing is to create a replica of some fragments on low-load nodes, and then delete the replicas of these fragments on high-load nodes. At the same time, because of the existence of different storage media, there may or may not exist one or two storage media on different BE nodes in the same cluster. We require that fragments of storage medium A be stored in storage medium A as far as possible after equalization. So we divide the BE nodes of the cluster according to the storage medium. Then load balancing scheduling is carried out for different BE node sets of storage media. + +Similarly, replica balancing ensures that a copy of the same table will not be deployed on the BE of the same host. + +### BE Node Load + +We use Cluster LoadStatistics (CLS) to represent the load balancing of each backend in a cluster. Tablet Scheduler triggers cluster equilibrium based on this statistic. We currently calculate a load Score for each BE as the BE load score by using **disk usage** and **number of copies**. The higher the score, the heavier the load on the BE. 
+ +Disk usage and number of copies have a weight factor, which is **capacityCoefficient** and **replicaNumCoefficient**, respectively. The sum of them is **constant to 1**. Among them, capacityCoefficient will dynamically adjust according to actual disk utilization. When the overall disk utilization of a BE is below 50%, the capacityCoefficient value is 0.5, and if the disk utilization is above 75% (configurable through the FE configuration item `capacity_used_percent_high_water`), the value is 1. If the utilization rate is between 50% and 75%, the weight coefficient increases smoothly. The formula is as follows: + +`capacityCoefficient = 2 * Disk Utilization - 0.5` + +The weight coefficient ensures that when disk utilization is too high, the backend load score will be higher to ensure that the BE load is reduced as soon as possible. + +Tablet Scheduler updates CLS every 1 minute. + +### Equilibrium strategy + +Tablet Scheduler uses Load Balancer to select a certain number of healthy fragments as candidate fragments for balance in each round of scheduling. In the next scheduling, balanced scheduling will be attempted based on these candidate fragments. + +## Resource control + +Both replica repair and balancing are accomplished by replica copies between BEs. If the same BE performs too many tasks at the same time, it will bring a lot of IO pressure. Therefore, Doris controls the number of tasks that can be performed on each node during scheduling. The smallest resource control unit is the disk (that is, a data path specified in be.conf). By default, we configure two slots per disk for replica repair. A clone task occupies one slot at the source and one slot at the destination. If the number of slots is zero, no more tasks will be assigned to this disk. The number of slots can be configured by FE's `schedule_slot_num_per_path` parameter. + +In addition, by default, we provide two separate slots per disk for balancing tasks. The purpose is to prevent high-load nodes from losing space by balancing because slots are occupied by repair tasks. + +## Duplicate Status View + +Duplicate status view mainly looks at the status of the duplicate, as well as the status of the duplicate repair and balancing tasks. Most of these states **exist only in** Master FE nodes. Therefore, the following commands need to be executed directly to Master FE. + +### Duplicate status + +1. Global state checking + + Through `SHOW PROC'/ statistic'; `commands can view the replica status of the entire cluster. + + ``` + +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ + | DbId | DbName | TableNum | PartitionNum | IndexNum | TabletNum | ReplicaNum | UnhealthyTabletNum | InconsistentTabletNum | + +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ + | 35153636 | default_cluster:DF_Newrisk | 3 | 3 | 3 | 96 | 288 | 0 | 0 | + | 48297972 | default_cluster:PaperData | 0 | 0 | 0 | 0 | 0 | 0 | 0 | + | 5909381 | default_cluster:UM_TEST | 7 | 7 | 10 | 320 | 960 | 1 | 0 | + | Total | 240 | 10 | 10 | 13 | 416 | 1248 | 1 | 0 | + +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ + ``` + + The `UnhealthyTabletNum` column shows how many Tablets are in an unhealthy state in the corresponding database. 
`The Inconsistent Tablet Num` column shows how many Tablets are in an inconsistent replica state in the corresponding database. The last `Total` line counts the entire cluster. Normally `Unhealth Tablet Num` and `Inconsistent Tablet Num` should be 0. If it's not zero, you can further see which Tablets are there. As shown in the figure above, one table in the UM_TEST database is not healthy, you can use the following command to see which one is. + + `SHOW PROC '/statistic/5909381';` + + Among them `5909381'is the corresponding DbId. + + ``` + +------------------+---------------------+ + | UnhealthyTablets | InconsistentTablets | + +------------------+---------------------+ + | [40467980] | [] | + +------------------+---------------------+ + ``` + + The figure above shows the specific unhealthy Tablet ID (40467980). Later we'll show you how to view the status of each copy of a specific Tablet. + +2. Table (partition) level status checking + + Users can view the status of a copy of a specified table or partition through the following commands and filter the status through a WHERE statement. If you look at table tbl1, the state on partitions P1 and P2 is a copy of NORMAL: + + `ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) WHERE STATUS = "NORMAL";` + + ``` + +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ + | TabletId | ReplicaId | BackendId | Version | LastFailedVersion | LastSuccessVersion | CommittedVersion | SchemaHash | VersionNum | IsBad | State | Status | + +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ + | 29502429 | 29502432 | 10006 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK | + | 29502429 | 36885996 | 10002 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | + | 29502429 | 48100551 | 10007 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | + | 29502433 | 29502434 | 10001 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK | + | 29502433 | 44900737 | 10004 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | + | 29502433 | 48369135 | 10006 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | + +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ + ``` + + The status of all copies is shown here. Where `IsBad` is listed as `true`, the copy is damaged. The `Status` column displays other states. Specific status description, you can see help through `HELP ADMIN SHOW REPLICA STATUS`. + + ` The ADMIN SHOW REPLICA STATUS `command is mainly used to view the health status of copies. 
Users can also view additional information about copies of a specified table by using the following commands: + + `SHOW TABLET FROM tbl1;` + + ``` + +----------+-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------- -+--------------+----------------------+ + | TabletId | ReplicaId | BackendId | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | DataSize | RowCount | State | LstConsistencyCheckTime | CheckVersion | CheckVersionHash | VersionCount | PathHash | + +----------+-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------- -+--------------+----------------------+ + | 29502429 | 29502432 | 10006 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -5822326203532286804 | + | 29502429 | 36885996 | 10002 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -1441285706148429853 | + | 29502429 | 48100551 | 10007 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -4784691547051455525 | + +----------+-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------- -+--------------+----------------------+ + ``` + + The figure above shows some additional information, including copy size, number of rows, number of versions, where the data path is located. + + > Note: The contents of the `State'column shown here do not represent the health status of the replica, but the status of the replica under certain tasks, such as CLONE, SCHEMA CHANGE, ROLLUP, etc. + + In addition, users can check the distribution of replicas in a specified table or partition by following commands. + + `ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1;` + + ``` + +-----------+------------+-------+---------+ + | BackendId | ReplicaNum | Graph | Percent | + +-----------+------------+-------+---------+ + | 10000 | 7 | | 7.29 % | + | 10001 | 9 | | 9.38 % | + | 10002 | 7 | | 7.29 % | + | 10003 | 7 | | 7.29 % | + | 10004 | 9 | | 9.38 % | + | 10005 | 11 | > | 11.46 % | + | 10006 | 18 | > | 18.75 % | + | 10007 | 15 | > | 15.62 % | + | 10008 | 13 | > | 13.54 % | + +-----------+------------+-------+---------+ + ``` + + Here we show the number and percentage of replicas of table tbl1 on each BE node, as well as a simple graphical display. + +4. Tablet level status checking + + When we want to locate a specific Tablet, we can use the following command to view the status of a specific Tablet. 
For example, check the tablet with ID 2950253: + + `SHOW TABLET 29502553;` + + ``` + +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ + | DbName | TableName | PartitionName | IndexName | DbId | TableId | PartitionId | IndexId | IsSync | DetailCmd | + +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ + | default_cluster:test | test | test | test | 29502391 | 29502428 | 29502427 | 29502428 | true | SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553'; | + +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ + ``` + + The figure above shows the database, tables, partitions, roll-up tables and other information corresponding to this tablet. The user can copy the command in the `DetailCmd` command to continue executing: + + `Show Proc'/DBS/29502391/29502428/Partitions/29502427/29502428/29502553;` + + ``` + +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+ + | ReplicaId | BackendId | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | SchemaHash | DataSize | RowCount | State | IsBad | VersionCount | PathHash | + +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+ + | 43734060 | 10004 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | -8566523878520798656 | + | 29502555 | 10002 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1885826196444191611 | + | 39279319 | 10007 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1656508631294397870 | + +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+ + ``` + + The figure above shows all replicas of the corresponding Tablet. The content shown here is the same as `SHOW TABLET FROM tbl1;`. But here you can clearly see the status of all copies of a specific Tablet. + +### Duplicate Scheduling Task + +1. 
View tasks waiting to be scheduled + + `SHOW PROC '/cluster_balance/pending_tablets';` + + ``` + +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ + | TabletId | Type | Status | State | OrigPrio | DynmPrio | SrcBe | SrcPath | DestBe | DestPath | Timeout | Create | LstSched | LstVisit | Finished | Rate | FailedSched | FailedRunning | LstAdjPrio | VisibleVer | VisibleVerHash | CmtVer | CmtVerHash | ErrMsg | + +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ + | 4203036 | REPAIR | REPLICA_MISSING | PENDING | HIGH | LOW | -1 | -1 | -1 | -1 | 0 | 2019-02-21 15:00:20 | 2019-02-24 11:18:41 | 2019-02-24 11:18:41 | N/A | N/A | 2 | 0 | 2019-02-21 15:00:43 | 1 | 0 | 2 | 0 | unable to find source replica | + +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ + ``` + + The specific meanings of each column are as follows: + + * TabletId: The ID of the Tablet waiting to be scheduled. A scheduling task is for only one Tablet + * Type: Task type, which can be REPAIR (repair) or BALANCE (balance) + * Status: The current status of the Tablet, such as REPLICAMISSING (copy missing) + * State: The status of the scheduling task may be PENDING/RUNNING/FINISHED/CANCELLED/TIMEOUT/UNEXPECTED + * OrigPrio: Initial Priority + * DynmPrio: Current dynamically adjusted priority + * SrcBe: ID of the BE node at the source end + * SrcPath: hash value of the path of the BE node at the source end + * DestBe: ID of destination BE node + * DestPath: hash value of the path of the destination BE node + * Timeout: When the task is scheduled successfully, the timeout time of the task is displayed here in units of seconds. + * Create: The time when the task was created + * LstSched: The last time a task was scheduled + * LstVisit: The last time a task was accessed. Here "accessed" refers to the processing time points associated with the task, including scheduling, task execution reporting, and so on. + * Finished: Task End Time + * Rate: Clone Task Data Copy Rate + * Failed Sched: Number of Task Scheduling Failures + * Failed Running: Number of task execution failures + * LstAdjPrio: Time of last priority adjustment + * CmtVer/CmtVerHash/VisibleVer/VisibleVerHash: version information for clone tasks + * ErrMsg: Error messages that occur when tasks are scheduled and run + +2. View running tasks + + `SHOW PROC '/cluster_balance/running_tablets';` + + The columns in the result have the same meaning as `pending_tablets`. + +3. View completed tasks + + `SHOW PROC '/cluster_balance/history_tablets';` + + By default, we reserve only the last 1,000 completed tasks. The columns in the result have the same meaning as `pending_tablets`. 
If `State` is listed as `FINISHED`, the task is normally completed. For others, you can see the specific reason based on the error information in the `ErrMsg` column. + +## Viewing Cluster Load and Scheduling Resources + +1. Cluster load + + You can view the current load of the cluster by following commands: + + `SHOW PROC '/cluster_balance/cluster_load_stat';` + + First of all, we can see the division of different storage media: + + ``` + +---------------+ + | StorageMedium | + +---------------+ + | HDD | + | SSD | + +---------------+ + ``` + + Click on a storage medium to see the equilibrium state of the BE node that contains the storage medium: + + `SHOW PROC '/cluster_balance/cluster_load_stat/HDD';` + + ``` + +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ + | BeId | Cluster | Available | UsedCapacity | Capacity | UsedPercent | ReplicaNum | CapCoeff | ReplCoeff | Score | Class | + +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ + | 10003 | default_cluster | true | 3477875259079 | 19377459077121 | 17.948 | 493477 | 0.5 | 0.5 | 0.9284678149967587 | MID | + | 10002 | default_cluster | true | 3607326225443 | 19377459077121 | 18.616 | 496928 | 0.5 | 0.5 | 0.948660871419998 | MID | + | 10005 | default_cluster | true | 3523518578241 | 19377459077121 | 18.184 | 545331 | 0.5 | 0.5 | 0.9843539990641831 | MID | + | 10001 | default_cluster | true | 3535547090016 | 19377459077121 | 18.246 | 558067 | 0.5 | 0.5 | 0.9981869446537612 | MID | + | 10006 | default_cluster | true | 3636050364835 | 19377459077121 | 18.764 | 547543 | 0.5 | 0.5 | 1.0011489897614072 | MID | + | 10004 | default_cluster | true | 3506558163744 | 15501967261697 | 22.620 | 468957 | 0.5 | 0.5 | 1.0228319835582569 | MID | + | 10007 | default_cluster | true | 4036460478905 | 19377459077121 | 20.831 | 551645 | 0.5 | 0.5 | 1.057279369420761 | MID | + | 10000 | default_cluster | true | 4369719923760 | 19377459077121 | 22.551 | 547175 | 0.5 | 0.5 | 1.0964036415787461 | MID | + +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ + ``` + + Some of these columns have the following meanings: + + * Available: True means that BE heartbeat is normal and not offline. + * UsedCapacity: Bytes, the size of disk space used on BE + * Capacity: Bytes, the total disk space size on BE + * UsedPercent: Percentage, disk space utilization on BE + * ReplicaNum: Number of copies on BE + * CapCoeff/ReplCoeff: Weight Coefficient of Disk Space and Copy Number + * Score: Load score. The higher the score, the heavier the load. + * Class: Classified by load, LOW/MID/HIGH. 
Balanced scheduling moves replicas from high-load nodes to low-load nodes.
+
+    Users can further check the utilization of each data path on a given BE, for example the BE with ID 10001:
+
+    `SHOW PROC '/cluster_balance/cluster_load_stat/HDD/10001';`
+
+    ```
+    +------------------+------------------+---------------+---------------+---------+--------+----------------------+
+    | RootPath | DataUsedCapacity | AvailCapacity | TotalCapacity | UsedPct | State | PathHash |
+    +------------------+------------------+---------------+---------------+---------+--------+----------------------+
+    | /home/disk4/palo | 498.757 GB | 3.033 TB | 3.525 TB | 13.94 % | ONLINE | 4883406271918338267 |
+    | /home/disk3/palo | 704.200 GB | 2.832 TB | 3.525 TB | 19.65 % | ONLINE | -5467083960906519443 |
+    | /home/disk1/palo | 512.833 GB | 3.007 TB | 3.525 TB | 14.69 % | ONLINE | -7733211489989964053 |
+    | /home/disk2/palo | 881.955 GB | 2.656 TB | 3.525 TB | 24.65 % | ONLINE | 4870995507205544622 |
+    | /home/disk5/palo | 694.992 GB | 2.842 TB | 3.525 TB | 19.36 % | ONLINE | 1916696897889786739 |
+    +------------------+------------------+---------------+---------------+---------+--------+----------------------+
+    ```
+
+    This shows the disk usage of each data path on the specified BE.
+
+2. Scheduling resources
+
+    Users can view the current slot usage of each node with the following command:
+
+    `SHOW PROC '/cluster_balance/working_slots';`
+
+    ```
+    +----------+----------------------+------------+------------+-------------+----------------------+
+    | BeId | PathHash | AvailSlots | TotalSlots | BalanceSlot | AvgRate |
+    +----------+----------------------+------------+------------+-------------+----------------------+
+    | 10000 | 8110346074333016794 | 2 | 2 | 2 | 2.459007474009069E7 |
+    | 10000 | -5617618290584731137 | 2 | 2 | 2 | 2.4730105014001578E7 |
+    | 10001 | 4883406271918338267 | 2 | 2 | 2 | 1.6711402709780257E7 |
+    | 10001 | -5467083960906519443 | 2 | 2 | 2 | 2.7540126380326536E7 |
+    | 10002 | 9137404661108133814 | 2 | 2 | 2 | 2.417217089806745E7 |
+    | 10002 | 1885826196444191611 | 2 | 2 | 2 | 1.6327378456676323E7 |
+    +----------+----------------------+------------+------------+-------------+----------------------+
+    ```
+
+    Slot usage is shown here at the granularity of data paths. `AvgRate` is the historical average copy rate of clone tasks on that path, in bytes per second.
+
+3. Priority repair view
+
+    The following command shows the tables or partitions that were marked for priority repair with the `ADMIN REPAIR TABLE` command:
+
+    `SHOW PROC '/cluster_balance/priority_repair';`
+
+    The `RemainingTimeMs` column indicates how long until the entry is automatically removed from the priority repair queue. This prevents a failed priority repair from occupying scheduling resources indefinitely.
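+
+    For example, a hedged sketch of marking a table for priority repair and then checking the queue (syntax per `HELP ADMIN REPAIR`; the table name is hypothetical):
+
+    ```
+    ADMIN REPAIR TABLE tbl1;
+    SHOW PROC '/cluster_balance/priority_repair';
+    ```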
+
+### Scheduler Statistical Status View
+
+We collect some statistics from the Tablet Checker and the Tablet Scheduler while they run. They can be viewed with the following command:
+
+`SHOW PROC '/cluster_balance/sched_stat';`
+
+```
++---------------------------------------------------+-------------+
+| Item | Value |
++---------------------------------------------------+-------------+
+| num of tablet check round | 12041 |
+| cost of tablet check(ms) | 7162342 |
+| num of tablet checked in tablet checker | 18793506362 |
+| num of unhealthy tablet checked in tablet checker | 7043900 |
+| num of tablet being added to tablet scheduler | 1153 |
+| num of tablet schedule round | 49538 |
+| cost of tablet schedule(ms) | 49822 |
+| num of tablet being scheduled | 4356200 |
+| num of tablet being scheduled succeeded | 320 |
+| num of tablet being scheduled failed | 4355594 |
+| num of tablet being scheduled discard | 286 |
+| num of tablet priority upgraded | 0 |
+| num of tablet priority downgraded | 1096 |
+| num of clone task | 230 |
+| num of clone task succeeded | 228 |
+| num of clone task failed | 2 |
+| num of clone task timeout | 2 |
+| num of replica missing error | 4354857 |
+| num of replica version missing error | 967 |
+| num of replica relocating | 0 |
+| num of replica redundant error | 90 |
+| num of replica missing in cluster error | 0 |
+| num of balance scheduled | 0 |
++---------------------------------------------------+-------------+
+```
+
+The meanings of each line are as follows:
+
+* num of tablet check round: number of rounds the Tablet Checker has run
+* cost of tablet check(ms): total time spent by the Tablet Checker
+* num of tablet checked in tablet checker: number of tablets checked by the Tablet Checker
+* num of unhealthy tablet checked in tablet checker: number of unhealthy tablets found by the Tablet Checker
+* num of tablet being added to tablet scheduler: number of tablets submitted to the Tablet Scheduler
+* num of tablet schedule round: number of rounds the Tablet Scheduler has run
+* cost of tablet schedule(ms): total time spent by the Tablet Scheduler
+* num of tablet being scheduled: total number of tablets scheduled
+* num of tablet being scheduled succeeded: total number of tablets scheduled successfully
+* num of tablet being scheduled failed: total number of tablets whose scheduling failed
+* num of tablet being scheduled discard: total number of tablets whose scheduling failed and that were discarded
+* num of tablet priority upgraded: number of priority upgrades
+* num of tablet priority downgraded: number of priority downgrades
+* num of clone task: number of clone tasks generated
+* num of clone task succeeded: number of clone tasks that succeeded
+* num of clone task failed: number of clone tasks that failed
+* num of clone task timeout: number of clone tasks that timed out
+* num of replica missing error: number of tablets whose checked status is "replica missing"
+* num of replica version missing error: number of tablets whose checked status is "replica version missing" (this value includes num of replica relocating and num of replica missing in cluster error)
+* num of replica relocating: number of tablets whose checked status is "replica relocating"
+* num of replica redundant error: number of tablets whose checked status is "replica redundant"
+* num of replica missing in cluster error: number of tablets whose checked status is "not in the corresponding cluster"
+* num of balance scheduled: number of balance schedulings
+
+> Note: The above values are cumulative over the FE's history. We also print these statistics to the FE log periodically; there, the values in parentheses indicate how much each statistic has changed since it was last printed.
+
+## Relevant configuration instructions
+
+### Adjustable parameters
+
+The following adjustable parameters are all configurable parameters in fe.conf.
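+
+As a reference only, a hedged fe.conf sketch showing how these items would appear (the values below are purely illustrative, not recommendations):
+
+```
+# fe.conf (illustrative values)
+tablet_repair_delay_factor_second = 30
+schedule_slot_num_per_path = 4
+balance_load_score_threshold = 0.05
+```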
+
+* use\_new\_tablet\_scheduler
+
+    * Description: Whether to enable the new replica scheduling mode, i.e. the replica scheduling described in this document. If it is enabled, `disable_colocate_join` must be `true`, because the new scheduling strategy does not yet support scheduling the data shards of colocation tables.
+    * Default value: true
+    * Importance: High
+
+* tablet\_repair\_delay\_factor\_second
+
+    * Description: For different scheduling priorities, we delay the start of repair by different amounts of time, to avoid generating a large number of unnecessary replica repair tasks during routine restarts and upgrades. This parameter is a reference coefficient. For HIGH priority the delay is the coefficient * 1, for NORMAL priority it is the coefficient * 2, and for LOW priority it is the coefficient * 3. That is, the lower the priority, the longer the delay. If the user wants replicas to be repaired as soon as possible, this parameter can be reduced appropriately.
+    * Default value: 60 seconds
+    * Importance: High
+
+* schedule\_slot\_num\_per\_path
+
+    * Description: The default number of slots allocated to each disk for replica repair. This number represents the number of replica repair tasks that a disk can run simultaneously. If you want replicas to be repaired faster, you can increase this parameter appropriately. A higher value means a greater impact on IO.
+    * Default value: 2
+    * Importance: High
+
+* balance\_load\_score\_threshold
+
+    * Description: Threshold for cluster balance. The default is 0.1, i.e. 10%. A BE node whose load score is within 10% of the average load score is considered balanced. If you want the cluster load to be more even, you can reduce this parameter appropriately.
+    * Default value: 0.1
+    * Importance:
+
+* storage\_high\_watermark\_usage\_percent and storage\_min\_left\_capacity\_bytes
+
+    * Description: These two parameters are the upper limit on a disk's space utilization and the lower limit on its remaining free space, respectively. When a disk's space utilization exceeds the upper limit, or its remaining space falls below the lower limit, the disk is no longer used as a destination for balancing tasks.
+    * Default values: 0.85 and 1048576000 (1GB)
+    * Importance:
+
+* disable\_balance
+
+    * Description: Controls whether balancing is turned off. While replicas are being balanced, some operations, such as ALTER TABLE, are forbidden, and balancing can last a long time. If the user wants to perform such a forbidden operation as soon as possible, this parameter can be set to true to turn off balance scheduling.
+    * Default value: true
+    * Importance:
+
+### Unadjustable parameters
+
+The following parameters cannot be modified for the time being and are listed for reference only.
+
+* Tablet Checker scheduling interval
+
+    The Tablet Checker runs a check every 20 seconds.
+
+* Tablet Scheduler scheduling interval
+
+    The Tablet Scheduler runs every 5 seconds.
+
+* Number of tablets scheduled per batch by the Tablet Scheduler
+
+    The Tablet Scheduler schedules at most 50 tablets per batch.
+
+* Maximum number of pending and running tasks in the Tablet Scheduler
+
+    The maximum number of pending tasks plus running tasks is 2000. Beyond 2000, the Tablet Checker no longer submits new scheduling tasks to the Tablet Scheduler.
+
+* Maximum number of balancing tasks in the Tablet Scheduler
+
+    The maximum number of balancing tasks is 500.
When more than 500, there will be no new balancing tasks. + +* Number of slots per disk for balancing tasks + + The number of slots per disk for balancing tasks is 2. This slot is independent of the slot used for replica repair. + +* Update interval of cluster equilibrium + + Tablet Scheduler recalculates the load score of the cluster every 20 seconds. + +* Minimum and Maximum Timeout for Clone Tasks + + A clone task timeout time range is 3 minutes to 2 hours. The specific timeout is calculated by the size of the tablet. The formula is (tablet size)/ (5MB/s). When a clone task fails three times, the task terminates. + +* Dynamic Priority Adjustment Strategy + + The minimum priority adjustment interval is 5 minutes. When a tablet schedule fails five times, priority is lowered. When a tablet is not scheduled for 30 minutes, priority is raised. + +## Relevant issues + +* In some cases, the default replica repair and balancing strategy may cause the network to be full (mostly in the case of gigabit network cards and a large number of disks per BE). At this point, some parameters need to be adjusted to reduce the number of simultaneous balancing and repair tasks. + +* Current balancing strategies for copies of Colocate Table do not guarantee that copies of the same Tablet will not be distributed on the BE of the same host. However, the repair strategy of the copy of Colocate Table detects this distribution error and corrects it. However, it may occur that after correction, the balancing strategy regards the replicas as unbalanced and rebalances them. As a result, the Colocate Group can not achieve stability because of the continuous alternation between the two states. In view of this situation, we suggest that when using Colocate attribute, we try to ensure that the cluster is isomorphic, so as to reduce the probability that replicas are distributed on the same host. diff --git a/docs/documentation/en/administrator-guide/privilege_EN.md b/docs/documentation/en/administrator-guide/privilege_EN.md new file mode 100644 index 00000000000000..dbff3c8eb93f9a --- /dev/null +++ b/docs/documentation/en/administrator-guide/privilege_EN.md @@ -0,0 +1,188 @@ +# Authority Management + +Doris's new privilege management system refers to Mysql's privilege management mechanism, achieves table-level fine-grained privilege control, role-based privilege access control, and supports whitelist mechanism. + +## Noun Interpretation + +1. user_identity + + In a permission system, a user is identified as a User Identity. User ID consists of two parts: username and userhost. Username is a user name, which is composed of English upper and lower case. Userhost represents the IP from which the user link comes. User_identity is presented as username@'userhost', representing the username from userhost. + + Another expression of user_identity is username@['domain'], where domain is the domain name, which can be resolved into a set of IPS by DNS BNS (Baidu Name Service). The final expression is a set of username@'userhost', so we use username@'userhost'to represent it. + +2. Privilege + + The objects of permissions are nodes, databases or tables. Different permissions represent different operating permissions. + +3. Role + + Doris can create custom named roles. Roles can be seen as a set of permissions. When a newly created user can be assigned a role, the role's permissions are automatically granted. Subsequent changes in the role's permissions will also be reflected in all user permissions that belong to the role. + +4. 
user_property
+
+    User attributes are attached directly to a user, not to a user identity. That is, cmy@'192.%' and cmy@['domain'] share the same set of user attributes, which belong to the user cmy rather than to cmy@'192.%' or cmy@['domain'].
+
+    User attributes include, but are not limited to, the maximum number of user connections, import cluster configuration, and so on.
+
+## Supported operations
+
+1. Create users: CREATE USER
+2. Delete users: DROP USER
+3. Grant privileges: GRANT
+4. Revoke privileges: REVOKE
+5. Create roles: CREATE ROLE
+6. Delete roles: DROP ROLE
+7. View current user privileges: SHOW GRANTS
+8. View all user privileges: SHOW ALL GRANTS
+9. View created roles: SHOW ROLES
+10. View user attributes: SHOW PROPERTY
+
+For detailed help with the above commands, connect to Doris with the MySQL client and use HELP plus the command name, for example `HELP CREATE USER`.
+
+## Permission types
+
+Doris currently supports the following permissions:
+
+1. Node_priv
+
+    Node change permission, covering adding, deleting and decommissioning FE, BE and BROKER nodes. Currently, this permission can only be granted to the root user.
+
+2. Grant_priv
+
+    Privilege change permission. Allows granting and revoking privileges and adding, deleting and changing users and roles.
+
+3. Select_priv
+
+    Read-only access to databases and tables.
+
+4. Load_priv
+
+    Write permission on databases and tables, including Load, Insert, Delete and so on.
+
+5. Alter_priv
+
+    Change permission on databases and tables, including renaming databases/tables, adding/deleting/changing columns, and adding/deleting partitions.
+
+6. Create_priv
+
+    Permission to create databases, tables and views.
+
+7. Drop_priv
+
+    Permission to delete databases, tables and views.
+
+## Permission hierarchy
+
+According to the scope to which permissions apply, they are divided into three levels:
+
+1. GLOBAL LEVEL: Global permissions, i.e. permissions on `*.*` granted by a GRANT statement. The granted permissions apply to any table in any database.
+2. DATABASE LEVEL: Database-level permissions, i.e. permissions on `db.*` granted by a GRANT statement. The granted permissions apply to any table in the specified database.
+3. TABLE LEVEL: Table-level permissions, i.e. permissions on `db.tbl` granted by a GRANT statement. The granted permissions apply to the specified table in the specified database.
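+
+As an illustration, hedged GRANT statements at each of the three levels (the user and object names here are hypothetical; see `HELP GRANT` for the exact syntax):
+
+```
+GRANT SELECT_PRIV ON *.* TO jack@'%';              -- GLOBAL LEVEL
+GRANT LOAD_PRIV ON example_db.* TO jack@'%';       -- DATABASE LEVEL
+GRANT ALTER_PRIV ON example_db.tbl1 TO jack@'%';   -- TABLE LEVEL
+```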
+
+## ADMIN / GRANT
+
+ADMIN\_PRIV and GRANT\_PRIV are special in that they both carry the authority to **grant privileges**. The operations related to these two privileges are described here one by one.
+
+1. CREATE USER
+
+    * Users with ADMIN privileges, or GRANT privileges at any level, can create new users.
+
+2. DROP USER
+
+    * Only users with ADMIN privileges can delete users.
+
+3. CREATE/DROP ROLE
+
+    * Only users with ADMIN privileges can create or drop roles.
+
+4. GRANT / REVOKE
+
+    * Users with ADMIN privileges, or GRANT privileges at the GLOBAL level, can grant or revoke the privileges of any user.
+    * Users with GRANT privileges at the DATABASE level can grant or revoke the privileges of any user on the specified database.
+    * Users with GRANT privileges at the TABLE level can grant or revoke the privileges of any user on the specified tables in the specified database.
+
+5. SET PASSWORD
+
+    * Users with ADMIN privileges, or GRANT privileges at the GLOBAL level, can set any user's password.
+    * Ordinary users can set the password of their corresponding User Identity. The corresponding User Identity can be viewed with `SELECT CURRENT_USER();`.
+    * Users with GRANT privileges at a non-GLOBAL level cannot set the password of existing users, but can only specify the password when creating users.
+
+## Some explanations
+
+1. When Doris initializes, the following users and roles are automatically created:
+
+    1. operator role: this role has Node\_priv and Admin\_priv, i.e. all permissions in Doris. In a later version we may restrict this role to Node\_priv only, i.e. node change permissions, to meet certain cloud deployment requirements.
+
+    2. admin role: this role has Admin\_priv, i.e. all permissions except node changes.
+
+    3. root@'%': the root user, allowed to log in from any host, with the operator role.
+
+    4. admin@'%': the admin user, allowed to log in from any host, with the admin role.
+
+2. Deleting or changing the permissions of the default roles and users is not supported.
+
+3. There is one and only one user with the operator role. Multiple users with the admin role can be created.
+
+4. Notes on possible conflicts
+
+    1. Conflict between domain name and IP:
+
+        Assume the following user is created:
+
+        CREATE USER cmy@['domain'];
+
+        and granted:
+
+        GRANT SELECT_PRIV ON \*.\* TO cmy@['domain']
+
+        The domain resolves to two IPs: ip1 and ip2.
+
+        Assume we then grant cmy@'ip1' separately:
+
+        GRANT ALTER_PRIV ON \*.\* TO cmy@'ip1';
+
+        The permissions of cmy@'ip1' become SELECT\_PRIV, ALTER\_PRIV, and when we later change the permissions of cmy@['domain'], cmy@'ip1' will not follow the change.
+
+    2. Duplicate IP conflict:
+
+        Assume the following users are created:
+
+        CREATE USER cmy@'%' IDENTIFIED BY "12345";
+
+        CREATE USER cmy@'192.%' IDENTIFIED BY "abcde";
+
+        In priority, '192.%' takes precedence over '%', so when the user cmy tries to log in to Doris with the password '12345' from 192.168.1.1, the login will be rejected.
+
+5. Forgotten passwords
+
+    If you forget your password and cannot log in to Doris, you can log in without a password using the following command on the machine where a Doris FE node is located:
+
+    `mysql-client -h 127.0.0.1 -P query_port -uroot`
+
+    After login, the password can be reset with the SET PASSWORD command.
+
+6. No user can reset the password of the root user except the root user himself.
+
+7. ADMIN\_PRIV can only be granted or revoked at the GLOBAL level.
+
+8. Having GRANT\_PRIV at the GLOBAL level is effectively equivalent to having ADMIN\_PRIV, because GRANT\_PRIV at this level grants the right to grant arbitrary permissions. Please use it carefully.
+
+## Best Practices
+
+Here are some usage scenarios of the Doris privilege system.
+
+1. Scenario 1
+
+    The users of a Doris cluster are divided into administrators (Admin), development engineers (RD) and users (Client). Administrators own all the rights of the whole cluster and are mainly responsible for cluster building, node management and so on. Development engineers are responsible for business modeling, including creating databases and tables, and importing and modifying data. Users access different databases and tables to read data.
+
+    In this scenario, ADMIN or GRANT privileges can be granted to administrators. RD can be given CREATE, DROP, ALTER, LOAD and SELECT permissions on any or specified databases and tables. Client can be given SELECT permission on any or specified databases and tables. Meanwhile, creating different roles can simplify authorization for multiple users.
+
+2. Scenario 2
+
+    There are multiple services in a cluster, and each business may use one or more databases.
Each business needs to manage its own users. In this scenario. Administrator users can create a user with GRANT privileges at the DATABASE level for each database. The user can only authorize the specified database for the user. + + + + diff --git a/docs/documentation/en/administrator-guide/small-file-mgr_EN.md b/docs/documentation/en/administrator-guide/small-file-mgr_EN.md new file mode 100644 index 00000000000000..297f85ef58740e --- /dev/null +++ b/docs/documentation/en/administrator-guide/small-file-mgr_EN.md @@ -0,0 +1,78 @@ +# File Manager + +Some functions in Doris require some user-defined files. For example, public keys, key files, certificate files and so on are used to access external data sources. The File Manager provides a function that allows users to upload these files in advance and save them in Doris system, which can then be referenced or accessed in other commands. + +## Noun Interpretation + +* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. +* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. +* BDBJE: Oracle Berkeley DB Java Edition. Distributed embedded database for persistent metadata in FE. +* SmallFileMgr: File Manager. Responsible for creating and maintaining user files. + +## Basic concepts + +Files are files created and saved by users in Doris. + +A file is located by `database`, `catalog`, `file_name`. At the same time, each file also has a globally unique ID (file_id), which serves as the identification in the system. + +File creation and deletion can only be performed by users with `admin` privileges. A file belongs to a database. Users who have access to a database (queries, imports, modifications, etc.) can use the files created under the database. + +## Specific operation + +File management has three main commands: `CREATE FILE`, `SHOW FILE` and `DROP FILE`, creating, viewing and deleting files respectively. The specific syntax of these three commands can be viewed by connecting to Doris and executing `HELP cmd;`. + +1. CREATE FILE + + In the command to create a file, the user must provide the following information: + + * file_name: File name. User-defined, unique within a catalog. + * Catalog: Category of files. User-defined, unique within a database. + + > Doris also has some special classification names for specific commands. + + > 1. Kafka + + > When the data source is specified as Kafka in the routine Import command and the file needs to be referenced, Doris defaults to looking for the file from the catalog category named "kafka". + + * url: the download address of the file. Currently, only unauthenticated HTTP download addresses are supported. This download address is only used to download files from this address when executing the create file command. When the file is successfully created and saved in Doris, the address will no longer be used. + * md5: optional. The MD5 value of the file. If the user provides this value, the MD5 value will be checked after the file is downloaded. File creation fails if validation fails. + + When the file is created successfully, the file-related information will be persisted in Doris. Users can view successfully created files through the `SHOW FILE` command. + +2. SHOW FILE + + This command allows you to view files that have been created successfully. Specific operations see: `HELP SHOW FILE;` + +3. DROP FILE + + This command can delete a file that has been created. 
Specific operations see: `HELP DROP FILE;`
+
+## Implementation details
+
+### Create and delete files
+
+When the user executes the `CREATE FILE` command, the FE downloads the file from the given URL. The contents of the file are stored directly in FE memory in Base64 encoding. At the same time, the file content and the meta-information related to the file are persisted in BDBJE. All created files, their meta-information and their content reside in FE memory. If the FE goes down and restarts, the meta-information and file content are loaded back into memory from BDBJE. When a file is deleted, the relevant information is removed from FE memory and the persisted information is removed from BDBJE.
+
+### Use of files
+
+If the FE side needs to use a created file, SmallFileMgr saves the data held in FE memory as a local file in the specified directory and returns the local file path for use.
+
+If the BE side needs to use a created file, the BE downloads the file content to the specified directory on the BE through the FE HTTP interface `api/get_small_file`. The BE also records, in memory, the information of the files it has downloaded. When a file is requested on the BE, it first checks whether the local file exists and verifies it. If the verification passes, the local file path is returned directly. If the verification fails, the local file is deleted and downloaded from the FE again. When the BE restarts, it preloads the local files into memory.
+
+## Use restrictions
+
+Because file meta-information and content are stored in FE memory, by default only files smaller than 1MB can be uploaded, and the total number of files is limited to 100. These limits can be changed through the configuration items described in the next section.
+
+## Relevant configuration
+
+1. FE configuration
+
+* `small_file_dir`: The path used to store uploaded files, defaulting to the `small_files/` directory under the FE runtime directory.
+* `max_small_file_size_bytes`: The size limit of a single file, in bytes. The default is 1MB. Creating a file larger than this limit will be rejected.
+* `max_small_file_number`: The total number of files supported by a Doris cluster. The default is 100. When the number of created files exceeds this value, subsequent creation will be rejected.
+
+    > If you need to upload more files or increase the size limit of a single file, you can modify the `max_small_file_size_bytes` and `max_small_file_number` parameters with the `ADMIN SET CONFIG` command. However, increasing the number and size of files will increase FE memory usage.
+
+2. BE configuration
+
+* `small_file_dir`: The path used to store files downloaded from the FE, defaulting to the `lib/small_files/` directory under the BE runtime directory.
diff --git a/docs/documentation/en/community/gitter_EN.md b/docs/documentation/en/community/gitter_EN.md
new file mode 100644
index 00000000000000..9da88f7392ef46
--- /dev/null
+++ b/docs/documentation/en/community/gitter_EN.md
@@ -0,0 +1,37 @@
+# Gitter User Guide
+
+## Gitter introduction
+
+Gitter is a Markdown-enabled instant messaging tool for developers. It links seamlessly with GitHub: PRs on GitHub can be referenced in chat, the history of discussions is retained and searchable, and both Chinese and English are supported.
+
+Like many other open source projects, Doris can use Gitter as an instant messaging medium for technology exchange and community development.
This article describes how to use Gitter to participate in Doris's open source development and community development. + +## Log in using links + +Entering [https://gitter.im/apache-doris/Lobby](https://gitter.im/apache-doris/Lobby) in the browser automatically jumps to the Doris community chat room interface on Gitter. + +Click on the `SIGN IN TO START TALKING` below to login. It can support two login modes, Github account or Twitter account. The author uses Github account to login, as follows: + +![](../../../resources/images/login-gitter1.png) + +After clicking on the red circle, enter the Github account and password to log into the chat room and start technical or community discussions: + +![](../../../resources/images/login-gitter2.PNG) + +You can use Gitter as well as Wechat, and get functions that are more comfortable for developers and technicians than Wechat, such as directly mentioning an activity for discussion, directly searching history chat records, etc. + +Don't forget to click on the Pentagon in the upper right corner to collect, which will make the chat room easier for you to find. + +For more gitter usage tips, you can refer to: + +[http://www.gitter.net.cn/book/gitter/roomsettings-1.html](http://www.gitter.net.cn/book/gitter/roomsettings-1.html) + +## Install Mobile Client + +You can download Gitter's mobile client and participate in technical discussions on your mobile phone at any time and anywhere. Download links: + +[https://gitter.im/home](https://gitter.im/home) + +## Search Gitter and join Doris Community Chat Room + +Partners already using Gitter log in directly to search for `apache-doris` and can join the chat room when they find it. Other functions are used in the same chapter, which is not discussed here. diff --git a/docs/documentation/en/community/how-to-contribute_EN.md b/docs/documentation/en/community/how-to-contribute_EN.md new file mode 100644 index 00000000000000..8b72d716f66c85 --- /dev/null +++ b/docs/documentation/en/community/how-to-contribute_EN.md @@ -0,0 +1,55 @@ +# Contribute to Doris + +Thank you very much for your interest in the Doris project. We welcome your suggestions, comments (including criticisms), comments and contributions to the Doris project. + +Your suggestions, comments and comments on Doris can be made directly through GitHub's [Issues] (https://github.com/apache/incubator-doris/issues/new/selection). + +There are many ways to participate in and contribute to Doris projects: code implementation, test writing, process tool improvement, document improvement, and so on. Any contribution will be welcomed and you will be added to the list of contributors. Further, with sufficient contributions, you will have the opportunity to become a Commiter of Aapche with Apache mailbox and be included in the list of [Apache Commiters] (http://people.apache.org/committer-index.html). + +Any questions, you can contact us to get timely answers, including Wechat, Gitter (GitHub instant messaging tool), e-mail and so on. + +## Initial contact + +For the first time in Doris community, you can: + +* 关注 [Doris Github](https://github.com/apache/incubator-doris) +* Subscribe to our [mailing list] (./subscribe-mail-list.md); +* Join Doris Wechat Group (add micro-signal: morningman-cmy, note: join Doris Group) and ask questions at any time. +* Enter Doris's [Gitter] (./gitter.md) chat room; + +Learn the development trends of Doris project in time and give your opinions on the topics you are concerned about. 
+
+## Doris's code and documentation
+
+As you can see on [GitHub](https://github.com/apache/incubator-doris), the Apache Doris (incubating) code base mainly consists of three parts: the Frontend (FE), the Backend (BE) and the Broker (which supports reading files on external storage systems such as HDFS). The documentation mainly consists of the wiki on the Doris website and GitHub, as well as the online help manual available while Doris is running. Details of these components are listed in the following table:
+
+| Component Name | Component Description | Related Language |
+|--------|----------------------------|----------|
+| [Frontend daemon (FE)](https://github.com/apache/incubator-doris) | Consists of a query coordinator and a metadata manager | Java |
+| [Backend daemon (BE)](https://github.com/apache/incubator-doris) | Responsible for storing data and executing query fragments | C++ |
+| [Broker](https://github.com/apache/incubator-doris) | Reads HDFS data into Doris | Java |
+| [Website](https://github.com/apache/incubator-doris-website) | Doris website | Markdown |
+| [Github Wiki](https://github.com/apache/incubator-doris/wiki) | Doris Github Wiki | Markdown |
+| Doris Runtime Help Document | Online help manual available at Doris runtime | Markdown |
+
+## Improving documentation
+
+Documentation is the most important way for you to understand Apache Doris, and it is where we need help most!
+
+Browsing the documentation deepens your understanding of Doris and helps you grasp its features and technical details. If you find problems in the documentation, please contact us in time.
+
+If you are interested in improving the quality of the documentation, whether it is fixing the address of a page, correcting a link, or writing a better introductory document, you are very welcome!
+
+Most of our documentation is written in Markdown format, and you can modify and submit documentation changes directly through the `docs/` directory on [GitHub](https://github.com/apache/incubator-doris). To submit code changes, refer to [Pull Request](./pull-request.md).
+
+## If a bug or problem is found
+
+If a bug or problem is found, you can raise a new issue through GitHub's [Issues](https://github.com/apache/incubator-doris/issues/new/select), and someone will deal with it regularly.
+
+You can also fix it yourself by reading and analyzing the code (of course, it is better to talk to us first, since someone may have already fixed the same problem) and submitting a [Pull Request](./pull-request.md).
+
+## Modify the code and submit a PR (Pull Request)
+
+You can download the code, compile and install it, and deploy and run it for a try (refer to the [compilation document](./installing/compilation.md)) to see whether it works as you expect. If you have problems, you can contact us directly, ask questions, or fix them yourself by reading and analyzing the source code.
+
+Whether it is fixing bugs or adding features, you are very welcome. If you want to submit code to Doris, you need to fork the code repository on GitHub into your own space, create a new branch for your code there, add the source project as upstream, and submit a PR.
diff --git a/docs/documentation/en/community/index.rst b/docs/documentation/en/community/index.rst
new file mode 100644
index 00000000000000..26ae323869baf2
--- /dev/null
+++ b/docs/documentation/en/community/index.rst
@@ -0,0 +1,8 @@
+=================
+Apache Community
+=================
+
+..
toctree:: + :glob: + + * diff --git a/docs/documentation/en/community/members_EN.md b/docs/documentation/en/community/members_EN.md new file mode 100644 index 00000000000000..f052d7dd401b1f --- /dev/null +++ b/docs/documentation/en/community/members_EN.md @@ -0,0 +1,31 @@ +# PMC Members & Committer + +## Mentors + +|id |githubUsername |public name | +|--------|-----------|----------| +|wave |dave2wave |Dave Fisher | +|lukehan | |Luke Han +|shaofengshi |shaofengshi| Shao Feng Shi | +|ningjiang |WillemJiang |Willem Ning Jiang| + + +## PPMC (13) +(the listing below excludes mentors) + +See table below: + +|id |githubUsername |public name | +|--------|-----------|----------| +|lingbin| lingbin |Bin Ling | +|lichaoyong |chaoyli |Chaoyong Li | +|zhaoc |imay |Chun Zhao | +|lide |lide-reed, doris-ci |De Li | +|chenhao |chenhao7253886 |Hao Chen | +|morningman |morningman |Mingyu Chen| +|maruyue || Ruyue Ma | +|sijie |sijie |Sijie Guo | +|zshao |zshao |Zheng Shao| + +## Committers (13) +All committers are members of the PPMC diff --git a/docs/documentation/en/community/pull-request_EN.md b/docs/documentation/en/community/pull-request_EN.md new file mode 100644 index 00000000000000..3218e3df68d6d7 --- /dev/null +++ b/docs/documentation/en/community/pull-request_EN.md @@ -0,0 +1,233 @@ +# Code Submission Guide + +[Pull Request (PR)] (https://help.github.com/articles/about-pull-requests/) can be easily submitted on [Github] (https://github.com/apache/incubator-doris). The PR method of Doris project is described below. + +## Fork Repository + +Go to the [github page] (https://github.com/apache/incubator-doris) of apache/incubator-doris , and click the button `Fork` in the upper right corner for Fork. + +![Fork](../../../resources/images/fork-repo.png) + +### 2. Configuring GIT and submitting modifications + +#### (1) Clone the code locally: + +``` +git clone https://github.com//incubator-doris.git +``` + +Note: Please replace your GitHub name with your yourgithubname\\\\\\\\\\\\\\. + +When clone is completed, origin defaults to the remote fork address on github. + +#### (2) Add apache/incubator-doris to the remote branch upstream of the local warehouse: + +``` +cd incubator-doris +git remote add upstream https://github.com/apache/incubator-doris.git +``` + +#### (3) Check remote warehouse settings: + +``` +git remote -v +origin https://github.com//incubator-doris.git (fetch) +origin https://github.com//incubator-doris.git (push) +upstream https://github.com/apache/incubator-doris.git (fetch) +upstream https://github.com/apache/incubator-doris.git (push) +``` + +#### (4) New branches to modify them: + +``` +git checkout -b +``` + +Note: \ name is customized for you. + +Code changes can be made after creation. + +#### (5) Submit code to remote branch: + +``` +git commit -a -m "" +git push origin +``` + +For more git usage, please visit: [git usage] (https://www.atlassian.com/git/tutorials/set-up-a-repository), not to mention here. + +### 3. Create PR + +#### (1) New PR +Switch to your GitHub page in the browser, switch to the submitted branch yourbranchname\\ and click the `New pull request` button to create it, as shown in the following figure: + +![new PR](../../../resources/images/new-pr.png) + +#### (2) preparation branch +At this time, the `Create pull request` button will appear. If not, please check whether the branch is selected correctly or click on `compare across forks' to re-select the repo and branch. 
+ +![create PR](../../../resources/images//create-pr.png) + +#### (3)填写 Commit Message +Here, please fill in the summary and details of the comment, and then click `Create pull request` to create it. + +For how to write Commit Message, here are some Tips: + +* Please use the form of English verb + object. The verb does not use the past tense and the sentence uses imperative sentence. +* Subject and body should be written, and they should be separated by blank lines (fill in separately on GitHub PR interface). +* Message topic length should not exceed **50** characters; +* Message content should not exceed **72** characters per line, and the excess should be replaced manually. +* Message content is used to explain what has been done, why and how. +* The first letter of the message subject should be **capitalized**, and the end of the sentence **should not** have a full stop. +* The message content specifies the associated issue (if any), such as # 233; + +For more details, see . + +![create PR](../../../resources/images/create-pr2.png) + +#### (4) Complete the creation +After successful creation, you can see that Doris project needs review, you can wait for us to review and join, you can also contact us directly. + +![create PR](../../../resources/images/create-pr3.png) + +So far, your PR creation is complete. Read more about PR [collaborating-with-issues-and-pull-requests] (https://help.github.com/categories/collaborating-with-issues-and-pull-requests/). + +### 4. Conflict Resolution + +When submitting PR, code conflicts are usually caused by multiple people editing the same file. The main steps to resolve conflicts are as follows: + +#### (1) Switch to the main branch + +``` +git checkout master +``` + +#### (2) Synchronize remote main branch to local + +``` +git pull upstream master +``` + +#### (3) Switch back to the previous branch (assuming the branch is named fix) + +``` +git checkout fix +``` + +#### (4) rebase + +``` +git rebase -i master +``` + +At this point, a file that modifies the record will pop up and can be saved directly. Then, we will prompt which files have conflicts. At this time, we can open the conflict file to modify the conflict part. After all the conflicts of the conflict files are resolved, we will execute them. + +``` +git add . +git rebase --continue +``` + +Then you can go back and forth until the screen appears something like * rebase successful * and then you can update the branch that submitted PR: + +``` +git push -f origin fix +``` + +### 5. An example + +#### (1) fetch to the latest code for the local branch of upstream that has been configured + +``` +$ git branch +* master + +$ git fetch upstream +remote: Counting objects: 195, done. +remote: Compressing objects: 100% (68/68), done. +remote: Total 141 (delta 75), reused 108 (delta 48) +Receiving objects: 100% (141/141), 58.28 KiB, done. +Resolving deltas: 100% (75/75), completed with 43 local objects. +From https://github.com/apache/incubator-doris + 9c36200..0c4edc2 master -> upstream/master +``` + +#### (2) rebase + +``` +$ git rebase upstream/master +First, rewinding head to replay your work on top of it... +Fast-forwarded master to upstream/master. +``` + +#### (3) Check to see if other submissions are not synchronized to their own repo submissions + +``` +$ git status +# On branch master +# Your branch is ahead of 'origin/master' by 8 commits. +# +# Untracked files: +# (use "git add ..." 
to include in what will be committed) +# +# custom_env.sh +nothing added to commit but untracked files present (use "git add" to track) +``` + +#### (4) Merge code submitted by others into their own repo + +``` +$ git push origin master +Counting objects: 195, done. +Delta compression using up to 32 threads. +Compressing objects: 100% (41/41), done. +Writing objects: 100% (141/141), 56.66 KiB, done. +Total 141 (delta 76), reused 140 (delta 75) +remote: Resolving deltas: 100% (76/76), completed with 44 local objects. +To https://lide-reed:fc35ff925bd8fd6629be3f6412bacee99d4e5f97@github.com/lide-reed/incubator-doris.git + 9c36200..0c4edc2 master -> master +``` + +#### (5) New branch, ready for development + +``` +$ git checkout -b my_branch +Switched to a new branch 'my_branch' + +$ git branch + master +* my_branch +``` + +#### (6) Prepare to submit after code modification is completed + +``` +$ git add -u +``` + +#### (7) Fill in the message and submit it it to the new local branch + +``` +$ git commit -m "Fix a typo" +[my_branch 55e0ba2] Fix a typo +1 files changed, 2 insertions(+), 2 deletions(-) +``` + +#### (8) Push the branch into GitHub's own repo far away + +``` +$ git push origin my_branch +Counting objects: 11, done. +Delta compression using up to 32 threads. +Compressing objects: 100% (6/6), done. +Writing objects: 100% (6/6), 534 bytes, done. +Total 6 (delta 4), reused 0 (delta 0) +remote: Resolving deltas: 100% (4/4), completed with 4 local objects. +remote: +remote: Create a pull request for 'my_branch' on GitHub by visiting: +remote: https://github.com/lide-reed/incubator-doris/pull/new/my_branch +remote: +To https://lide-reed:fc35ff925bd8fd6629be3f6412bacee99d4e5f97@github.com/lide-reed/incubator-doris.git + * [new branch] my_branch -> my_branch +``` + +At this point, you can create PR according to the previous process. diff --git a/docs/documentation/en/community/release-process_EN.md b/docs/documentation/en/community/release-process_EN.md new file mode 100644 index 00000000000000..71c12fa945b82a --- /dev/null +++ b/docs/documentation/en/community/release-process_EN.md @@ -0,0 +1,566 @@ +# Publish of Apache Doris + +Apache publishing must be at least an IPMC member, a commiter with Apache mailboxes, a role called release manager. + +The general process of publication is as follows: + +1. Launching DISCUSS in the community; +2. Preparing branches and tagging; +3. Packing tag for signature; +4. Upload the signature package to the DEV directory of Apache SVN +3. Send community voting email +4. Result e-mail after the vote is passed +5. Send an email to general@incubator.apache.org for a vote. +6. Email Result to general@incubator.apache.org +7. Upload the signature package to the release directory of Apache SVN and generate relevant links +8. Prepare release note and send Announce mail to general@incubator.apache.org +9. Publish download links on Doris website and GitHub + +Release manager needs Mr. A to sign his own public key before publishing and upload it to the public key server. Then he can use this public key to sign the package ready for publication. + +## 1. Prepare for release + +### 1.1 Launching DISCUSS in the Community + +If you think you've fixed a lot of bugs and developed more important features, any IPMC member can initiate DISCUSS discussions to release a new version. +An e-mail entitled [DISCUSS] x.y.z release can be launched to discuss within the community what bugs have been fixed and what features have been developed. 
+If DISCUSS mail is supported, we can proceed to the next step. + +### 1.2 Preparatory Branch + +Before publishing, we need to build a new branch, which needs to be fully tested to make functions available, bug convergence, and important bugs repaired. + +For example: + +``` +$ git checkout -b branch-0.9 + +``` + +### 1.3 dozen Tags + +When the above branches are stable, tags can be made on them. +Remember to modify the `build_version` variable in `gensrc/script/gen_build_version.sh` when creating tags. For example, `build_version='0.10.0-release'.` + +For example: + +``` +$ git checkout branch-0.9 +$ git tag -a 0.9.0-rc01 -m "0.9.0 release candidate 01" +$ git push origin 0.9.0-rc01 +Counting objects: 1, done. +Writing objects: 100% (1/1), 165 bytes | 0 bytes/s, done. +Total 1 (delta 0), reused 0 (delta 0) +To git@github.com:apache/incubator-doris.git + * [new tag] 0.9.0-rc01 -> 0.9.0-rc01 + +$ git tag +``` + +## 2. Installation and configuration of signature software GnuPG +### 2.1 GnuPG + +In 1991, programmer Phil Zimmermann developed the encryption software PGP to avoid government surveillance. This software is very useful, spread quickly, has become a necessary tool for many programmers. However, it is commercial software and cannot be used freely. So the Free Software Foundation decided to develop a replacement for PGP, called GnuPG. This is the origin of GPG. + +### 2.2 Installation Configuration + +CentOS installation command: + +``` +yum install gnupg +``` +After installation, the default configuration file gpg.conf will be placed in the home directory. + +``` +~/.gnupg /gpg.conf +``` + +If this directory or file does not exist, you can create an empty file directly. +Edit gpg.conf, modify or add KeyServer configuration: + +``` +keyserver hkp http://keys.gnupg.net +``` + +Apache signature recommends SHA512, which can be done by configuring gpg. +Edit gpg.conf and add the following three lines: + +``` +personal-digest-preferences SHA512 +cert -digest -something SHA512 +default-preference-list SHA512 SHA384 SHA256 SHA224 AES256 AES192 AES CAST5 ZLIB BZIP2 ZIP Uncompressed +``` + +## 3. Generating new signatures + +### 3.1 Prepare to Sign + +Recommended settings for generating new signatures: + +We must log in to user account directly through SecureCRT and other terminals. We can't transfer it through Su - user or ssh. Otherwise, the password input box will not show up and make an error. + +Let's first look at the version of GPG and whether it supports SHA512. + +``` +$ gpg --version +gpg (GnuPG) 2.0.22 +libgcrypt 1.5.3 +Copyright (C) 2013 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. + +Home: ~/.gnupg +Supported algorithms: +Pubkey: RSA, ?, ?, ELG, DSA +Cipher: IDEA, 3DES, CAST5, BLOWFISH, AES, AES192, AES256, TWOFISH, + CAMELLIA128, CAMELLIA192, CAMELLIA256 +Hash: MD5, SHA1, RIPEMD160, SHA256, SHA384, SHA512, SHA224 +Compression: Uncompressed, ZIP, ZLIB, BZIP2 +``` + +### 3.2 Generating new signatures + +``` +$ gpg --gen-key +gpg (GnuPG) 2.0.22; Copyright (C) 2013 Free Software Foundation, Inc. +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. + +Please select what kind of key you want: + (1) RSA and RSA (default) + (2) DSA and Elgamal + (3) DSA (sign only) + (4) RSA (sign only) +Your selection? 1 +RSA keys may be between 1024 and 4096 bits long. 
+What keysize do you want? (2048) 4096 +Requested keysize is 4096 bits +Please specify how long the key should be valid. + 0 = key does not expire + = key expires in n days + w = key expires in n weeks + m = key expires in n months + y = key expires in n years +Key is valid for? (0) +Key does not expire at all +Is this correct? (y/N) y + +GnuPG needs to construct a user ID to identify your key. + +Real name: xxx +Name must be at least 5 characters long +Real name: xxx-yyy +Email address: xxx@apache.org +Comment: xxx's key +You selected this USER-ID: + "xxx-yyy (xxx's key) " + +Change (N)ame, (C)omment, (E)mail or (O)kay/(Q)uit? o +``` + +Real name needs to be consistent with the ID shown in ID. apache. org. +Email address is apache's mailbox. + +### 3.3 View and Output + +The first line shows the name of the public key file (pubring. gpg), the second line shows the public key characteristics (4096 bits, Hash string and generation time), the third line shows the "user ID", and the fourth line shows the private key characteristics. + +``` +$ gpg --list-keys +/home/lide/.gnupg/pubring.gpg +----------------------------- +pub 4096R/33DBF2E0 2018-12-06 +uid xxx-yyy (xxx's key) +sub 4096R/0E8182E6 2018-12-06 +``` + +xxx-yy is the user ID. + +gpg --armor --output public-key.txt --export [用户ID] + +``` +$ gpg --armor --output public-key.txt --export xxx-yyy +$ cat public-key.txt +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG V2.0.22 (GNU /Linux) + +mQINBFwJEQ0BEACwqLluHfjBqD/RWZ4uoYxNYHlIzZvbvxAlwS2mn53BirLIU/G3 +9opMWNplvmK+3+gNlRlFpiZ7EvHsF/YJOAP59HmI2Z... +``` + +## 4. Upload signature public key +Public key servers are servers that store users'public keys exclusively on the network. The send-keys parameter uploads the public key to the server. + +gpg --send-keys xxxx + +Where XXX is the last step -- the string after pub in the list-keys result, as shown above: 33DBF2E0 + +You can also upload the contents of the above public-key.txt through the following website: + +``` +http://keys.gnupg.net +``` + +After successful upload, you can query the website and enter 0x33DBF2E0: + +http://keys.gnupg.net + +Queries on the site are delayed and may take an hour. + +## 5. Generate fingerprint and upload it to Apache user information +Because the public key server has no checking mechanism, anyone can upload the public key in your name, so there is no way to guarantee the reliability of the public key on the server. Usually, you can publish a public key fingerprint on the website and let others check whether the downloaded public key is true or not. + +Fingerprint parameter generates public key fingerprints: + +``` +gpg --fingerprint [用户ID] +``` + +``` +$ gpg --fingerprint xxx-yyy +pub 4096R/33DBF2E0 2018-12-06 + Key fingerprint = 07AA E690 B01D 1A4B 469B 0BEF 5E29 CE39 33DB F2E0 +uid xxx-yyy (xxx's key) +sub 4096R/0E8182E6 2018-12-06 +``` + +Paste the fingerprint above (i.e. 07AA E690 B01D 1A4B 469B 0BEF 5E29 CE39 33DB F2E0) into your user information: + +https://id.apache.org +OpenPGP Public Key Primary Fingerprint: + +## 6. Generating keys + +Create a new file named KEYS and write it as follows (without any modification): + +``` +This file contains the PGP keys of various developers. + +Users: pgp < KEYS +or + gpg --import KEYS + +Developers: + pgp -kxa and append it to this file. +or + (pgpk -ll && pgpk -xa ) >> this file. +or + (gpg --list-sigs + && gpg --armor --export ) >> this file. 
+``` + +Then the generation adds the signature information to write: + +``` +gpg --list-sigs [User ID] >> KEYS +``` + +Finally, the public key addition is imported: + +``` +gpg --armor --export [User ID] >> KEYS +``` + +## 7. Packing Signature + +The following steps also need to log into user accounts directly through terminals such as SecureCRT, and can not be transferred through Su - user or ssh, otherwise the password input box will not show and error will be reported. + +``` +$ git checkout 0.9.0-rc01 + +$ git archive --format=tar 0.9.0-rc01 --prefix=apache-doris-0.9.0-incubating-src/ | gzip > apache-doris-0.9.0-incubating-src.tar.gz + +$ gpg -u xxx@apache.org --armor --output apache-doris-0.9.0-incubating-src.tar.gz.asc --detach-sign apache-doris-0.9.0-incubating-src.tar.gz + +$ gpg --verify apache-doris-0.9.0-incubating-src.tar.gz.asc apache-doris-0.9.0-incubating-src.tar.gz + +$ sha512sum apache-doris-0.9.0-incubating-src.tar.gz > apache-doris-0.9.0-incubating-src.tar.gz.sha512 + +$ sha512sum --check apache-doris-0.9.0-incubating-src.tar.gz.sha512 +``` + +## 8. Upload signature packages and KEYS files to DEV SVN + +First, download the SVN library: + +``` +svn co https://dist.apache.org/repos/dist/dev/incubator/doris/ +``` + +Organize all previous files into the following SVN paths + +``` +./doris/ +├── 0.9 +│   └── 0.9.0-rc1 +│   ├── apache-doris-0.9.0-incubating-src.tar.gz +│   ├── apache-doris-0.9.0-incubating-src.tar.gz.asc +│   ├── apache-doris-0.9.0-incubating-src.tar.gz.sha512 +│   └── KEYS +``` + +Upload these files + +``` +svn add 0.9.0-rc1 +svn commit -m "Release Apache Doris (incubating) 0.9.0 rc1" +``` + +## 9. Send community voting emails + +[VOTE] Release Apache Doris 0.9.0-incubating-rc01 + + +``` +Hi all, + +Please review and vote on Apache Doris 0.9.0-incubating-rc01 release. + +The release candidate has been tagged in GitHub as 0.9.0-rc01, available +here: +https://github.com/apache/incubator-doris/releases/tag/0.9.0-rc01 + +===== CHANGE LOG ===== + +New Features: +.... + +====================== + +Thanks to everyone who has contributed to this release. + +The artifacts (source, signature and checksum) corresponding to this release +candidate can be found here: +https://dist.apache.org/repos/dist/dev/incubator/doris/0.9/0.9.0-rc1/ + +This has been signed with PGP key 33DBF2E0, corresponding to +lide@apache.org. +KEYS file is available here: +https://dist.apache.org/repos/dist/dev/incubator/doris/KEYS +It is also listed here: +https://people.apache.org/keys/committer/lide.asc + +To verify and build, you can refer to following wiki: +https://github.com/apache/incubator-doris/wiki/How-to-verify-Apache-Release +https://wiki.apache.org/incubator/IncubatorReleaseChecklist + +The vote will be open for at least 72 hours. +[ ] +1 Approve the release +[ ] +0 No opinion +[ ] -1 Do not release this package because ... + +Best Regards, +xxx +``` + +## 10. Email Result after the vote is passed + +[Result][VOTE] Release Apache Doris 0.9.0-incubating-rc01 + +``` +Thanks to everyone, and this vote is now closed. + +It has passed with 4 +1 (binding) votes and no 0 or -1 votes. + +Binding: +Zhao Chun ++1 xxx ++ 1 Li Chaoyong ++1 Mingyu Chen + +Best Regards, +xxx + +``` + +## 11. Send an e-mail to general@incubator.apache.org for a vote. + +[VOTE] Release Apache Doris 0.9.0-incubating-rc01 + +``` +Hi all, + +Please review and vote on Apache Doris 0.9.0-incubating-rc01 release. + +Apache Doris is an MPP-based interactive SQL data warehousing for reporting and analysis. 
+ +The Apache Doris community has voted on and approved this release: +https://lists.apache.org/thread.html/d70f7c8a8ae448bf6680a15914646005c6483564464cfa15f4ddc2fc@%3Cdev.doris.apache.org%3E + +The vote result email thread: +https://lists.apache.org/thread.html/64d229f0ba15d66adc83306bc8d7b7ccd5910ecb7e842718ce6a61da@%3Cdev.doris.apache.org%3E + +The release candidate has been tagged in GitHub as 0.9.0-rc01, available here: +https://github.com/apache/incubator-doris/releases/tag/0.9.0-rc01 + +There is no CHANGE LOG file because this is the first release of Apache Doris. +Thanks to everyone who has contributed to this release, and there is a simple release notes can be found here: +https://github.com/apache/incubator-doris/issues/406 + +The artifacts (source, signature and checksum) corresponding to this release candidate can be found here: +https://dist.apache.org/repos/dist/dev/incubator/doris/0.9/0.9.0-rc01/ + +This has been signed with PGP key 33DBF2E0, corresponding to lide@apache.org. +KEYS file is available here: +https://dist.apache.org/repos/dist/dev/incubator/doris/KEYS +It is also listed here: +https://people.apache.org/keys/committer/lide.asc + +The vote will be open for at least 72 hours. +[ ] +1 Approve the release +[ ] +0 No opinion +[ ] -1 Do not release this package because ... + +To verify and build, you can refer to following instruction: + +Firstly, you must be install and start docker service, and then you could build Doris as following steps: + +Step1: Pull the docker image with Doris building environment +$ docker pull apachedoris/doris-dev:build-env +You can check it by listing images, its size is about 3.28GB. + +Step2: Run the Docker image +You can run image directly: +$ docker run -it apachedoris/doris-dev:build-env + +Step3: Download Doris source +Now you should in docker environment, and you can download Doris source package. +(If you have downloaded source and it is not in image, you can map its path to image in Step2.) +$ wget https://dist.apache.org/repos/dist/dev/incubator/doris/0.9/0.9.0-rc01/apache-doris-0.9.0.rc01-incubating-src.tar.gz + +Step4: Build Doris +Now you can decompress and enter Doris source path and build Doris. +$ tar zxvf apache-doris-0.9.0.rc01-incubating-src.tar.gz +$ cd apache-doris-0.9.0.rc01-incubating-src +$ sh build.sh + +Best Regards, +xxx +``` + +The threaded connection for mail can be found here: + +`https://lists.apache.org/list.html?dev@doris.apache.org` + + +## 12. Email Result to general@incubator.apache.org + +[RESULT][VOTE] Release Apache Doris 0.9.0-incubating-rc01 + +``` +Hi, + +Thanks to everyone, and the vote for releasing Apache Doris 0.9.0-incubating-rc01 is now closed. + +It has passed with 4 +1 (binding) votes and no 0 or -1 votes. + +Binding: ++1 Willem Jiang ++1 Justin Mclean ++1 ShaoFeng Shi ++1 Makoto Yui + +The vote thread: +https://lists.apache.org/thread.html/da05fdd8d84e35de527f27200b5690d7811a1e97d419d1ea66562130@%3Cgeneral.incubator.apache.org%3E + +Best Regards, +xxx +``` + +## 13. 上传 package 到 release + +When the formal voting is successful, email [Result] first, and then prepare the release package. +Copy the source package, signature file and hash file from the corresponding RC folder published under dev to another directory 0.9.0-incubating. Note that the file name does not need rcxx (rename, but do not recalculate signatures, hash can recalculate, the results will not change) + +KEYS files also need to be copied if they are first released. Then add to SVN release. 
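+
+A minimal sketch of this step, assuming the dev checkout from section 8 (directory `doris/`) sits next to the release checkout and the 0.9.0 rc1 artifacts voted on above; local paths and the commit message are illustrative:
+
+```
+# Check out the release area
+svn co https://dist.apache.org/repos/dist/release/incubator/doris/ doris-release
+cd doris-release
+mkdir 0.9.0-incubating
+
+# Copy the voted rc artifacts from the dev checkout; do NOT re-sign or re-hash them
+cp ../doris/0.9/0.9.0-rc1/apache-doris-0.9.0-incubating-src.tar.gz        0.9.0-incubating/
+cp ../doris/0.9/0.9.0-rc1/apache-doris-0.9.0-incubating-src.tar.gz.asc    0.9.0-incubating/
+cp ../doris/0.9/0.9.0-rc1/apache-doris-0.9.0-incubating-src.tar.gz.sha512 0.9.0-incubating/
+cp ../doris/0.9/0.9.0-rc1/KEYS .    # only needed for the first release
+
+svn add 0.9.0-incubating
+svn commit -m "Release Apache Doris (incubating) 0.9.0"
+```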
+ +``` + +https://dist.apache.org/repos/dist/release/incubator/doris/0.9.0-incubating/ + +Eventually you can see it on apache's website: +http://www.apache.org/dist/incubator/doris/0.9.0-incubating/ + +``` + + +## 14. Send Announce e-mail to general@incubator.apache.org + +Title: + +``` +[ANNOUNCE] Apache Doris (incubating) 0.9.0 Release +``` + +Send mail group: + +``` +general@incubator.apache.org +dev@doris.apache.org +``` + +Mail text: + +``` +Hi All, + +We are pleased to announce the release of Apache Doris 0.9.0-incubating. + +Apache Doris (incubating) is an MPP-based interactive SQL data warehousing for reporting and analysis. + +The release is available at: +http://doris.apache.org/downloads.html + +Thanks to everyone who has contributed to this release, and the release note can be found here: +https://github.com/apache/incubator-doris/releases + +Best Regards, + +On behalf of the Doris team, +xxx +``` + +## 15. Publish links on Doris website and GitHub + +### 15.1 Create Download Links + +Download link: +http://www.apache.org/dyn/closer.cgi?filename=incubator/doris/0.9.0-incubating/apache-doris-0.9.0-incubating-src.tar.gz&action=download + +wget --trust-server-names "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=incubator/doris/0.9.0-incubating/apache-doris-0.9.0-incubating-src.tar.gz" + +Original location: +https://www.apache.org/dist/incubator/doris/0.9.0-incubating/ + +http://www.apache.org/dyn/closer.cgi/incubator/doris/0.9.0-incubating/apache-doris-0.9.0-incubating-src.tar.gz + +源码包(source package): +http://www.apache.org/dyn/closer.cgi/incubator/doris/0.9.0-incubating/apache-doris-0.9.0-incubating-src.tar.gz + +ASC: +http://archive.apache.org/dist/incubator/doris/0.9.0-incubating/apache-doris-0.9.0-incubating-src.tar.gz.asc + +sha512: +http://archive.apache.org/dist/incubator/doris/0.9.0-incubating/apache-doris-0.9.0-incubating-src.tar.gz.sha512 + +KEYS: +http://archive.apache.org /dist /incubator /doris /KEYS + +refer to: + +### 15.2 Prepare release note + +The following two areas need to be modified: + +1. Github's release page + +``` +https://github.com/apache/incubator-doris/releases/tag/0.9.0-rc01 +``` + +2. Doris Official Website Download Page + +``` +http://doris.apache.org /downloads.html +``` diff --git a/docs/documentation/en/community/subscribe-mail-list_EN.md b/docs/documentation/en/community/subscribe-mail-list_EN.md new file mode 100644 index 00000000000000..48a24e8e5026e5 --- /dev/null +++ b/docs/documentation/en/community/subscribe-mail-list_EN.md @@ -0,0 +1,43 @@ +# Subscribe to mailing lists + +Mail List is the most recognized form of communication in Apache community. Generally speaking, open source community questions and answers, technical discussions, transaction decisions are carried through mailing lists. The asynchronous and broadcasting features of mailing lists are also very suitable for communication in open source communities. So how do you subscribe to Apache Doris (incubating) mailing lists? It mainly includes the following five steps. + +## 1. Send Subscription Mail + +Open your own email, create a new email, and send an email to `dev-subscribe@doris.apache.org` (subject and content are arbitrary) + +![step1](../../../resources/images/subscribe-mail-list-step1.png) + +## 2. Receive confirmation emails from dev-help@doris.apache.org + +After the first step, you will receive a confirmation email from `dev-help@doris.apache.org`, which is shown below. 
(**If you fail to receive it for a long time, please confirm that the mail has been intercepted, or has been automatically grouped into "Subscribed Mail", "Spam Mail", "Promotional Mail" folders**) + +![step2](../../../resources/images/subscribe-mail-list-step2.png) + +## 3. Reply to confirmation mail + +For the mail received in the previous step, + +**a. Reply to this email directly** + +***or*** + +**B. Create a new `recipient` e-mail for the `reply address` in the previous step** + +Every subject is acceptable. + +![step3](../../../resources/images/subscribe-mail-list-step3.png) + + +## 4. Receiving Welcome Emails + +After completing the third step, you will receive a welcome email entitled **WELCOME to dev@doris.apache.org**. So far, the work of subscribing to mailing lists has been completed, and community dynamics will be notified by mail. + +![step4](../../../resources/images/subscribe-mail-list-step4.png) + + +## 5. Initiate e-mail discussion (optional) + +After successfully subscribing to the mailing list, if you want to initiate a discussion, send an email directly to `dev@doris.apache.org`. Anyone who subscribes to the mailing list receives the mail. +​ +​ diff --git a/docs/documentation/en/community/verify-apache-release_EN.md b/docs/documentation/en/community/verify-apache-release_EN.md new file mode 100644 index 00000000000000..4d810a2575a70f --- /dev/null +++ b/docs/documentation/en/community/verify-apache-release_EN.md @@ -0,0 +1,100 @@ +# Verify Apaceh Release + +To verify the release, following checklist can used to reference: + +1. [ ] Download links are valid. +2. [ ] Checksums and PGP signatures are valid. +3. [ ] DISCLAIMER is included. +4. [ ] Source code artifacts have correct names matching the current release. +5. [ ] LICENSE and NOTICE files are correct for the repository. +6. [ ] All files have license headers if necessary. +7. [ ] No compiled archives bundled in source archive. +8. [ ] Building is OK. + +## 1. Download source package, signature file, hash file and KEYS + +Download all artifacts, take 0.9.0-incubating-rc01 as an example: + +``` +wget https://dist.apache.org/repos/dist/dev/incubator/doris/0.9.0-incubating-rc01/apache-doris-0.9.0.rc01-incubating-src.tar.gz + +wget https://dist.apache.org/repos/dist/dev/incubator/doris/0.9.0-incubating-rc01/apache-doris-0.9.0.rc01-incubating-src.tar.gz.sha512 + +wget https://dist.apache.org/repos/dist/dev/incubator/doris/0.9.0-incubating-rc01/apache-doris-0.9.0.rc01-incubating-src.tar.gz.asc + +wget https://dist.apache.org/repos/dist/dev/incubator/doris/KEYS +``` + +## 2. Verify signature and hash + +GnuPG is recommended, which can install by yum install gnupg or apt-get install gnupg. + +``` +gpg --import KEYS +gpg --verify apache-doris-0.9.0.rc01-incubating-src.tar.gz.asc apache-doris-0.9.0.rc01-incubating-src.tar.gz +sha512sum --check apache-doris-0.9.0.rc01-incubating-src.tar.gz.sha512 +``` + +## 3. Verify license header + +Apache RAT is recommended to verify license headder, which can dowload as following command. + +``` +wget http://mirrors.tuna.tsinghua.edu.cn/apache//creadur/apache-rat-0.12/apache-rat-0.12-bin.tar.gz +tar zxvf apache -rat -0.12 -bin.tar.gz +``` + +Given your source dir is apache-doris-0.9.0.rc01-incubating-src, you can check with following command. +It will output a file list which don't include ASF license header, and these files used other licenses. 
+ +``` +/usr/java/jdk/bin/java -jar apache-rat-0.12/apache-rat-0.12.jar -a -d apache-doris-0.10.0-incubating-src -e *.md *.MD .gitignore .gitmodules .travis.yml manifest **vendor** **licenses** | grep File: | grep -v "test_data" | grep -v "gutil" | grep -v "json" | grep -v "patch" | grep -v "xml" | grep -v "conf" | grep -v "svg" +``` + +## 4. Verify building + +Firstly, you must be install and start docker service. + +And then you could build Doris as following steps: + +#### Step1: Pull the docker image with Doris building environment + +``` +$ docker pull apachedoris/doris-dev:build-env +``` + +You can check it by listing images, for example: + +``` +$ docker images +REPOSITORY TAG IMAGE ID CREATED SIZE +apachedoris/doris-dev build-env f8bc5d4024e0 21 hours ago 3.28GB +``` + +#### Step2: Run the Docker image + +You can run image directyly: + +``` +$ docker run -it apachedoris/doris-dev:build-env +``` + +#### Step3: Download Doris source +Now you should in docker environment, and you can download Doris source by release package or by git clone in image. +(If you have downloaded source and it is not in image, you can map its path to image in Step2.) + +``` +$ wget https://dist.apache.org/repos/dist/dev/incubator/doris/xxx.tar.gz +``` + +#### Step4: Build Doris +Now you can enter Doris source path and build Doris. + +``` +$ cd incubator-doris +$ sh build.sh +``` + +After successfully building, it will install binary files in the directory output/. + +For more detail, you can refer to README.md in source package. diff --git a/docs/documentation/en/extending-doris/doris-on-es_EN.md b/docs/documentation/en/extending-doris/doris-on-es_EN.md new file mode 100644 index 00000000000000..66fd1157a382d4 --- /dev/null +++ b/docs/documentation/en/extending-doris/doris-on-es_EN.md @@ -0,0 +1,205 @@ +# Doris On ES + +Doris-On-ES combines Doris's distributed query planning capability with ES (Elastic search)'s full-text search capability to provide a more complete OLAP scenario solution: + +1. Multi-index Distributed Join Query in ES +2. Joint Query of Tables in Doris and ES, More Complex Full-Text Retrieval and Filtering +3. Aggregated queries for fields of ES keyword type: suitable for frequent changes in index, tens of millions or more of single fragmented documents, and the cardinality of the field is very large + +This document mainly introduces the realization principle and usage of this function. + +## Noun Interpretation + +* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. +* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. +* Elastic search (ES): The most popular open source distributed search engine. +* DataNode: The data storage and computing node of ES. +* MasterNode: The Master node of ES, which manages metadata, nodes, data distribution, etc. +* scroll: The built-in data set cursor feature of ES for streaming scanning and filtering of data. 
+ + +## How to use it + +### Create appearance + +``` +CREATE EXTERNAL TABLE `es_table` ( + `id` bigint(20) COMMENT "", + `k1` bigint(20) COMMENT "", + `k2` datetime COMMENT "", + `k3` varchar(20) COMMENT "", + `k4` varchar(100) COMMENT "", + `k5` float COMMENT "" +) ENGINE=ELASTICSEARCH +PARTITION BY RANGE(`id`) +() +PROPERTIES ( +"host" = "http://192.168.0.1:8200,http://192.168.0.2:8200", +"user" = "root", +"password" = "root", +"index" = "tindex”, +"type" = "doc" +); +``` + +Description of parameters: + +Parameter | description +---|--- +Host | ES Cluster Connection Address, which can specify one or more, through which Doris obtains the share distribution information of ES version number and index +User | Open the user name of the ES cluster authenticated by basic, you need to ensure that the user has access to: / cluster / state / nodes / HTTP and other path permissions and read permissions for index +Password | corresponding user's password information +The index name of the ES corresponding to the table in index | Doris can be alias +Type | Specifies the type of index, defaulting to _doc +Transport | Internal reservation, default to http + +### Query + +#### Basic Conditions Filtration + +``` +select * from es_table where k1 > 1000 and k3 ='term' or k4 like 'fu*z_' +``` + +#### Extended esquery SQL grammar +The first column name parameter of `esquery` is used to associate `index`, the second parameter is the JSON expression of the basic `Query DSL`, and the curly bracket `{}` is used to include `root` of json. There is and can only be one key of json, such as mat. Ch, geo_shape, bool, etc. + +Match query: + +``` +select * from es_table where esquery(k4, '{ + "match": { + "k4": "doris on elasticsearch" + } + }'); +``` +Geo related queries: + +``` +select * from es_table where esquery(k4, '{ + "geo_shape": { + "location": { + "shape": { + "type": "envelope", + "coordinates": [ + [ + 13, + 53 + ], + [ + 14, + 52 + ] + ] + }, + "relation": "within" + } + } + }'); +``` + +Bool query: + +``` +select * from es_table where esquery(k4, ' { + "bool": { + "must": [ + { + "terms": { + "k1": [ + 11, + 12 + ] + } + }, + { + "terms": { + "k2": [ + 100 + ] + } + } + ] + } + }'); +``` + + + +## Principle + +``` ++----------------------------------------------+ +| | +| Doris +------------------+ | +| | FE +--------------+-------+ +| | | Request Shard Location +| +--+-------------+-+ | | +| ^ ^ | | +| | | | | +| +-------------------+ +------------------+ | | +| | | | | | | | | +| | +----------+----+ | | +--+-----------+ | | | +| | | BE | | | | BE | | | | +| | +---------------+ | | +--------------+ | | | ++----------------------------------------------+ | + | | | | | | | + | | | | | | | + | HTTP SCROLL | | HTTP SCROLL | | ++-----------+---------------------+------------+ | +| | v | | v | | | +| | +------+--------+ | | +------+-------+ | | | +| | | | | | | | | | | +| | | DataNode | | | | DataNode +<-----------+ +| | | | | | | | | | | +| | | +<--------------------------------+ +| | +---------------+ | | |--------------| | | | +| +-------------------+ +------------------+ | | +| Same Physical Node | | +| | | +| +-----------------------+ | | +| | | | | +| | MasterNode +<-----------------+ +| ES | | | +| +-----------------------+ | ++----------------------------------------------+ + + +``` + +1. After the ES appearance is created, FE requests the host specified by the table to obtain HTTP port information of all nodes and share distribution information of index. 
If the request fails, it will traverse the host list sequentially until it succeeds or fails completely. + +2. When querying, the query plan will be generated and sent to the corresponding BE node according to some node information obtained by FE and metadata information of index. + +3. The BE node requests locally deployed ES nodes in accordance with the `proximity principle`. The BE receives data concurrently from each fragment of ES index in the `HTTP Scroll` mode. + +4. After calculating the result, return it to client + +## Push-Down operations +An important function of `Doris On Elastic` search is to push down filtering conditions: push ES under filtering conditions, so that only data that really meets the conditions can be returned, which can significantly improve query performance and reduce the CPU, memory and IO utilization of Doris and Elastic search. + +The following operators are optimized to push down filters as follows: + +| SQL syntax | ES 5.x+ syntax | +|-------|:---:| +| = | term query| +| in | terms query | +| > , < , >= , ⇐ | range | +| and | bool.filter | +| or | bool.should | +| not | bool.must_not | +| not in | bool.must_not + terms | +| esquery | ES Query DSL | + + +## Other notes + +1. ES Version Requirements + +The main version of ES is larger than 5. The scanning mode of ES data before 2. X and after 5. x is different. At present, the scanning mode of ES data after 5. x is supported. +2. Does ES Cluster Support X-Pack Authentication + +Support all ES clusters using HTTP Basic authentication +3. Some queries are much slower than requesting ES + +Yes, for example, query related to _count, etc., the ES internal will directly read the number of documents that meet the requirements of the relevant metadata, without the need to filter the real data. diff --git a/docs/documentation/en/extending-doris/index.rst b/docs/documentation/en/extending-doris/index.rst new file mode 100644 index 00000000000000..4fda2a01dfaa58 --- /dev/null +++ b/docs/documentation/en/extending-doris/index.rst @@ -0,0 +1,8 @@ +=========== +扩展功能 +=========== + +.. toctree:: + :glob: + + * diff --git a/docs/documentation/en/extending-doris/user-defined-function_EN.md b/docs/documentation/en/extending-doris/user-defined-function_EN.md new file mode 100644 index 00000000000000..fd62bc2337e21b --- /dev/null +++ b/docs/documentation/en/extending-doris/user-defined-function_EN.md @@ -0,0 +1,92 @@ +# User Define Function + +Users can extend Doris's capabilities through UDF mechanisms. Through this document, users can create their own UDF. + +## Writing UDF functions + +Before using UDF, users need to write their own UDF functions in Doris's UDF framework. In the `be/src/udf_samples/udf_sample.h | cpp` file, it is a simple UDF Demo. + +Writing a UDF function requires the following steps. + +### Writing functions + +Create the corresponding header file, CPP file, and implement the logic you need in the CPP file. The corresponding relationship between the format of implementation function in CPP file and UDF. + +#### Non-variable parameters + +For UDF with non-variable parameters, the corresponding relationship between them is very direct. +For example, `INT MyADD'(INT, INT) ` UDF corresponds to `IntVal AddUdf(FunctionContext* context, const IntVal & arg1, const IntVal & arg2)`. + +1. `AddUdf` can be any name, as long as it is specified when UDF is created. +2. The first parameter in the implementation function is always `FunctionContext*`. 
The implementer can obtain some query-related content and apply for some memory to be used through this structure. Specific interfaces can be defined in `udf/udf.h`. +3. Implementing functions from the second parameter requires one-to-one correspondence with UDF parameters, such as `IntVal` corresponding to `INT` type. All types in this section are referenced by `const`. +4. Return parameters should correspond to the type of UDF parameters. + +#### Variable parameters + +For variable parameters, see the following example, UDF `String md5sum (String,...)` corresponds to +`StringVal md5sumUdf (FunctionContext * ctx, int num args, const StringVal * args)` + +1. The `md5sumUdf` can also be changed at will. It can be specified at the time of creation. +2. The first parameter, like a non-variable parameter function, is passed in a `FunctionContext*`. +3. The variable parameter part consists of two parts. First, an integer is passed in, which shows that there are several parameters. Later, an array of variable parameter parts is passed in. + +#### Type correspondence + +|UDF Type|Argument Type| +|----|---------| +|TinyInt|TinyIntVal| +|SmallInt|SmallIntVal| +|Int|IntVal| +|BigInt|BigIntVal| +|LargeInt|LargeIntVal| +|Float|FloatVal| +|Double|DoubleVal| +|Date|DateTimeVal| +|Datetime|DateTimeVal| +|Char|StringVal| +|Varchar|StringVal| +|Decimal|DecimalVal| + +## Compiling UDF functions + +### Compile Doris + +Executing `sh build.sh` in the Doris root directory generates the corresponding `headers|libs` in `output/udf/` + +### 编写CMakeLists.txt + +Based on the `headers | libs` generated in the previous step, users can introduce the dependency using tools such as `CMakeLists`; in `CMakeLists`, dynamic libraries can be added by adding `-I|L` to `CMAKE_CXX_FLAGS`, respectively. For example, in `be/src/udf_samples/CMakeLists.txt`, a `udf sample` dynamic library is added using `add_library` (udfsample SHARED udf_sample.cpp). You need to write down all the source files involved later (no header files included). + +### Execute compilation + +Create a `build` directory under this directory and execute `cmake ../` generate `Makefile` under `build`, and execute `make` to generate corresponding dynamic libraries. + +## Create UDF functions + +Through the above steps, you can get a dynamic library. You need to put this dynamic library in a location that can be accessed through the HTTP protocol. Then execute the create UDF function to create a UDF inside the Doris system. You need AMDIN privileges to do this. + +``` +CREATE [AGGREGATE] FUNCTION + name ([argtype][,...]) + [RETURNS] rettype + PROPERTIES (["key"="value"][,...]) +``` +Explain: + +1. In PROPERTIES, `symbol` denotes the corresponding symbol for the execution of the entry function, which must be set. You can get the corresponding symbol by the `nm` command, such as `nm libudfsample.so`, `grep AddUdf`, `ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4`. +2. In PROPERTIES, `object_file` denotes where to download to the corresponding dynamic library. This parameter must be set. +3. name: A function belongs to a DB in the form of `dbName`. `funcName`. When `dbName` is not specified explicitly, the DB where the current session is located is used as `dbName`. + +For more details, see `CREATE FUNCTION`. + +## Using UDF + +Users using UDF/UDAF must have `SELECT` privileges for the corresponding database. + +UDF is used in the same way as normal functions. 
The only difference is that the scope of built-in functions is global, while the scope of a UDF is internal to a DB. When the session is connected to a database, using the UDF name directly will find the corresponding UDF within the current DB. Otherwise, the user needs to explicitly specify the database of the UDF, such as `dbName`.`funcName`.
+
+
+## Delete UDF functions
+
+When you no longer need a UDF, you can delete it with the following command; see `DROP FUNCTION` for details.
diff --git a/docs/documentation/en/getting-started/advance-usage_EN.md b/docs/documentation/en/getting-started/advance-usage_EN.md
new file mode 100644
index 00000000000000..9b0d7fd609856b
--- /dev/null
+++ b/docs/documentation/en/getting-started/advance-usage_EN.md
@@ -0,0 +1,246 @@
+# Advanced Use Guide
+
+Here we introduce some of Doris's advanced features.
+
+## 1 Table Schema Change
+
+The schema of a table can be modified using the ALTER TABLE command, including the following changes:
+
+* Add columns
+* Delete columns
+* Modify column types
+* Change column order
+
+Examples are given below.
+
+The schema of table1 is as follows:
+
+```
++----------+-------------+------+-------+---------+-------+
+| Field    | Type        | Null | Key   | Default | Extra |
++----------+-------------+------+-------+---------+-------+
+| siteid   | int(11)     | No   | true  | 10      |       |
+| citycode | smallint(6) | No   | true  | N/A     |       |
+| username | varchar(32) | No   | true  |         |       |
+| pv       | bigint(20)  | No   | false | 0       | SUM   |
++----------+-------------+------+-------+---------+-------+
+```
+
+We add a new column uv, of type BIGINT, with aggregation type SUM and default value 0:
+
+`ALTER TABLE table1 ADD COLUMN uv BIGINT SUM DEFAULT '0' after pv;`
+
+After the statement is submitted successfully, you can check the progress of the job with the following command:
+
+`SHOW ALTER TABLE COLUMN;`
+
+When the job state is FINISHED, the job is completed and the new schema has taken effect.
+
+After ALTER TABLE is completed, you can view the latest schema through `DESC TABLE`.
+
+```
+mysql> DESC table1;
++----------+-------------+------+-------+---------+-------+
+| Field    | Type        | Null | Key   | Default | Extra |
++----------+-------------+------+-------+---------+-------+
+| siteid   | int(11)     | No   | true  | 10      |       |
+| citycode | smallint(6) | No   | true  | N/A     |       |
+| username | varchar(32) | No   | true  |         |       |
+| pv       | bigint(20)  | No   | false | 0       | SUM   |
+| uv       | bigint(20)  | No   | false | 0       | SUM   |
++----------+-------------+------+-------+---------+-------+
+5 rows in set (0.00 sec)
+```
+
+The following command can be used to cancel the job that is currently being executed:
+
+`CANCEL ALTER TABLE COLUMN FROM table1`
+
+For more help, see `HELP ALTER TABLE`.
+
+## 2 Rollup
+
+Rollup can be understood as a materialized index structure of a table. **Materialized** because its data is stored physically and independently, and **index** because Rollup can reorder columns to increase the hit rate of the prefix index, or reduce the key columns to increase data aggregation.
+
+Examples are given below.
+
+The schema of table1 is now as follows:
+
+```
++----------+-------------+------+-------+---------+-------+
+| Field    | Type        | Null | Key   | Default | Extra |
++----------+-------------+------+-------+---------+-------+
+| siteid   | int(11)     | No   | true  | 10      |       |
+| citycode | smallint(6) | No   | true  | N/A     |       |
+| username | varchar(32) | No   | true  |         |       |
+| pv       | bigint(20)  | No   | false | 0       | SUM   |
+| uv       | bigint(20)  | No   | false | 0       | SUM   |
++----------+-------------+------+-------+---------+-------+
+```
+
+For the detailed data of table1, siteid, citycode and username form a set of keys, on which the pv field is aggregated. If the business side frequently needs to see the total pv per city, a Rollup containing only citycode and pv can be created.
+
+`ALTER TABLE table1 ADD ROLLUP rollup_city(citycode, pv);`
+
+After the statement is submitted successfully, you can check the progress of the job with the following command:
+
+`SHOW ALTER TABLE ROLLUP;`
+
+When the job state is FINISHED, the job is completed.
+
+After the Rollup is created, you can use `DESC table1 ALL` to view the Rollup information of the table.
+
+```
+mysql> desc table1 all;
++-------------+----------+-------------+------+-------+---------+-------+
+| IndexName   | Field    | Type        | Null | Key   | Default | Extra |
++-------------+----------+-------------+------+-------+---------+-------+
+| table1      | siteid   | int(11)     | No   | true  | 10      |       |
+|             | citycode | smallint(6) | No   | true  | N/A     |       |
+|             | username | varchar(32) | No   | true  |         |       |
+|             | pv       | bigint(20)  | No   | false | 0       | SUM   |
+|             | uv       | bigint(20)  | No   | false | 0       | SUM   |
+|             |          |             |      |       |         |       |
+| rollup_city | citycode | smallint(6) | No   | true  | N/A     |       |
+|             | pv       | bigint(20)  | No   | false | 0       | SUM   |
++-------------+----------+-------------+------+-------+---------+-------+
+8 rows in set (0.01 sec)
+```
+
+The following command can be used to cancel the job that is currently being executed:
+
+`CANCEL ALTER TABLE ROLLUP FROM table1;`
+
+After the Rollup is created, queries do not need to specify the Rollup explicitly; they still query the original table. The system automatically determines whether the Rollup should be used. Whether a Rollup is hit can be checked with the `EXPLAIN your_sql;` command.
+
+For more help, see `HELP ALTER TABLE`.
+
+## 2 Query of Data Table
+
+### 2.1 Memory Limitation
+
+To prevent a single user's query from consuming too much memory, queries are memory-controlled. By default, a query task may use no more than 2GB of memory on a single BE node.
+
+If users encounter a `Memory limit exceeded` error, they have usually exceeded this limit.
+
+Users should first try to optimize their SQL statements when they run into a memory overrun.
+
+If 2GB of memory is still not enough, the memory parameter can be set manually.
+
+Display the query memory limit:
+
+```
+mysql> SHOW VARIABLES LIKE "%mem_limit%";
++----------------+------------+
+| Variable_name  | Value      |
++----------------+------------+
+| exec_mem_limit | 2147483648 |
++----------------+------------+
+1 row in set (0.00 sec)
+```
+
+The unit of `exec_mem_limit` is bytes, and its value can be changed with the `SET` command, for example to 8GB:
+
+`SET exec_mem_limit = 8589934592;`
+
+```
+mysql> SHOW VARIABLES LIKE "%mem_limit%";
++----------------+------------+
+| Variable_name  | Value      |
++----------------+------------+
+| exec_mem_limit | 8589934592 |
++----------------+------------+
+1 row in set (0.00 sec)
+```
+
+>* The above modification is session level and is only valid within the current connection session. Disconnecting and reconnecting resets it to the default value.
+>* If you need to modify the global variable, you can set it as follows: `SET GLOBAL exec_mem_limit = 8589934592;` When the setting is complete, disconnect the session and log in again, and the parameter will take effect permanently.
+
+### 2.2 Query timeout
+
+The default query timeout is currently set to 300 seconds. If a query is not completed within 300 seconds, it is cancelled by the Doris system. Users can use this parameter to customize the timeout of their applications and achieve a blocking mode similar to wait(timeout).
+
+View the current timeout setting:
+
+```
+mysql> SHOW VARIABLES LIKE "%query_timeout%";
++---------------+-------+
+| Variable_name | Value |
++---------------+-------+
+| QUERY_TIMEOUT | 300   |
++---------------+-------+
+1 row in set (0.00 sec)
+```
+
+Modify the timeout to 1 minute:
+
+`SET query_timeout = 60;`
+
+>* The current timeout check interval is 5 seconds, so timeouts of less than 5 seconds are not very accurate.
+>* The above modification is also session level. Global validity can be achieved with `SET GLOBAL`.
+
+### 2.3 Broadcast/Shuffle Join
+
+By default, the system implements a Join by conditionally filtering the small table, broadcasting it to the nodes where the large table is located to form an in-memory hash table, and then streaming the data of the large table for the hash join. However, if the filtered data of the small table cannot fit into memory, the Join cannot complete, and the usual result is an error caused by exceeding the memory limit.
+
+If you encounter the above situation, it is recommended to use Shuffle Join, also known as Partitioned Join. In this mode, both the small and the large table are hashed by the Join key and a distributed Join is performed, so the memory consumption is spread across all computing nodes in the cluster.
+
+Use Broadcast Join (default):
+
+```
+mysql> select sum(table1.pv) from table1 join table2 where table1.siteid = 2;
++--------------------+
+| sum(`table1`.`pv`) |
++--------------------+
+|                 10 |
++--------------------+
+1 row in set (0.20 sec)
+```
+
+Use Broadcast Join (explicitly specified):
+
+```
+mysql> select sum(table1.pv) from table1 join [broadcast] table2 where table1.siteid = 2;
++--------------------+
+| sum(`table1`.`pv`) |
++--------------------+
+|                 10 |
++--------------------+
+1 row in set (0.20 sec)
+```
+
+Use Shuffle Join:
+
+```
+mysql> select sum(table1.pv) from table1 join [shuffle] table2 where table1.siteid = 2;
++--------------------+
+| sum(`table1`.`pv`) |
++--------------------+
+|                 10 |
++--------------------+
+1 row in set (0.15 sec)
+```
+
+### 2.4 Query Retry and High Availability
+
+When multiple FE nodes are deployed, users can deploy a load balancing layer on top of the FEs to achieve high availability of Doris.
+
+Here are some highly available solutions:
+
+**The first**
+
+Retry and load balance in the application layer code.
For example, if a connection is found to be dead, it will automatically retry on other connections. Application-level code retry requires the application to configure multiple Doris front-end node addresses. + +** Second ** + +If you use MySQL JDBC connector to connect Doris, you can use jdbc's automatic retry mechanism: + +``` +jdbc:mysql:/[host:port],[host:port].../[database][? propertyName1][=propertyValue1][&propertyName2][=propertyValue2]... +``` + +** The third ** + +Applications can connect to and deploy MySQL Proxy on the same machine by configuring MySQL Proxy's Failover and Load Balance functions. + +`http://dev.mysql.com /doc /refman /5.6 /en /mysql -proxy -using.html ` diff --git a/docs/documentation/en/getting-started/basic-usage_EN.md b/docs/documentation/en/getting-started/basic-usage_EN.md new file mode 100644 index 00000000000000..811f5a82fc9b61 --- /dev/null +++ b/docs/documentation/en/getting-started/basic-usage_EN.md @@ -0,0 +1,355 @@ + +# Guidelines for Basic Use + +Doris uses MySQL protocol to communicate. Users can connect to Doris cluster through MySQL client or MySQL JDBC. When selecting the MySQL client version, it is recommended to use the version after 5.1, because user names of more than 16 characters can not be supported before 5.1. This paper takes MySQL client as an example to show users the basic usage of Doris through a complete process. + +## 1 Create Users + +### 1.1 Root User Logon and Password Modification + +Doris has built-in root and admin users, and the password is empty by default. After starting the Doris program, you can connect to the Doris cluster through root or admin users. +Use the following command to log in to Doris: + +``` +mysql -h FE_HOST -P9030 -uroot +``` + +>` fe_host` is the IP address of any FE node. ` 9030 ` is the query_port configuration in fe.conf. + +After login, you can modify the root password by following commands + +``` +SET PASSWORD FOR 'root' = PASSWORD('your_password'); +``` + +### 1.3 Creating New Users + +Create an ordinary user with the following command. + +``` +CREATE USER 'test' IDENTIFIED BY 'test_passwd'; +``` + +Follow-up login can be done through the following connection commands. + +``` +mysql -h FE_HOST -P9030 -utest -ptest_passwd +``` + +> By default, the newly created common user does not have any permissions. Permission grants can be referred to later permission grants. + +## 2 Data Table Creation and Data Import + +### 2.1 Create a database + +Initially, a database can be created through root or admin users: + +`CREATE DATABASE example_db;` + +> All commands can use'HELP command;'to see detailed grammar help. For example: `HELP CREATE DATABASE;'` + +> If you don't know the full name of the command, you can use "help command a field" for fuzzy query. If you type'HELP CREATE', you can match commands like `CREATE DATABASE', `CREATE TABLE', `CREATE USER', etc. + +After the database is created, you can view the database information through `SHOW DATABASES'. + +``` +MySQL> SHOW DATABASES; ++--------------------+ +| Database | ++--------------------+ +| example_db | +| information_schema | ++--------------------+ +2 rows in set (0.00 sec) +``` + +Information_schema exists to be compatible with MySQL protocol. In practice, information may not be very accurate. Therefore, information about specific databases is suggested to be obtained by directly querying the corresponding databases. 
+ +### 2.2 Account Authorization + +After the example_db is created, the read and write permissions of example_db can be authorized to ordinary accounts, such as test, through the root/admin account. After authorization, the example_db database can be operated by logging in with the test account. + +`GRANT ALL ON example_db TO test;` + +### 2.3 Formulation + +Create a table using the `CREATE TABLE'command. More detailed parameters can be seen: + +`HELP CREATE TABLE;` + +First switch the database: + +`USE example_db;` + +Doris supports single partition and composite partition. + +In the composite partition: + +* The first level is called Partition, or partition. Users can specify a dimension column as a partition column (currently only integer and time type columns are supported), and specify the range of values for each partition. + +* The second stage is called Distribution, or bucket division. Users can specify one or more dimension columns and the number of buckets for HASH distribution of data. + +Composite partitioning is recommended for the following scenarios + +* There are time dimensions or similar dimensions with ordered values, which can be used as partition columns. The partition granularity can be evaluated according to the frequency of importation and the amount of partition data. +* Historic data deletion requirements: If there is a need to delete historical data (for example, only the last N days of data are retained). Using composite partitions, you can achieve this by deleting historical partitions. Data can also be deleted by sending a DELETE statement within a specified partition. +* Solve the data skew problem: Each partition can specify the number of buckets separately. If dividing by day, when the amount of data varies greatly every day, we can divide the data of different partitions reasonably by the number of buckets in the specified partition. Bucket columns recommend choosing columns with high degree of differentiation. + +Users can also use no composite partitions, even single partitions. Then the data are only distributed by HASH. + +Taking the aggregation model as an example, the following two partitions are illustrated separately. + +#### Single partition + +Create a logical table with the name table1. The number of barrels is 10. + +The schema of this table is as follows: + +* Siteid: Type is INT (4 bytes), default value is 10 +* citycode: The type is SMALLINT (2 bytes) +* username: The type is VARCHAR, the maximum length is 32, and the default value is an empty string. +* pv: Type is BIGINT (8 bytes), default value is 0; this is an index column, Doris will aggregate the index column internally, the aggregation method of this column is SUM. + +The TABLE statement is as follows: +``` +CREATE TABLE table1 +( +siteid INT DEFAULT '10', +citycode SMALLINT, +Username VARCHAR (32) DEFAULT', +pv BIGINT SUM DEFAULT '0' +) +AGGREGATE KEY(siteid, citycode, username) +DISTRIBUTED BY HASH(siteid) BUCKETS 10 +PROPERTIES("replication_num" = "1"); +``` + +#### Composite partition + +Create a logical table named table2. + +The schema of this table is as follows: + +* event_day: Type DATE, no default +* Siteid: Type is INT (4 bytes), default value is 10 +* citycode: The type is SMALLINT (2 bytes) +* username: The type is VARCHAR, the maximum length is 32, and the default value is an empty string. +* pv: Type is BIGINT (8 bytes), default value is 0; this is an index column, Doris will aggregate the index column internally, the aggregation method of this column is SUM. 
+ +We use the event_day column as the partition column to create three partitions: p201706, p201707, and p201708. + +* p201706: Range [Minimum, 2017-07-01) +* p201707: Scope [2017-07-01, 2017-08-01) +* p201708: Scope [2017-08-01, 2017-09-01) + +> Note that the interval is left closed and right open. + +Each partition uses siteid to hash buckets, with a bucket count of 10 + +The TABLE statement is as follows: +``` +CREATE TABLE table2 +( +event /day DATE, +siteid INT DEFAULT '10', +citycode SMALLINT, +Username VARCHAR (32) DEFAULT', +pv BIGINT SUM DEFAULT '0' +) +AGGREGATE KEY(event_day, siteid, citycode, username) +PARTITION BY RANGE(event_day) +( +The distribution value of P201706 was lower than that of ("2017-07-01"). +The segmentation value of P201707 is lower than that of ("2017-08-01"). +The segmentation value of P201708 is lower than that of ("2017-09-01"). +) +DISTRIBUTED BY HASH(siteid) BUCKETS 10 +PROPERTIES("replication_num" = "1"); +``` + +After the table is built, you can view the information of the table in example_db: + +``` +MySQL> SHOW TABLES; ++----------------------+ +1.1.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.1.2.1.2.2.2.2.2.2.2.2.2.2.2.2.2.2. ++----------------------+ +| table1 | +| table2 | ++----------------------+ +2 rows in set (0.01 sec) + +MySQL> DESC table1; ++----------+-------------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++----------+-------------+------+-------+---------+-------+ +| siteid | int(11) | Yes | true | 10 | | +| citycode | smallint(6) | Yes | true | N/A | | +"124s; username"124s; varchar (32) "124s; Yes"124s; true "124s;"124s; "124s; ++ 124; PV = 124; Bigint (20) Yes False 0 Sum ++----------+-------------+------+-------+---------+-------+ +4 rows in set (0.00 sec) + +MySQL> DESC table2; ++-----------+-------------+------+-------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-----------+-------------+------+-------+---------+-------+ +| event_day | date | Yes | true | N/A | | +| siteid | int(11) | Yes | true | 10 | | +| citycode | smallint(6) | Yes | true | N/A | | +"124s; username"124s; varchar (32) "124s; Yes"124s; true "124s;"124s; "124s; ++ 124; PV = 124; Bigint (20) Yes False 0 Sum ++-----------+-------------+------+-------+---------+-------+ +5 rows in set (0.00 sec) +``` + +> Notes: +> +> 1. By setting replication_num, the above tables are all single-copy tables. Doris recommends that users adopt the default three-copy settings to ensure high availability. +> 2. Composite partition tables can be added or deleted dynamically. See the Partition section in `HELP ALTER TABLE'. +> 3. Data import can import the specified Partition. See `HELP LOAD'. +> 4. Schema of table can be dynamically modified. +> 5. Rollup can be added to Table to improve query performance. This section can be referred to the description of Rollup in Advanced Usage Guide. + +### 2.4 Import data + +Doris supports a variety of data import methods. Specifically, you can refer to the data import document. Here we use streaming import and Broker import as examples. + +#### Flow-in + +Streaming import transfers data to Doris via HTTP protocol. It can import local data directly without relying on other systems or components. Detailed grammar help can be found in `HELP STREAM LOAD;' + +Example 1: With "table1_20170707" as Label, import table1 tables using the local file table1_data. 
+ +``` +curl --location-trusted -u test:test -H "label:table1_20170707" -H "column_separator:," -T table1_data http://FE_HOST:8030/api/example_db/table1/_stream_load +``` + +> 1. FE_HOST is the IP of any FE node and 8030 is http_port in fe.conf. +> 2. You can use the IP of any BE and the webserver_port in be.conf to connect the target left and right for import. For example: `BE_HOST:8040` + +The local file `table1_data'takes `, `as the separation between data, and the specific contents are as follows: + +``` +1,1,Jim,2 +2,1,grace,2 +3,2,tom,2 +4,3,bush,3 +5,3,helen,3 +``` + +Example 2: With "table2_20170707" as Label, import table2 tables using the local file table2_data. + +``` +curl --location-trusted -u test:test -H "label:table2_20170707" -H "column_separator:," -T table1_data http://127.0.0.1:8030/api/example_db/table2/_stream_load +``` + +The local file `table2_data'is separated by `t'. The details are as follows: + +``` +2017 -07 -03: 1st Jim +2017-07-05 2 1 grace 2 +2017-07-123 2 Tom 2 +2017 -07 -15 4 3 'bush' 3 +2017 -07 -12 5 3 'helen 3 +``` + +> Notes: +> +> 1. The recommended file size for streaming import is limited to 10GB. Excessive file size will result in higher cost of retry failure. +> 2. Each batch of imported data needs to take a Label. Label is best a string related to a batch of data for easy reading and management. Doris based on Label guarantees that the same batch of data can be imported only once in a database. Label for failed tasks can be reused. +> 3. Streaming imports are synchronous commands. The successful return of the command indicates that the data has been imported, and the failure of the return indicates that the batch of data has not been imported. + +'35;'35;' 35;'35; Broker'235488;' + +Broker imports import data from external storage through deployed Broker processes. For more help, see `HELP BROKER LOAD;'` + +Example: Import files on HDFS into table1 table with "table1_20170708" as Label + +``` +LOAD LABEL table1_20170708 +( +DATA INFILE("hdfs://your.namenode.host:port/dir/table1_data") +INTO TABLE table1 +) +WITH BROKER hdfs +( +"Username" = "HDFS\\ user" +"password"="hdfs_password" +) +PROPERTIES +( +Timeout ="3600", +"max_filter_ratio"="0.1" +); +``` + +Broker imports are asynchronous commands. Successful execution of the above commands only indicates successful submission of tasks. Successful imports need to be checked through `SHOW LOAD;' Such as: + +`SHOW LOAD WHERE LABLE = "table1_20170708";` + +In the return result, FINISHED in the `State'field indicates that the import was successful. 
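+
+As a convenience, this check can be scripted. The following is a rough sketch only: the connection parameters reuse the `test` account created in section 1, and the column position of `State` in the `SHOW LOAD` output is an assumption that should be verified against your Doris version.
+
+```
+# Poll the load job until it reaches a final state (illustrative sketch)
+while true; do
+    # State is assumed to be the 3rd column of SHOW LOAD output
+    state=$(mysql -h FE_HOST -P9030 -utest -ptest_passwd example_db -N -s \
+        -e 'SHOW LOAD WHERE LABEL = "table1_20170708";' | awk '{print $3}')
+    echo "current state: ${state}"
+    if [ "${state}" = "FINISHED" ] || [ "${state}" = "CANCELLED" ]; then
+        break
+    fi
+    sleep 10
+done
+```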
+
+For more information about `SHOW LOAD`, see `HELP SHOW LOAD;`.
+
+An asynchronous import task can be cancelled before it finishes:
+
+`CANCEL LOAD WHERE LABEL = "table1_20170708";`
+
+## 3 Data query
+
+### 3.1 Simple Query
+
+Examples:
+
+```
+MySQL> SELECT * FROM table1 LIMIT 3;
++--------+----------+----------+------+
+| siteid | citycode | username | pv   |
++--------+----------+----------+------+
+|      2 |        1 | 'grace'  |    2 |
+|      5 |        3 | 'helen'  |    3 |
+|      3 |        2 | 'tom'    |    2 |
++--------+----------+----------+------+
+3 rows in set (0.01 sec)
+
+MySQL> SELECT * FROM table1 ORDER BY citycode;
++--------+----------+----------+------+
+| siteid | citycode | username | pv   |
++--------+----------+----------+------+
+|      2 |        1 | 'grace'  |    2 |
+|      1 |        1 | 'jim'    |    2 |
+|      3 |        2 | 'tom'    |    2 |
+|      4 |        3 | 'bush'   |    3 |
+|      5 |        3 | 'helen'  |    3 |
++--------+----------+----------+------+
+5 rows in set (0.01 sec)
+```
+
+### 3.3 Join Query
+
+Examples:
+
+```
+MySQL> SELECT SUM(table1.pv) FROM table1 JOIN table2 WHERE table1.siteid = table2.siteid;
++--------------------+
+| sum(`table1`.`pv`) |
++--------------------+
+|                 12 |
++--------------------+
+1 row in set (0.20 sec)
+```
+
+### 3.4 Subquery
+
+Examples:
+
+```
+MySQL> SELECT SUM(pv) FROM table2 WHERE siteid IN (SELECT siteid FROM table1 WHERE siteid > 2);
++-----------+
+| sum(`pv`) |
++-----------+
+|         8 |
++-----------+
+1 row in set (0.13 sec)
+```
diff --git a/docs/documentation/en/getting-started/best-practice_EN.md b/docs/documentation/en/getting-started/best-practice_EN.md
new file mode 100644
index 00000000000000..d9e6c5f03a41e2
--- /dev/null
+++ b/docs/documentation/en/getting-started/best-practice_EN.md
@@ -0,0 +1,163 @@
+# Best Practices
+
+## 1 Table Creation
+
+### 1.1 Data Model Selection
+
+Doris's data models currently fall into three categories: AGGREGATE KEY, UNIQUE KEY and DUPLICATE KEY. Data in all three models is sorted by KEY.
+
+1. AGGREGATE KEY
+
+When the AGGREGATE KEY values are the same, old and new records are aggregated. The aggregation functions currently supported are SUM, MIN, MAX and REPLACE.
+
+The AGGREGATE KEY model can aggregate data in advance and is suitable for reporting and multi-dimensional analysis.
+
+```
+CREATE TABLE site_visit
+(
+siteid      INT,
+city        SMALLINT,
+username    VARCHAR(32),
+pv BIGINT   SUM DEFAULT '0'
+)
+AGGREGATE KEY(siteid, city, username)
+DISTRIBUTED BY HASH(siteid) BUCKETS 10;
+```
+
+2. UNIQUE KEY
+
+When the UNIQUE KEY values are the same, the new record overwrites the old record. At present, UNIQUE KEY is implemented in the same way as the REPLACE aggregation of AGGREGATE KEY; the two are essentially the same. It is suitable for analytical business with update requirements.
+
+```
+CREATE TABLE sales_order
+(
+orderid     BIGINT,
+status      TINYINT,
+username    VARCHAR(32),
+amount      BIGINT DEFAULT '0'
+)
+UNIQUE KEY(orderid)
+DISTRIBUTED BY HASH(orderid) BUCKETS 10;
+```
+
+3. DUPLICATE KEY
+
+Only sort columns are specified, and identical rows are not merged. It is suitable for analytical business where data does not need to be aggregated in advance.
+
+```
+CREATE TABLE session_data
+(
+visitorid   SMALLINT,
+sessionid   BIGINT,
+visittime   DATETIME,
+city        CHAR(20),
+province    CHAR(20),
+ip          varchar(32),
+brower      CHAR(20),
+url         VARCHAR(1024)
+)
+DUPLICATE KEY(visitorid, sessionid)
+DISTRIBUTED BY HASH(sessionid, visitorid) BUCKETS 10;
+```
+
+### 1.2 Wide Tables vs. Star Schema
+
+To adapt to front-end business, the business side often does not distinguish dimension information from indicator information and defines the Schema as one wide table. For Doris, the performance of such wide tables is often unsatisfactory:
+
+* The Schema has many fields, and the aggregation model may contain more Key columns, so the number of columns that need to be sorted during import increases.
+* Updates to dimension information are reflected in the whole table, and the frequency of such updates directly affects query efficiency.
+
+Users are advised to use a Star Schema to separate dimension tables from indicator tables as much as possible. Frequently updated dimension tables can be placed in MySQL external tables. If there are only a few updates, they can be placed directly in Doris. When storing dimension tables in Doris, more replicas can be set for them to improve Join performance.
+
+### 1.4 Partitions and Buckets
+
+Doris supports two levels of partitioned storage. The first level is the RANGE partition and the second level is the HASH bucket.
+
+1. RANGE partition
+
+The RANGE partition is used to divide data into different intervals, which can be logically understood as dividing the original table into multiple sub-tables. In practice, most users choose to partition by time, which has the following advantages:
+
+* Hot and cold data can be distinguished
+* Doris tiered storage (SSD + SATA) can be used
+* Data can be deleted by partition more quickly
+
+2. HASH bucket
+
+Data is divided into different buckets according to the hash value.
+
+* It is recommended to use columns with high distinctiveness as bucketing columns to avoid data skew.
+* To facilitate data recovery, it is recommended that a single bucket not be too large and stay within 10GB. Therefore, the number of buckets should be considered carefully when creating tables or adding partitions; different partitions can specify different numbers of buckets.
+
+### 1.5 Sparse Index and Bloom Filter
+
+Doris stores data in an ordered way and builds a sparse index on the ordered data, with an index granularity of one block (1024 rows).
+
+The sparse index uses a fixed-length prefix of the schema as the index content; Doris currently uses a 36-byte prefix as the index.
+
+* When creating a table, it is recommended to put the common filter fields of queries at the front of the Schema: the more distinctive and the more frequently queried a field is, the earlier it should be placed.
+* A special case is VARCHAR-type fields. A VARCHAR field can only be the last field of the sparse index: the index is truncated at the VARCHAR column, so if a VARCHAR appears early, the index may be shorter than 36 bytes. See [Data Model, ROLLUP and Prefix Index](./data-model-rollup.md) for details.
+* In addition to the sparse index, Doris also provides a BloomFilter index. The BloomFilter index has an obvious filtering effect on columns with high distinctiveness. If a VARCHAR column cannot be placed in the sparse index, a BloomFilter index can be created for it.
+
+### 1.6 Materialized View (Rollup)
+
+Rollup can essentially be understood as a materialized index structure of the original table.
When creating Rollup, only some columns in Base Table can be selected as Schema. The order of fields in Schema can also be different from that in Base Table. + +Rollup can be considered in the following cases: + +1. Base Table 中数据聚合度不高。 + +This is usually due to the fact that Base Table has more differentiated fields. At this point, you can consider selecting some columns and establishing Rollup. + +For the `site_visit'table: + +``` +site -u visit (siteid, city, username, pv) +``` + +Siteid may lead to a low degree of data aggregation. If business parties often base their PV needs on city statistics, they can build a city-only, PV-based ollup: + +``` +ALTER TABLE site_visit ADD ROLLUP rollup_city(city, pv); +``` + +2. The prefix index in Base Table cannot be hit + +Generally, the way Base Table is constructed cannot cover all query modes. At this point, you can consider adjusting the column order and establishing Rollup. + +Database Session + +``` +session -u data (visitorid, sessionid, visittime, city, province, ip, browser, url) +``` + +In addition to visitorid analysis, there are Brower and province analysis cases, Rollup can be established separately. + +``` +ALTER TABLE session_data ADD ROLLUP rollup_brower(brower,province,ip,url) DUPLICATE KEY(brower,province); +``` + +## 2 Schema Change + +Doris中目前进行 Schema Change 的方式有三种:Sorted Schema Change,Direct Schema Change, Linked Schema Change。 + +1. Sorted Schema Change + +The sorting of columns has been changed and the data needs to be reordered. For example, delete a column in a sorted column and reorder the fields. + +``` +ALTER TABLE site_visit DROP COLUMN city; +``` + +2. Direct Schema Change: There is no need to reorder, but there is a need to convert the data. For example, modify the type of column, add a column to the sparse index, etc. + +``` +ALTER TABLE site_visit MODIFY COLUMN username varchar(64); +``` + +3. Linked Schema Change: 无需转换数据,直接完成。例如加列操作。 + +``` +ALTER TABLE site_visit ADD COLUMN click bigint SUM default '0'; +``` + +Schema is recommended to be considered when creating tables so that Schema can be changed more quickly. diff --git a/docs/documentation/en/getting-started/data-model-rollup_EN.md b/docs/documentation/en/getting-started/data-model-rollup_EN.md new file mode 100644 index 00000000000000..f491908a04daa3 --- /dev/null +++ b/docs/documentation/en/getting-started/data-model-rollup_EN.md @@ -0,0 +1,612 @@ +# Data Model, ROLLUP and Prefix Index + +This document describes Doris's data model, ROLLUP and prefix index concepts at the logical level to help users better use Doris to cope with different business scenarios. + +## Basic concepts + +In Doris, data is logically described in the form of tables. +A table consists of rows and columns. Row is a row of user data. Column is used to describe different fields in a row of data. + +Columns can be divided into two categories: Key and Value. From a business perspective, Key and Value can correspond to dimension columns and indicator columns, respectively. + +Doris's data model is divided into three main categories: + +*Aggregate +* Uniq +* Duplicate + +Let's introduce them separately. + +## Aggregate 模型 + +We illustrate what aggregation model is and how to use it correctly with practical examples. 
+
+### Example 1: Importing data aggregation
+
+Assume that the business has the following data table schema:
+
+|ColumnName|Type|AggregationType|Comment|
+|---|---|---|---|
+|user_id|LARGEINT||user id|
+|date|DATE||date when the data are imported|
+|city|VARCHAR(20)||user city|
+|age|SMALLINT||user age|
+|sex|TINYINT||user gender|
+|last_visit_date|DATETIME|REPLACE|last visit time of the user|
+|cost|BIGINT|SUM|total consumption of the user|
+|max_dwell_time|INT|MAX|maximum dwell time of the user|
+|min_dwell_time|INT|MIN|minimum dwell time of the user|
+
+Converted into a CREATE TABLE statement, it looks as follows (the Partition and Distribution information is omitted):
+
+```
+CREATE TABLE IF NOT EXISTS example_db.expamle_tbl
+(
+`user_id` LARGEINT NOT NULL COMMENT "user id",
+`date` DATE NOT NULL COMMENT "date when the data are imported",
+`city` VARCHAR(20) COMMENT "user city",
+`age` SMALLINT COMMENT "user age",
+`sex` TINYINT COMMENT "user gender",
+`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit time of the user",
+`cost` BIGINT SUM DEFAULT "0" COMMENT "total consumption of the user",
+`max_dwell_time` INT MAX DEFAULT "0" COMMENT "maximum dwell time of the user",
+`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "minimum dwell time of the user"
+)
+AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
+... /* Partition and Distribution information omitted */
+;
+```
+
+As you can see, this is a typical fact table of user information and access behavior.
+In a general star schema, user information and access behavior are stored in a dimension table and a fact table respectively. Here, to explain Doris's data model more conveniently, we store both kinds of information in a single table.
+
+The columns in the table are divided into Key (dimension) columns and Value (indicator) columns according to whether `AggregationType` is set. Columns without `AggregationType`, such as `user_id`, `date` and `age`, are **Key** columns, while columns with `AggregationType` set are **Value** columns.
+
+When data is imported, rows with identical Key column values are aggregated into one row, and the Value columns are aggregated according to the configured `AggregationType`. `AggregationType` currently supports the following four aggregation methods:
+
+1. SUM: sum; the Values of multiple rows are accumulated.
+2. REPLACE: replace; the Value in the next batch of data replaces the Value in previously imported rows.
+3. MAX: keep the maximum value.
+4. MIN: keep the minimum value.
+
+Suppose we have the following imported data (raw data):
+
+|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
+|---|---|---|---|---|---|---|---|---|
+| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10|
+| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2|
+| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
+| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
+| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
+| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
+| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6|
+
+Let's assume that this is a table that records the user's behavior in accessing a commodity page.
Let's take the first row of data as an example and explain it as follows: + +| Data | Description| +|---|---| +| 10000 | User id, each user uniquely identifies id| +| 2017-10-01 | Data storage time, accurate to date| +| Beijing | User City| +| 20 | User Age| +| 0 | Gender male (1 for female)| +| 2017-10-01 06:00 | User's time to visit this page, accurate to seconds| +| 20 | Consumption generated by the user's current visit| +| 10 | User's visit, time to stay on the page| +| 10 | User's current visit, time spent on the page (redundancy)| + +Then when this batch of data is imported into Doris correctly, the final storage in Doris is is as follows: + +|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---|---|---|---|---| +| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2| +| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| +| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| +| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| +| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| +| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| + +As you can see, there is only one line of aggregated data left for 10,000 users. The data of other users are consistent with the original data. Here we first explain the aggregated data of user 10000: + +The first five columns remain unchanged, starting with column 6 `last_visit_date': + +*` 2017-10-01 07:00 `: Because the `last_visit_date'column is aggregated by REPLACE, the `2017-10-01 07:00 ` column has been replaced by `2017-10-01 06:00'. +> Note: For data in the same import batch, the order of replacement is not guaranteed for the aggregation of REPLACE. For example, in this case, it may be `2017-10-01 06:00'. For data from different imported batches, it can be guaranteed that the data from the latter batch will replace the former batch. + +*` 35 `: Because the aggregation type of the `cost'column is SUM, 35 is accumulated from 20 + 15. +*` 10 `: Because the aggregation type of the `max_dwell_time'column is MAX, 10 and 2 take the maximum and get 10. +*` 2 `: Because the aggregation type of `min_dwell_time'column is MIN, 10 and 2 take the minimum value and get 2. + +After aggregation, Doris ultimately only stores aggregated data. In other words, detailed data will be lost and users can no longer query the detailed data before aggregation. + +### Example 2: Keep detailed data + +Following example 1, we modify the table structure as follows: + +Columns +|---|---|---|---| +| userid | LARGEINT | | user id| +| date | DATE | | date of data filling| +| Time stamp | DATETIME | | Data filling time, accurate to seconds| +| City | VARCHAR (20) | | User City| +| age | SMALLINT | | User age| +| sex | TINYINT | | User gender| +| Last visit date | DATETIME | REPLACE | Last user access time| +| Cost | BIGINT | SUM | Total User Consumption| +| max dwell time | INT | MAX | Maximum user residence time| +| min dwell time | INT | MIN | User minimum residence time| + +That is to say, a column of `timestamp'has been added to record the data filling time accurate to seconds. 
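+
+Written as a CREATE TABLE statement, the only change compared with Example 1 is the extra `timestamp` Key column (a sketch following the column list above; Partition and Distribution information is again omitted):
+
+```
+CREATE TABLE IF NOT EXISTS example_db.expamle_tbl
+(
+`user_id` LARGEINT NOT NULL COMMENT "user id",
+`date` DATE NOT NULL COMMENT "date when the data are imported",
+`timestamp` DATETIME NOT NULL COMMENT "time when the data are imported, accurate to seconds",
+`city` VARCHAR(20) COMMENT "user city",
+`age` SMALLINT COMMENT "user age",
+`sex` TINYINT COMMENT "user gender",
+`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit time of the user",
+`cost` BIGINT SUM DEFAULT "0" COMMENT "total consumption of the user",
+`max_dwell_time` INT MAX DEFAULT "0" COMMENT "maximum dwell time of the user",
+`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "minimum dwell time of the user"
+)
+AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)
+... /* Partition and Distribution information omitted */
+;
+```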
+ +The imported data are as follows: + +|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---|---|---|---|---|---| +| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10| +| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2| +| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| +| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| +| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| +| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| +| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| + +Then when this batch of data is imported into Doris correctly, the final storage in Doris is is as follows: + +|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---|---|---|---|---|---| +| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10| +| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2| +| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| +| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| +| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| +| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| +| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| + +We can see that the stored data, just like the imported data, does not aggregate at all. This is because, in this batch of data, because the `timestamp'column is added, the Keys of all rows are ** not exactly the same **. That is, as long as the keys of each row are not identical in the imported data, Doris can save the complete detailed data even in the aggregation model. + +### Example 3: Importing data and aggregating existing data + +Take Example 1. 
Suppose that the data in the table are as follows: + +|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---|---|---|---|---| +| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2| +| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| +| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| +| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| +| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| +| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| + +We imported a new batch of data: + +|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---|---|---|---|---| +| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 11:22:00 | 44 | 19 | 19| +| 10005 | 2017-10-03 | Changsha | 29 | 1 | 2017-10-03 18:11:02 | 3 | 1 | 1| + +Then when this batch of data is imported into Doris correctly, the final storage in Doris is is as follows: + +|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---|---|---|---|---| +| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2| +| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| +| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| +| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| +| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| +| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 11:22:00 | 55 | 19 | 6| +| 10005 | 2017-10-03 | Changsha | 29 | 1 | 2017-10-03 18:11:02 | 3 | 1 | 1| + +As you can see, the existing data and the newly imported data of user 10004 have been aggregated. At the same time, 10005 new users'data were added. + +Data aggregation occurs in Doris in the following three stages: + +1. The ETL stage of data import for each batch. This phase aggregates data within each batch of imported data. +2. The stage in which the underlying BE performs data Compaction. At this stage, BE aggregates data from different batches that have been imported. +3. Data query stage. In data query, the data involved in the query will be aggregated accordingly. + +Data may be aggregated to varying degrees at different times. For example, when a batch of data is just imported, it may not be aggregated with the existing data. But for users, user** can only query aggregated data. That is, different degrees of aggregation are transparent to user queries. Users should always assume that data exists in terms of the degree of aggregation that ** ultimately completes, and ** should not assume that some aggregation has not yet occurred **. (See the section ** Limitations of the aggregation model ** for more details.) + +## Uniq Model + +In some multi-dimensional analysis scenarios, users are more concerned with how to ensure the uniqueness of Key, that is, how to obtain the Primary Key uniqueness constraint. Therefore, we introduce Uniq's data model. This model is essentially a special case of aggregation model and a simplified representation of table structure. Let's give an example. 
+
+|ColumnName|Type|IsKey|Comment|
+|---|---|---|---|
+|user_id|BIGINT|Yes|user id|
+|username|VARCHAR(50)|Yes|user nickname|
+|city|VARCHAR(20)|No|user city|
+|age|SMALLINT|No|user age|
+|sex|TINYINT|No|user gender|
+|phone|LARGEINT|No|user phone number|
+|address|VARCHAR(500)|No|user address|
+|register_time|DATETIME|No|user registration time|
+
+This is a typical user base-information table. This type of data has no aggregation requirement; only the uniqueness of the primary key needs to be guaranteed (here the primary key is user_id + username). The CREATE TABLE statement is as follows:
+
+```
+CREATE TABLE IF NOT EXISTS example_db.expamle_tbl
+(
+`user_id` LARGEINT NOT NULL COMMENT "user id",
+`username` VARCHAR(50) NOT NULL COMMENT "user nickname",
+`city` VARCHAR(20) COMMENT "user city",
+`age` SMALLINT COMMENT "user age",
+`sex` TINYINT COMMENT "user gender",
+`phone` LARGEINT COMMENT "user phone number",
+`address` VARCHAR(500) COMMENT "user address",
+`register_time` DATETIME COMMENT "user registration time"
+)
+UNIQUE KEY(`user_id`, `user_name`)
+... /* Partition and Distribution information omitted */
+;
+```
+
+This table structure is exactly the same as the following table structure described with the aggregation model:
+
+|ColumnName|Type|AggregationType|Comment|
+|---|---|---|---|
+|user_id|BIGINT||user id|
+|username|VARCHAR(50)||user nickname|
+|city|VARCHAR(20)|REPLACE|user city|
+|age|SMALLINT|REPLACE|user age|
+|sex|TINYINT|REPLACE|user gender|
+|phone|LARGEINT|REPLACE|user phone number|
+|address|VARCHAR(500)|REPLACE|user address|
+|register_time|DATETIME|REPLACE|user registration time|
+
+And the corresponding CREATE TABLE statement:
+
+```
+CREATE TABLE IF NOT EXISTS example_db.expamle_tbl
+(
+`user_id` LARGEINT NOT NULL COMMENT "user id",
+`username` VARCHAR(50) NOT NULL COMMENT "user nickname",
+`city` VARCHAR(20) REPLACE COMMENT "user city",
+`age` SMALLINT REPLACE COMMENT "user age",
+`sex` TINYINT REPLACE COMMENT "user gender",
+`phone` LARGEINT REPLACE COMMENT "user phone number",
+`address` VARCHAR(500) REPLACE COMMENT "user address",
+`register_time` DATETIME REPLACE COMMENT "user registration time"
+)
+AGGREGATE KEY(`user_id`, `user_name`)
+... /* Partition and Distribution information omitted */
+;
+```
+
+That is to say, the Uniq model can be completely substituted by REPLACE in the aggregation model. Its internal implementation and data storage are exactly the same. No further examples are given here.
+
+## Duplicate Model
+
+In some multidimensional analysis scenarios, the data has neither primary keys nor aggregation requirements. Therefore, we introduce the Duplicate data model to meet this kind of demand. An example is given below.
+
+|ColumnName|Type|SortKey|Comment|
+|---|---|---|---|
+|timestamp|DATETIME|Yes|log time|
+|type|INT|Yes|log type|
+|error_code|INT|Yes|error code|
+|error_msg|VARCHAR(1024)|No|error details|
+|op_id|BIGINT|No|operator id|
+|op_time|DATETIME|No|processing time|
+
+The CREATE TABLE statement is as follows:
+
+```
+CREATE TABLE IF NOT EXISTS example_db.expamle_tbl
+(
+`timestamp` DATETIME NOT NULL COMMENT "log time",
+`type` INT NOT NULL COMMENT "log type",
+`error_code` INT COMMENT "error code",
+`error_msg` VARCHAR(1024) COMMENT "error details",
+`op_id` BIGINT COMMENT "operator id",
+`op_time` DATETIME COMMENT "processing time"
+)
+DUPLICATE KEY(`timestamp`, `type`)
+... /* Partition and Distribution information omitted */
+;
+```
+
+This data model is different from the Aggregate and Uniq models.
Data is stored entirely in accordance with the data in the imported file, without any aggregation. Even if the two rows of data are identical, they will be retained. +The DUPLICATE KEY specified in the table building statement is only used to specify which columns the underlying data is sorted according to. (The more appropriate name should be "Sorted Column", where the name "DUPLICATE KEY" is used to specify the data model used. For more explanations of "Sorted Column", see the section ** Prefix Index **. On the choice of DUPLICATE KEY, we recommend that the first 2-4 columns be selected appropriately. + +This data model is suitable for storing raw data without aggregation requirements and primary key uniqueness constraints. For more usage scenarios, see the ** Limitations of the Aggregation Model ** section. + +## ROLLUP + +ROLLUP in multidimensional analysis means "scroll up", which means that data is aggregated further at a specified granularity. + +### Basic concepts + +In Doris, we make the table created by the user through the table building statement a Base table. Base table holds the basic data stored in the way specified by the user's table-building statement. + +On top of the Base table, we can create any number of ROLLUP tables. These ROLLUP data are generated based on the Base table and physically ** stored independently **. + +The basic function of ROLLUP tables is to obtain coarser aggregated data on the basis of Base tables. + +Let's illustrate the ROLLUP tables and their roles in different data models with examples. + +#### Aggregate 和 Uniq 模型中的 ROLLUP + +Because Uniq is only a special case of the Aggregate model, we do not distinguish it here. + +Example 1: Get the total consumption per user + +Following ** Example 2 ** in the ** Aggregate Model ** section, the Base table structure is as follows: + +Columns +|---|---|---|---| +| user_id | LARGEINT | | user id| +| date | DATE | | date of data filling| +| Time stamp | DATETIME | | Data filling time, accurate to seconds| +| City | VARCHAR (20) | | User City| +| age | SMALLINT | | User age| +| sex | TINYINT | | User gender| +| Last_visit_date | DATETIME | REPLACE | Last user access time| +| Cost | BIGINT | SUM | Total User Consumption| +| max dwell time | INT | MAX | Maximum user residence time| +| min dwell time | INT | MIN | User minimum residence time| + +The data stored are as follows: + +|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---|---|---|---|---|---| +| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10| +| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2| +| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| +| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| +| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| +| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| +| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| + +On this basis, we create a ROLLUP: + +1240; Colonname 12412; +|---| +|user_id| +|cost| + +The ROLLUP contains only two columns: user_id and cost. 
After the creation, the data stored in the ROLLUP is as follows: + +|user\_id|cost| +|---|---| +|10000|35| +|10001|2| +|10002|200| +|10003|30| +|10004|111| + +As you can see, ROLLUP retains only the results of SUM on the cost column for each user_id. So when we do the following query: + +`SELECT user_id, sum(cost) FROM table GROUP BY user_id;` + +Doris automatically hits the ROLLUP table, thus completing the aggregated query by scanning only a very small amount of data. + +2. Example 2: Get the total consumption, the longest and shortest page residence time of users of different ages in different cities + +Follow example 1. Based on the Base table, we create a ROLLUP: + +Columns +|---|---|---|---| +| City | VARCHAR (20) | | User City| +| age | SMALLINT | | User age| +| Cost | BIGINT | SUM | Total User Consumption| +| max dwell time | INT | MAX | Maximum user residence time| +| min dwell time | INT | MIN | User minimum residence time| + +After the creation, the data stored in the ROLLUP is as follows: + +|city|age|cost|max\_dwell\_time|min\_dwell\_time| +|---|---|---|---|---| +| Beijing | 20 | 0 | 30 | 10 | 2| +| Beijing | 30 | 1 | 2 | 22 | 22| +| Shanghai | 20 | 1 | 200 | 5 | 5| +| Guangzhou | 32 | 0 | 30 | 11 | 11| +| Shenzhen | 35 | 0 | 111 | 6 | 3| + +When we do the following queries: + +* Select City, Age, Sum (Cost), Max (Max dwell time), min (min dwell time) from table group by City, age;* +* `SELECT city, sum(cost), max(max_dwell_time), min(min_dwell_time) FROM table GROUP BY city;` +* `SELECT city, age, sum(cost), min(min_dwell_time) FROM table GROUP BY city, age;` + +Doris automatically hits the ROLLUP table. + +#### OLLUP in Duplicate Model + +Because the Duplicate model has no aggregate semantics. So the ROLLLUP in this model has lost the meaning of "scroll up". It's just to adjust the column order to hit the prefix index. In the next section, we will introduce prefix index in detail, and how to use ROLLUP to change prefix index in order to achieve better query efficiency. + +### Prefix Index and ROLLUP + +#### prefix index + +Unlike traditional database design, Doris does not support indexing on any column. OLAP databases based on MPP architecture such as Doris usually handle large amounts of data by improving concurrency. +In essence, Doris's data is stored in a data structure similar to SSTable (Sorted String Table). This structure is an ordered data structure, which can be sorted and stored according to the specified column. In this data structure, it is very efficient to search by sorting columns. + +In Aggregate, Uniq and Duplicate three data models. The underlying data storage is sorted and stored according to the columns specified in AGGREGATE KEY, UNIQ KEY and DUPLICATE KEY in their respective table-building statements. + +The prefix index, which is based on sorting, implements an index method to query data quickly according to a given prefix column. + +We use the prefix index of ** 36 bytes ** of a row of data as the prefix index of this row of data. When a VARCHAR type is encountered, the prefix index is truncated directly. We give examples to illustrate: + +1. The prefix index of the following table structure is user_id (8Byte) + age (8Bytes) + message (prefix 20 Bytes). + ++ 124; Columname = 124; type = 124; +|---|---| +|user_id|BIGINT| +|age|INT| +Message +124max \\u dwell u team 124DATE +124m;min \\u dwell u team 124DATE + +2. The prefix index of the following table structure is user_name (20 Bytes). 
Even if it does not reach 36 bytes, because a VARCHAR is encountered, the index is truncated directly and does not continue.
+
+|ColumnName|Type|
+|---|---|
+|user\_name|VARCHAR(20)|
+|age|INT|
+|message|VARCHAR(100)|
+|max\_dwell\_time|DATETIME|
+|min\_dwell\_time|DATETIME|
+
+When the query condition is a prefix of the **prefix index**, the query can be greatly accelerated. For example, in the first example, we execute the following query:
+
+`SELECT * FROM table WHERE user_id=1829239 and age=20;`
+
+The efficiency of this query is **much higher** than that of the following query:
+
+`SELECT * FROM table WHERE age=20;`
+
+Therefore, when creating tables, **choosing the correct column order can greatly improve query efficiency**.
+
+#### ROLLUP adjusts the prefix index
+
+Because the column order is specified when a table is created, a table has only one prefix index. This may be inefficient for queries whose conditions use other columns that cannot hit the prefix index. Therefore, we can manually adjust the column order by creating a ROLLUP. An example is given below.
+
+The structure of the Base table is as follows:
+
+|ColumnName|Type|
+|---|---|
+|user\_id|BIGINT|
+|age|INT|
+|message|VARCHAR(100)|
+|max\_dwell\_time|DATETIME|
+|min\_dwell\_time|DATETIME|
+
+On this basis, we can create a ROLLUP table:
+
+|ColumnName|Type|
+|---|---|
+|age|INT|
+|user\_id|BIGINT|
+|message|VARCHAR(100)|
+|max\_dwell\_time|DATETIME|
+|min\_dwell\_time|DATETIME|
+
+As you can see, the columns of the ROLLUP and Base tables are exactly the same; only the order of user_id and age is changed. So when we run the following query:
+
+`SELECT * FROM table where age=20 and message LIKE "%error%";`
+
+The ROLLUP table is preferred, because the prefix index of the ROLLUP matches better.
+
+### Some Explanations of ROLLUP
+
+* The fundamental role of ROLLUP is to improve the query efficiency of some queries (whether by aggregating to reduce the amount of data or by modifying the column order to match the prefix index). Therefore, the meaning of ROLLUP has gone beyond the scope of "roll-up". That is why it is named Materialized Index in the source code.
+* A ROLLUP is attached to the Base table and can be seen as an auxiliary data structure of the Base table. Users can create or delete ROLLUPs based on the Base table, but cannot explicitly specify a ROLLUP in a query. Whether a ROLLUP is hit is entirely determined by the Doris system.
+* ROLLUP data is stored in separate physical storage. Therefore, the more ROLLUPs you create, the more disk space is occupied. ROLLUPs also have an impact on import speed (the ETL phase of import automatically generates all ROLLUP data), but they do not reduce query efficiency (it can only get better).
+* Data updates to a ROLLUP are fully synchronized with the Base table. Users do not need to care about this.
+* Columns in a ROLLUP are aggregated in exactly the same way as in the Base table. There is no need to specify the aggregation when creating a ROLLUP, and it cannot be modified.
+* A necessary (but not sufficient) condition for a query to hit a ROLLUP is that **all columns** involved in the query (including the columns in the select list and the where condition) exist in the ROLLUP's columns. Otherwise, the query can only hit the Base table.
+* Certain types of queries (such as count(*)) cannot hit a ROLLUP under any circumstances. See the next section **Limitations of the aggregation model**.
+* The query execution plan can be obtained by `EXPLAIN your_sql;` command, and in the execution plan, whether ROLLUP has been hit or not can be checked. +* Base tables and all created ROLLUPs can be displayed by `DESC tbl_name ALL'; `statement. + +In this document, you can see [Query how to hit Rollup] (hit-the-rollup) + +## Limitations of aggregation model + +Here we introduce the limitations of Aggregate model (including Uniq model). + +In the aggregation model, what the model presents is the aggregated data. That is to say, any data that has not yet been aggregated (for example, two different imported batches) must be presented in some way to ensure consistency. Let's give an example. + +The hypothesis table is structured as follows: + +Columns +|---|---|---|---| +| userid | LARGEINT | | user id| +| date | DATE | | date of data filling| +| Cost | BIGINT | SUM | Total User Consumption| + +Assume that there are two batches of data that have been imported into the storage engine as follows: + +**batch 1** + +|user\_id|date|cost| +|---|---|---| +|10001|2017-11-20|50| +|10002|2017-11-21|39| + +**batch 2** + +|user\_id|date|cost| +|---|---|---| +|10001|2017-11-20|1| +|10001|2017-11-21|5| +|10003|2017-11-22|22| + +As you can see, data belonging to user 10001 in two import batches has not yet been aggregated. However, in order to ensure that users can only query the aggregated data as follows: + +|user\_id|date|cost| +|---|---|---| +|10001|2017-11-20|51| +|10001|2017-11-21|5| +|10002|2017-11-21|39| +|10003|2017-11-22|22| + +We add aggregation operator to query engine to ensure data consistency. + +In addition, on the aggregate column (Value), when executing aggregate class queries that are inconsistent with aggregate types, attention should be paid to semantics. For example, in the example above, we execute the following queries: + +`SELECT MIN(cost) FROM table;` + +The result is 5, not 1. + +At the same time, this consistency guarantee will greatly reduce the query efficiency in some queries. + +Let's take the most basic count (*) query as an example: + +`SELECT COUNT(*) FROM table;` + +In other databases, such queries return results quickly. Because in the implementation, we can get the query result by counting rows at the time of import and saving count statistics information, or by scanning only a column of data to get count value at the time of query, with very little overhead. But in Doris's aggregation model, the overhead of this query ** is very large **. + +Let's take the data as an example. + +**batch 1** + +|user\_id|date|cost| +|---|---|---| +|10001|2017-11-20|50| +|10002|2017-11-21|39| + +**batch 2** + +|user\_id|date|cost| +|---|---|---| +|10001|2017-11-20|1| +|10001|2017-11-21|5| +|10003|2017-11-22|22| + +Because the final aggregation result is: + +|user\_id|date|cost| +|---|---|---| +|10001|2017-11-20|51| +|10001|2017-11-21|5| +|10002|2017-11-21|39| +|10003|2017-11-22|22| + +So `select count (*) from table; `The correct result should be ** 4 **. But if we only scan the `user_id'column and add query aggregation, the final result is ** 3 ** (10001, 10002, 10003). If aggregated without queries, the result is ** 5 ** (a total of five rows in two batches). It can be seen that both results are wrong. + +In order to get the correct result, we must read the data of `user_id'and `date', and ** together with aggregate ** when querying, to return the correct result of ** 4 **. 
That is to say, in the count (*) query, Doris must scan all AGGREGATE KEY columns (here are `user_id` and `date') and aggregate them to get the semantically correct results. When aggregated columns are large, count (*) queries need to scan a large amount of data. + +Therefore, when there are frequent count (*) queries in the business, we recommend that users simulate count (*)**) by adding a column with a ** value of 1 and aggregation type of SUM. As the table structure in the previous example, we modify it as follows: + +Columns +|---|---|---|---| +| user ID | BIGINT | | user id| +| date | DATE | | date of data filling| +| Cost | BIGINT | SUM | Total User Consumption| +| count | BIGINT | SUM | for counting| + +Add a count column and import the data with the column value ** equal to 1 **. The result of `select count (*) from table; `is equivalent to `select sum (count) from table; ` The query efficiency of the latter is much higher than that of the former. However, this method also has limitations, that is, users need to guarantee that they will not import rows with the same AGGREGATE KEY column repeatedly. Otherwise, `select sum (count) from table; `can only express the number of rows originally imported, not the semantics of `select count (*) from table; ` + +Another way is to ** change the aggregation type of the `count'column above to REPLACE, and still weigh 1 **. Then `select sum (count) from table; `and `select count (*) from table; `the results will be consistent. And in this way, there is no restriction on importing duplicate rows. + +### Duplicate 模型 + +Duplicate model has no limitation of aggregation model. Because the model does not involve aggregate semantics, when doing count (*) query, we can get the correct semantics by choosing a column of queries arbitrarily. + +## Suggestions for Choosing Data Model + +Because the data model was established when the table was built, and ** could not be modified **. Therefore, it is very important to select an appropriate data model **. + +1. Aggregate model can greatly reduce the amount of data scanned and the amount of query computation by pre-aggregation. It is very suitable for report query scenarios with fixed patterns. But this model is not very friendly for count (*) queries. At the same time, because the aggregation method on the Value column is fixed, semantic correctness should be considered in other types of aggregation queries. +2. Uniq model guarantees the uniqueness of primary key for scenarios requiring unique primary key constraints. However, the query advantage brought by pre-aggregation such as ROLLUP can not be exploited (because the essence is REPLACE, there is no such aggregation as SUM). +3. Duplicate is suitable for ad-hoc queries of any dimension. Although it is also impossible to take advantage of the pre-aggregation feature, it is not constrained by the aggregation model and can take advantage of the queue-store model (only reading related columns, but not all Key columns). diff --git a/docs/documentation/en/getting-started/data-partition_EN.md b/docs/documentation/en/getting-started/data-partition_EN.md new file mode 100644 index 00000000000000..8a67fcabd2b8c1 --- /dev/null +++ b/docs/documentation/en/getting-started/data-partition_EN.md @@ -0,0 +1,269 @@ +# Data partition + +This document mainly introduces Doris's table building and data partitioning, as well as possible problems and solutions in table building operation. + +## Basic concepts + +In Doris, data is logically described in the form of tables. 
+
+### Row & Column
+
+A table consists of rows and columns. A Row is a row of user data. A Column is used to describe a field in a row of data.
+
+Columns can be divided into two categories: Key and Value. From a business perspective, Key and Value correspond to dimension columns and indicator columns, respectively. From the point of view of the aggregation model, rows with the same Key columns are aggregated into one row. The aggregation method of the Value columns is specified by the user when the table is created. For more about the aggregation model, see [Doris data model](./data-model-rollup.md).
+
+### Tablet & Partition
+
+In Doris's storage engine, user data is divided horizontally into several data shards (Tablets, also known as data buckets). Each Tablet contains several rows of data. The data of different Tablets does not intersect and is stored physically independently.
+
+Multiple Tablets logically belong to different Partitions. A Tablet belongs to only one Partition, and a Partition contains several Tablets. Because Tablets are stored physically independently, Partitions can also be considered physically independent. A Tablet is the smallest physical storage unit for operations such as data movement and replication.
+
+Several Partitions form a Table. A Partition can be regarded as the smallest logical management unit. Data import and deletion can be done on a single Partition.
+
+## Data partition
+
+We illustrate Doris's data partitioning with a CREATE TABLE operation.
+
+CREATE TABLE in Doris is a synchronous command. If the command returns success, the table has been created successfully.
+
+See `HELP CREATE TABLE;` for more help.
+
+This section uses an example to introduce how tables are created in Doris.
+
+```
+CREATE TABLE IF NOT EXISTS example_db.expamle_tbl
+(
+`user_id` LARGEINT NOT NULL COMMENT "user id",
+`date` DATE NOT NULL COMMENT "date when the data are imported",
+`timestamp` DATETIME NOT NULL COMMENT "timestamp when the data are imported",
+`city` VARCHAR(20) COMMENT "user city",
+`age` SMALLINT COMMENT "user age",
+`sex` TINYINT COMMENT "user gender",
+`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit time of the user",
+`cost` BIGINT SUM DEFAULT "0" COMMENT "total consumption of the user",
+`max_dwell_time` INT MAX DEFAULT "0" COMMENT "maximum dwell time of the user",
+`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "minimum dwell time of the user"
+)
+ENGINE=olap
+AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)
+PARTITION BY RANGE(`date`)
+(
+PARTITION `p201701` VALUES LESS THAN ("2017-02-01"),
+PARTITION `p201702` VALUES LESS THAN ("2017-03-01"),
+PARTITION `p201703` VALUES LESS THAN ("2017-04-01")
+)
+DISTRIBUTED BY HASH(`user_id`) BUCKETS 16
+PROPERTIES
+(
+"replication_num" = "3",
+"storage_medium" = "SSD",
+"storage_cooldown_time" = "2018-01-01 12:00:00"
+);
+
+```
+
+### Column Definition
+
+Here we only take the AGGREGATE KEY data model as an example. Refer to [Doris data model](./data-model-rollup.md) for other data models.
+
+The basic column types can be viewed by executing `HELP CREATE TABLE;` in mysql-client.
+
+In the AGGREGATE KEY data model, all columns without a specified aggregation method (SUM, REPLACE, MAX, MIN) are treated as Key columns. The rest are Value columns.
+
+When defining columns, you can refer to the following suggestions:
+
+1. The Key columns must precede all Value columns.
+2. Choose integer types whenever possible, because integer types are much more efficient than strings in computation and lookup.
+3.
For the selection principle of integer types of different lengths, follow the ** sufficient can **. +4. For the length of VARCHAR and STING types, follow ** suffice **. +5. The total byte length of all columns (including Key and Value) cannot exceed 100KB. + +### Zoning and Bucketing + +Doris supports two-tier data partitioning. The first layer is Partition, which only supports Range partitioning. The second layer is Bucket (Tablet), which only supports Hash partitioning. + +You can also use only one layer of partition. Bucket partitioning is only supported when using one-tier partitioning. + +1. Partition + +* Partition columns can specify one or more columns. Partition classes must be KEY columns. The usage of multi-column partitions is introduced in the following ** summary of multi-column partitions. +* Partition's boundaries are left-closed and right-open. For example, if you want to store all February data in p201702, you need to enter the partition value "2017-03-01", that is, the range: [2017-02-01, 2017-03-01]. +* Regardless of the partition column type, double quotation marks are required when writing partition values. +* Partition columns are usually time columns to facilitate the management of old and new data. +* There is no theoretical upper limit on the number of zones. +* When Partition is not used to build tables, the system automatically generates a full-range Partition with the same name as the table name. The Partition is invisible to users and cannot be deleted. + +An example is given to illustrate the change of partition scope when adding or deleting partitions. +* As shown in the example above, when the table is completed, the following three partitions are automatically generated: + +``` +p201701: [MIN VALUE, 2017 -02 -01] +p201702: [2017-02-01, 2017-03-01) +p201703: [2017-03-01, 2017-04-01) +``` + +* When we add a partition p201705 VALUES LESS THAN ("2017-06-01"), the partition results are as follows: + +``` +p201701: [MIN VALUE, 2017 -02 -01] +p201702: [2017-02-01, 2017-03-01) +p201703: [2017-03-01, 2017-04-01) +p201705: [2017-04-01, 2017-06-01) +``` + +* When we delete partition p201703, the partition results are as follows: + +``` +*p201701: [MIN VALUE, 2017 -02 -01] +* p201702: [2017-02-01, 2017-03-01) +* p201705: [2017-04-01, 2017-06-01) +``` + +> Notice that the partition ranges of p201702 and p201705 have not changed, and there is a gap between the two partitions: [2017-03-01, 2017-04-01]. That is, if the imported data range is within this empty range, it is imported as before. + +* Continue to delete partition p201702, partition results are as follows: + +``` +*p201701: [MIN VALUE, 2017 -02 -01] +* p201705: [2017-04-01, 2017-06-01) +* The void range becomes: [2017-02-01, 2017-04-01] +``` + +* Now add a partition p201702 new VALUES LESS THAN ("2017-03-01"). The partition results are as follows: + +``` +*p201701: [MIN VALUE, 2017 -02 -01] +*p201702new: [2017 -02 -01, 2017 -03 -01] +*p201705: [2017 -04 -01, 2017 -06 -01] +``` + +> It can be seen that the void range is reduced to: [2017-03-01, 2017-04-01] + +* Now delete partition p201701 and add partition p201612 VALUES LESS THAN ("2017-01-01"). The partition results are as follows: + +``` +*p201612: [MIN VALUE, 2017 -01 -01] +*p201702new: [2017 -02 -01, 2017 -03 -01] +*p201705: [2017 -04 -01, 2017 -06 -01] +``` + +> A new void appears: [2017-01-01, 2017-02-01] + +In summary, deletion of partitions does not change the scope of existing partitions. Deleting partitions may cause holes. 
When partitions are added, the lower bound of a partition is immediately followed by the upper bound of a partition. +Partitions with overlapping ranges cannot be added. + +2. Bucket + +* If Partition is used, the `DISTRIBUTED...'statement describes the partitioning rules of data within ** partitions. If Partition is not used, the partitioning rules for the data of the entire table are described. +* Bucket columns can be multiple columns, but must be Key columns. Bucket columns can be the same or different as ARTITION columns. +* The choice of bucket columns is a trade-off between ** query throughput ** and ** query concurrency **: + +1. If multiple bucket columns are selected, the data will be more evenly distributed. But if the query condition does not contain the equivalent condition of all bucket columns, a query scans all buckets. This increases the throughput of queries, but increases the latency of individual queries. This approach is suitable for query scenarios with high throughput and low concurrency. +2. If only one or a few bucket columns are selected, point query can query only one bucket. This method is suitable for high concurrent point query scenarios. + +* There is theoretically no upper limit on the number of buckets. + +3. Suggestions on the quantity and data quantity of Partition and Bucket. + +* The total number of tables in a table is equal to (Partition num * Bucket num). +* The number of tables in a table is recommended to be slightly more than the number of disks in the entire cluster, regardless of capacity expansion. +* There is no upper and lower bound theoretically for the data volume of a single Tablet, but it is recommended to be within the range of 1G - 10G. If the amount of single Tablet data is too small, the aggregation effect of data is not good, and the pressure of metadata management is high. If the amount of data is too large, it is not conducive to the migration and completion of replicas, and will increase the cost of failed retries of Schema Change or Rollup operations (the granularity of these failed retries is Tablet). +* When Tablet's principle of data quantity conflicts with that of quantity, it is suggested that priority be given to the principle of data quantity. +* When tabulating, the number of Buckets per partition is specified uniformly. However, when adding partitions dynamically (`ADD PARTITION'), you can specify the number of Buckets for new partitions separately. This function can be used to deal with data shrinkage or expansion conveniently. +* Once specified, the number of Buckets for a Partition cannot be changed. Therefore, in determining the number of Buckets, it is necessary to consider the situation of cluster expansion in advance. For example, currently there are only three hosts, and each host has one disk. If the number of Buckets is set to 3 or less, concurrency cannot be improved even if machines are added later. +* For example, suppose there are 10 BEs, one disk per BE. If the total size of a table is 500 MB, 4-8 fragments can be considered. 5GB: 8-16. 50GB: 32. 500GB: Recommended partition, each partition size is about 50GB, each partition 16-32 partitions. 5TB: Recommended partitions, each partition size is about 50GB, each partition 16-32 partitions. + +> Note: The amount of data in the table can be viewed by the `show data'command, and the result is divided by the number of copies, that is, the amount of data in the table. 
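+
+As mentioned above, the number of Buckets can be set per partition when a partition is added dynamically. A sketch of such a statement is shown below, reusing the range-partitioned `table2` (partitioned by `event_day`, bucketed by `siteid`) from the earlier example; the exact syntax is described under `HELP ALTER TABLE`:
+
+```
+-- Add a new month partition and give it a larger bucket count to absorb data growth
+ALTER TABLE example_db.table2
+ADD PARTITION p201709 VALUES LESS THAN ("2017-10-01")
+DISTRIBUTED BY HASH(siteid) BUCKETS 32;
+```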
+
+#### Multi-column partition
+
+Doris supports specifying multiple columns as partition columns, as shown below:
+
+```
+PARTITION BY RANGE(`date`, `id`)
+(
+PARTITION `p201701_1000` VALUES LESS THAN ("2017-02-01", "1000"),
+PARTITION `p201702_2000` VALUES LESS THAN ("2017-03-01", "2000"),
+PARTITION `p201703_all` VALUES LESS THAN ("2017-04-01")
+)
+```
+
+In the above example, we specify `date` (DATE type) and `id` (INT type) as the partition columns. The resulting partitions are as follows:
+
+```
+* p201701_1000: [(MIN_VALUE, MIN_VALUE), ("2017-02-01", "1000"))
+* p201702_2000: [("2017-02-01", "1000"), ("2017-03-01", "2000"))
+* p201703_all:  [("2017-03-01", "2000"), ("2017-04-01", MIN_VALUE))
+```
+
+Note that in the last partition the user only specifies the partition value of the `date` column, so the partition value of the `id` column is filled with `MIN_VALUE` by default. When the user inserts data, the partition column values are compared in order, and the corresponding partition is determined. Examples:
+
+```
+* Data  -->  Partition
+* 2017-01-01, 200   --> p201701_1000
+* 2017-01-01, 2000  --> p201701_1000
+* 2017-02-01, 100   --> p201701_1000
+* 2017-02-01, 2000  --> p201702_2000
+* 2017-02-15, 5000  --> p201702_2000
+* 2017-03-01, 2000  --> p201703_all
+* 2017-03-10, 1     --> p201703_all
+* 2017-04-01, 1000  --> unable to import
+* 2017-05-01, 1000  --> unable to import
+```
+
+### PROPERTIES
+
+In the final PROPERTIES section of the CREATE TABLE statement, the following two parameters can be specified:
+
+1. replication_num
+
+* The number of replicas of each Tablet. The default is 3, and it is recommended to keep the default. In the CREATE TABLE statement, the number of Tablet replicas is specified uniformly for all Partitions. When adding a new partition, you can specify the number of Tablet replicas for that partition separately.
+* The number of replicas can be modified at runtime. It is strongly recommended to keep it an odd number.
+* The maximum number of replicas depends on the number of independent IPs in the cluster (note: not the number of BEs). The principle of replica distribution in Doris is that replicas of the same Tablet are not allowed to be placed on the same physical machine, and a physical machine is identified by its IP. Therefore, even if three or more BE instances are deployed on the same physical machine, if those BEs share the same IP, the replication number can only be set to 1.
+* For some small and infrequently updated dimension tables, you can consider setting more replicas. In this way, Join queries have a higher probability of performing a local data Join.
+
+2. storage_medium & storage\_cooldown\_time
+
+* BE data storage directories can be explicitly specified as SSD or HDD (distinguished by the .SSD or .HDD suffix). When creating a table, you can specify the medium on which all Partitions are initially stored. Note that the suffix only declares the disk medium explicitly; it does not check whether it matches the actual medium type.
+* The default initial storage medium is HDD. If SSD is specified, the data is initially stored on SSD.
+* If storage_cooldown_time is not specified, data is automatically migrated from SSD to HDD after 7 days by default. If storage_cooldown_time is specified, the data migrates only after storage_cooldown_time is reached.
+* Note that when storage_medium is specified, this parameter is only a "best effort" setting.
+
+### ENGINE
+
+In this example the ENGINE type is olap, which is the default. In Doris, only tables of this ENGINE type are managed and stored by Doris itself. Other ENGINE types, such as mysql, broker, es, etc., are essentially just mappings to tables in external databases or systems, so that Doris can read their data; Doris itself does not create, manage, or store any table or data of a non-olap ENGINE type.
+
+### Others
+
+`IF NOT EXISTS` means the table is created only if it does not already exist. Note that only the table name is checked, not whether the new table schema matches the schema of an existing table. So if a table with the same name but a different schema already exists, the command also returns success, but it does not mean that a new table with the new schema has been created.
+
+## Common Questions
+
+### Common problems with table creation
+
+1. If a syntax error occurs in a long CREATE TABLE statement, the reported error hint may be incomplete. Here is a list of possible syntax problems for manual checking:
+
+* Wrong syntax structure. Read `HELP CREATE TABLE;` carefully and check the relevant syntax.
+* Reserved words. When a user-defined name collides with a reserved word, it needs to be enclosed in backquotes. It is recommended to wrap all custom names with this symbol.
+* Chinese or full-width characters. Non-UTF-8 encoded Chinese characters, or hidden full-width characters (spaces, punctuation, etc.), can cause syntax errors. It is recommended to check with a text editor that can display invisible characters.
+
+2. `Failed to create partition [xxx] . Timeout`
+
+Doris creates tables sequentially at partition granularity. This error may be reported when a Partition fails to be created. Even if you don't use Partition, `Failed to create partition` may still be reported when table creation goes wrong, because, as described earlier, Doris creates an unmodifiable default Partition for tables that do not specify one.
+
+This error usually means that the BE ran into a problem when creating the data tablets. Troubleshoot with the following steps:
+
+1. In fe.log, find the `Failed to create partition` log entry at the corresponding point in time. In that entry you will find a series of number pairs like {10001-10010}. The first number of a pair is the Backend ID and the second is the Tablet ID. The pair above means that creating the Tablet with ID 10010 on the Backend with ID 10001 failed.
+2. Go to the be.INFO log of that Backend and look for tablet-id-related log entries in the corresponding time range to find the error message.
+3. The following are some common tablet creation failures, including but not limited to:
+* The BE did not receive the related task; in this case no tablet-id-related log can be found in be.INFO. Or the BE created the tablet successfully but failed to report it. For these problems, see the deployment and upgrade documents to check the connectivity between FE and BE.
+* Pre-allocated memory failed. Perhaps the byte length of a row in the table exceeds 100KB.
+* `Too many open files`. The number of open file handles exceeds the Linux system limit. The handle limit of the Linux system needs to be modified.
+
+You can also extend the timeout by setting `tablet_create_timeout_second=xxx` in fe.conf. The default is 2 seconds.
+
+3. The CREATE TABLE command does not return a result for a long time.
+
+Doris's CREATE TABLE command is a synchronous command. Its timeout is currently calculated in a fairly simple way, as (tablet num * replication num) seconds. If many data tablets are created and some of them fail, the command may wait for a rather long timeout before returning an error.
+
+Normally, the CREATE TABLE statement returns within a few seconds or tens of seconds. If it takes more than a minute, it is recommended to cancel the operation and check the FE or BE logs for related errors.
diff --git a/docs/documentation/en/getting-started/hit-the-rollup_EN.md b/docs/documentation/en/getting-started/hit-the-rollup_EN.md
new file mode 100644
index 00000000000000..bb631c65e63b8d
--- /dev/null
+++ b/docs/documentation/en/getting-started/hit-the-rollup_EN.md
@@ -0,0 +1,268 @@
+# Rollup and Query
+
+As an aggregate view in Doris, Rollup can play two roles in queries:
+
+* Index
+* Aggregate data (only for the aggregate model, i.e. aggregate key)
+
+However, certain conditions need to be met in order to hit a Rollup. The PreAggregation value of the ScanNode in the execution plan tells whether aggregated data can be used, and the Rollup field tells which Rollup table is hit.
+
+## Terminology
+
+Base: the Base table.
+
+Rollup: generally refers to the Rollup tables created on top of a Base table, but in some scenarios it includes both Base and Rollup tables.
+
+## Index
+
+Doris's prefix index has been introduced in the earlier query-practice section: in the underlying storage engine, Doris builds a separate sorted sparse index from the first 36 bytes of each Base/Rollup table's key columns (for VARCHAR columns the prefix index may be shorter than 36 bytes, because a VARCHAR column truncates the prefix index and contributes at most 20 bytes). The data itself is also sorted; the index is used to locate a position, and a binary search is then done within the data. The conditions in a query are matched against the prefix index of each Base/Rollup, and the Base/Rollup that matches the longest prefix index is selected.
+
+```
+---> matching from left to right
++----+----+----+----+----+----+
+| c1 | c2 | c3 | c4 | c5 |... |
++----+----+----+----+----+----+
+```
+
+As shown in the figure above, the where and on conditions in the query are pushed down to the ScanNode and matched starting from the first column of the prefix index: check whether the conditions contain these columns, and accumulate the matched length until a column cannot be matched or the 36-byte limit is reached (VARCHAR columns can only match 20 bytes and truncate the prefix index before 36 bytes). Then the Base/Rollup with the longest matched length is chosen.
+
+The following example shows a Base table and its four Rollups:
+
+```
++---------------+-------+--------------+------+-------+---------+-------+
+| IndexName | Field | Type | Null | Key | Default | Extra |
++---------------+-------+--------------+------+-------+---------+-------+
+| test | k1 | TINYINT | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k4 | BIGINT | Yes | true | N/A | |
+| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
+| | k6 | CHAR(5) | Yes | true | N/A | |
+| | k7 | DATE | Yes | true | N/A | |
+| | k8 | DATETIME | Yes | true | N/A | |
+| | k9 | VARCHAR(20) | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
+| | | | | | | |
+| rollup_index1 | k9 | VARCHAR(20) | Yes | true | N/A | |
+| | k1 | TINYINT | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k4 | BIGINT | Yes | true | N/A | |
+| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
+| | k6 | CHAR(5) | Yes | true | N/A | |
+| | k7 | DATE | Yes | true | N/A | |
+| | k8 | DATETIME | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
+| | | | | | | |
+| rollup_index2 | k9 | VARCHAR(20) | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k1 | TINYINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k4 | BIGINT | Yes | true | N/A | |
+| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
+| | k6 | CHAR(5) | Yes | true | N/A | |
+| | k7 | DATE | Yes | true | N/A | |
+| | k8 | DATETIME | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
+| | | | | | | |
+| rollup_index3 | k4 | BIGINT | Yes | true | N/A | |
+| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
+| | k6 | CHAR(5) | Yes | true | N/A | |
+| | k1 | TINYINT | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k7 | DATE | Yes | true | N/A | |
+| | k8 | DATETIME | Yes | true | N/A | |
+| | k9 | VARCHAR(20) | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
+| | | | | | | |
+| rollup_index4 | k4 | BIGINT | Yes | true | N/A | |
+| | k6 | CHAR(5) | Yes | true | N/A | |
+| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
+| | k1 | TINYINT | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k7 | DATE | Yes | true | N/A | |
+| | k8 | DATETIME | Yes | true | N/A | |
+| | k9 | VARCHAR(20) | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
++---------------+-------+--------------+------+-------+---------+-------+
+```
+
+The prefix indexes of the five tables are
+
+```
+Base (k1, k2, k3, k4, k5, k6, k7)
+
+rollup_index1 (k9)
+
+rollup_index2 (k9)
+
+rollup_index3 (k4, k5, k6, k1, k2, k3, k7)
+
+rollup_index4 (k4, k6, k5, k1, k2, k3, k7)
+```
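+
+For reference, Rollups like these are added to an existing table with `ALTER TABLE ... ADD ROLLUP`. A minimal sketch for one of them, simply mirroring the column order of rollup_index3 above (the operation is asynchronous; the exact syntax can be checked with `HELP ALTER TABLE`):
+
+```
+ALTER TABLE test ADD ROLLUP rollup_index3(k4, k5, k6, k1, k2, k3, k7, k8, k9, k10, k11);
+
+-- The progress of the rollup job can then be observed with:
+SHOW ALTER TABLE ROLLUP;
+```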
+
+The conditions that can use the prefix index need to be `=`, `<`, `>`, `<=`, `>=`, `in` or `between`, these conditions must appear together, and their relationship must use `and`; conditions such as `or` or `!=` cannot hit the prefix index. Then look at the following query:
+
+```
+SELECT * FROM test WHERE k1 = 1 AND k2 > 3;
+```
+
+With conditions on k1 and k2, check: only the first column of Base contains k1, so Base matches the longest prefix index and the query hits the table test. Explain:
+
+```
+| 0:OlapScanNode
+| TABLE: test
+| PREAGGREGATION: OFF. Reason: No AggregateInfo
+| PREDICATES: `k1` = 1, `k2` > 3
+| partitions=1/1
+| rollup: test
+| buckets=1/10
+| cardinality=-1
+| avgRowSize=0.0
+| numNodes=0
+| tuple ids: 0
+```
+
+Look again at the following query:
+
+`SELECT * FROM test WHERE k4 = 1 AND k5 > 3;`
+
+With conditions on k4 and k5, check: the first column of both rollup_index3 and rollup_index4 contains k4, but the second column of rollup_index3 also contains k5, so rollup_index3 matches the longest prefix index.
+
+```
+| 0:OlapScanNode
+| TABLE: test
+| PREAGGREGATION: OFF. Reason: No AggregateInfo
+| PREDICATES: `k4` = 1, `k5` > 3
+| partitions=1/1
+| rollup: rollup_index3
+| buckets=10/10
+| cardinality=-1
+| avgRowSize=0.0
+| numNodes=0
+| tuple ids: 0
+```
+
+Now we try to match conditions on the varchar column, as follows:
+
+`SELECT * FROM test WHERE k9 IN ("xxx", "yyyy") AND k1 = 10;`
+
+There are conditions on k9 and k1. The first column of both rollup_index1 and rollup_index2 contains k9, so in principle either rollup could be chosen at random to hit the prefix index (the varchar column contributes only 20 bytes, so the prefix index is truncated before 36 bytes). The current strategy, however, keeps matching on k1: since the second column of rollup_index1 is k1, rollup_index1 is chosen, although the extra k1 condition actually provides no acceleration. (If a condition outside the prefix index needs to accelerate a query, a Bloom Filter index can be added; this is usually done for string columns, because Doris already has a block-level Min/Max index for integer and date columns.) The following is the explain result:
+
+```
+| 0:OlapScanNode
+| TABLE: test
+| PREAGGREGATION: OFF. Reason: No AggregateInfo
+| PREDICATES: `k9` IN ('xxx', 'yyyy'), `k1` = 10
+| partitions=1/1
+| rollup: rollup_index1
+| buckets=1/10
+| cardinality=-1
+| avgRowSize=0.0
+| numNodes=0
+| tuple ids: 0
+```
+
+Finally, look at a query that can be hit by more than one Rollup:
+
+`SELECT * FROM test WHERE k4 < 1000 AND k5 = 80 AND k6 >= 10000;`
+
+There are conditions on three columns: k4, k5 and k6. The first three columns of both rollup_index3 and rollup_index4 contain these three columns, so the matched prefix index lengths are the same and either could be selected. The current default strategy is to select the rollup that was created earlier, which here is rollup_index3.
+
+```
+| 0:OlapScanNode
+| TABLE: test
+| PREAGGREGATION: OFF. Reason: No AggregateInfo
+| PREDICATES: `k4` < 1000, `k5` = 80, `k6` >= 10000.0
+| partitions=1/1
+| rollup: rollup_index3
+| buckets=10/10
+| cardinality=-1
+| avgRowSize=0.0
+| numNodes=0
+| tuple ids: 0
+```
+
+If the above query is modified slightly, as follows:
+
+`SELECT * FROM test WHERE k4 < 1000 AND k5 = 80 OR k6 >= 10000;`
+
+then it cannot hit the prefix index. (In fact, no Min/Max or BloomFilter index in the Doris storage engine can take effect for it either.)
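+
+As mentioned above, a Bloom Filter index can help conditions on a string column that falls outside the prefix index. A minimal sketch, assuming the `bloom_filter_columns` table property and using the example table `test` (verify the exact syntax with `HELP ALTER TABLE` on your version):
+
+```
+-- Add a Bloom Filter index on k9; the change can be checked afterwards with SHOW CREATE TABLE.
+ALTER TABLE test SET ("bloom_filter_columns" = "k9");
+SHOW CREATE TABLE test;
+```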
+
+## Aggregate data
+
+Of course, aggregating data is an essential function of aggregate views. Such materialized views are very helpful for aggregation queries and report queries. To hit an aggregate view, the following prerequisites are needed:
+
+1. There is a single Rollup that contains all the columns involved in the query or subquery.
+2. If the query or subquery contains a Join, the Join type needs to be Inner Join.
+
+The following are the types of aggregation queries that can hit a Rollup.
+
+| Column type \ Query type | Sum | Distinct/Count Distinct | Min | Max | Ndv |
+|--------------|-------|-------------------------|-------|-------|-------|
+| Key | false | true | true | true | true |
+| Value (Sum) | true | false | false | false | false |
+| Value (Replace) | false | false | false | false | false |
+| Value (Min) | false | false | true | false | false |
+| Value (Max) | false | false | false | true | false |
+
+If the above conditions are met, there are two stages in judging which Rollup is hit for the aggregate model:
+
+1. First, match the Rollup table that hits the longest prefix index through the conditions, as in the index strategy above.
+2. Then compare the row counts of the Rollups and select the smallest one.
+
+Take the following Base table and Rollups:
+
+```
++-------------+-------+--------------+------+-------+---------+-------+
+| IndexName | Field | Type | Null | Key | Default | Extra |
++-------------+-------+--------------+------+-------+---------+-------+
+| test_rollup | k1 | TINYINT | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k4 | BIGINT | Yes | true | N/A | |
+| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
+| | k6 | CHAR(5) | Yes | true | N/A | |
+| | k7 | DATE | Yes | true | N/A | |
+| | k8 | DATETIME | Yes | true | N/A | |
+| | k9 | VARCHAR(20) | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
+| | | | | | | |
+| rollup2 | k1 | TINYINT | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
+| | | | | | | |
+| rollup1 | k1 | TINYINT | Yes | true | N/A | |
+| | k2 | SMALLINT | Yes | true | N/A | |
+| | k3 | INT | Yes | true | N/A | |
+| | k4 | BIGINT | Yes | true | N/A | |
+| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
+| | k10 | DOUBLE | Yes | false | N/A | MAX |
+| | k11 | FLOAT | Yes | false | N/A | SUM |
++-------------+-------+--------------+------+-------+---------+-------+
+```
+
+See the following query:
+
+`SELECT SUM(k11) FROM test_rollup WHERE k1 = 10 AND k2 > 200 AND k3 in (1,2,3);`
+
+First, judge whether the query can hit an aggregate Rollup table; according to the table above, it can. The query then has conditions on three columns, k1, k2 and k3, and the first three columns of test_rollup, rollup1 and rollup2 all contain these three columns, so the matched prefix index lengths are the same. Finally, comparing row counts, rollup2 clearly has the highest degree of aggregation, so rollup2 is selected because it has the fewest rows.
+ +``` +| 0:OlapScanNode | +| TABLE: test_rollup | +PREAGGREGATION: ON {124} +| PREDICATES: `k1` = 10, `k2` > 200, `k3` IN (1, 2, 3) | +partitions =1 /1 +| rollup: rollup2 | +| buckets=1/10 | +| cardinality=-1 | +| avgRowSize=0.0 | +| numNodes=0 | +| tuple ids: 0 | +``` diff --git a/docs/documentation/en/getting-started/index.rst b/docs/documentation/en/getting-started/index.rst new file mode 100644 index 00000000000000..dcb7cdb916ceb9 --- /dev/null +++ b/docs/documentation/en/getting-started/index.rst @@ -0,0 +1,12 @@ +============= +开始使用 +============= + +.. toctree:: + + basic-usage.md + advance-usage.md + best-practice.md + data-partition.md + data-model-rollup.md + hit-the-rollup.md diff --git a/docs/documentation/en/installing/compilation.md b/docs/documentation/en/installing/compilation.md new file mode 100644 index 00000000000000..0ca91e865ec6ef --- /dev/null +++ b/docs/documentation/en/installing/compilation.md @@ -0,0 +1,77 @@ +# 编译 + +本文档主要介绍如何通过源码编译 Doris。 + +## 使用 Docker 开发镜像编译(推荐) + +### 使用现成的镜像 + +1. 下载 Docker 镜像 + + `$ docker pull apachedoris/doris-dev:build-env` + + 检查镜像下载完成: + + ``` + $ docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + apachedoris/doris-dev build-env f8bc5d4024e0 21 hours ago 3.28GB + ``` + +注: 针对不同的 Doris 版本,需要下载对应的镜像版本 + +| image version | commit id | release version | +|---|---|---| +| apachedoris/doris-dev:build-env | before [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) | 0.8.x, 0.9.x | +| apachedoris/doris-dev:build-env-1.1 | [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) or later | 0.10.x or later | + +2. 运行镜像 + + `$ docker run -it apachedoris/doris-dev:build-env` + + 如果你希望编译本地 Doris 源码,则可以挂载路径: + + ``` + $ docker run -it -v /your/local/incubator-doris-DORIS-x.x.x-release/:/root/incubator-doris-DORIS-x.x.x-release/ apachedoris/doris-dev:build-env + ``` + +3. 下载源码 + + 启动镜像后,你应该已经处于容器内。可以通过以下命令下载 Doris 源码(已挂载本地源码目录则不用): + + ``` + $ wget https://dist.apache.org/repos/dist/dev/incubator/doris/xxx.tar.gz + or + $ git clone https://github.com/apache/incubator-doris.git + ``` + +4. 编译 Doris + + ``` + $ sh build.sh + ``` + + 编译完成后,产出文件在 `output/` 目录中。 + +### 自行编译开发环境镜像 + +你也可以自己创建一个 Doris 开发环境镜像,具体可参阅 `docker/README.md` 文件。 + + +## 直接编译(CentOS/Ubuntu) + +你可以在自己的 linux 环境中直接尝试编译 Doris。 + +1. 系统依赖 + + `GCC 5.3.1+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.4.3+` + + 安装完成后,自行设置环境变量 `PATH`, `JAVA_HOME` 等。 + +2. 编译 Doris + + ``` + $ sh build.sh + ``` + + 编译完成后,产出文件在 `output/` 目录中。 diff --git a/docs/documentation/en/installing/compilation_EN.md b/docs/documentation/en/installing/compilation_EN.md new file mode 100644 index 00000000000000..e59248b0ff719b --- /dev/null +++ b/docs/documentation/en/installing/compilation_EN.md @@ -0,0 +1,76 @@ +# Compilation + +This document focuses on how to code Doris through source code. + +## Developing mirror compilation using Docker (recommended) + +### Use off-the-shelf mirrors + +1. Download Docker Mirror + + `$ docker pull apachedoris/doris-dev:build-env` + + Check mirror download completed: + + ``` + $ docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + apachedoris/doris-dev build-env f8bc5d4024e0 21 hours ago 3.28GB + ``` + + Note: For different versions of Oris, you need to download the corresponding mirror version. 
+ + | image version | commit id | release version | + |---|---|---| + | apachedoris/doris-dev:build-env | before [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) | 0.8.x, 0.9.x | + | apachedoris/doris-dev:build-env-1.1 | [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) or later | 0.10.x or later | + +2. Running Mirror + + `$ docker run -it apachedoris/doris-dev:build-env` + + If you want to compile the local Doris source code, you can mount the path: + + ``` + $ docker run -it -v /your/local/incubator-doris-DORIS-x.x.x-release/:/root/incubator-doris-DORIS-x.x.x-release/ apachedoris/doris-dev:build-env + ``` + +3. Download source code + + After starting the mirror, you should be in the container. The Doris source code can be downloaded from the following command (local source directory mounted is not required): + + ``` + $ wget https://dist.apache.org/repos/dist/dev/incubator/doris/xxx.tar.gz + or + $ git clone https://github.com/apache/incubator-doris.git + ``` + +4. Compile Doris + + ``` + $ sh build.sh + ``` + + After compilation, the output file is in the `output/` directory. + +### Self-compiling Development Environment Mirror + +You can also create a Doris development environment mirror yourself, referring specifically to the `docker/README.md'file. + + +## Direct Compilation (CentOS/Ubuntu) + +You can try to compile Doris directly in your own Linux environment. + +1. System Dependence + + `GCC 5.3.1+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.4.3+` + + After installation, set environment variables `PATH', `JAVA_HOME', etc. + +2. Compile Doris + + ``` + $ sh build.sh + ``` + After compilation, the output file is in the `output/` directory. diff --git a/docs/documentation/en/installing/index.rst b/docs/documentation/en/installing/index.rst new file mode 100644 index 00000000000000..29e0e2f4a1d0d3 --- /dev/null +++ b/docs/documentation/en/installing/index.rst @@ -0,0 +1,9 @@ +=========== +编译与部署 +=========== + +.. toctree:: + + compilation.md + install-deploy.md + upgrade.md diff --git a/docs/documentation/en/installing/install-deploy.md b/docs/documentation/en/installing/install-deploy.md new file mode 100644 index 00000000000000..c284e388750833 --- /dev/null +++ b/docs/documentation/en/installing/install-deploy.md @@ -0,0 +1,380 @@ +# 安装与部署 + +该文档主要介绍了部署 Doris 所需软硬件环境、建议的部署方式、集群扩容缩容,以及集群搭建到运行过程中的常见问题。 +在阅读本文档前,请先根据编译文档编译 Doris。 + +## 软硬件需求 + +### 概述 + +Doris 作为一款开源的 MPP 架构 OLAP 数据库,能够运行在绝大多数主流的商用服务器上。为了能够充分运用 MPP 架构的并发优势,以及 Doris 的高可用特性,我们建议 Doris 的部署遵循以下需求: + +#### Linux 操作系统版本需求 + +| Linux 系统 | 版本 | +|---|---| +| CentOS | 7.1 及以上 | +| Ubuntu | 16.04 及以上 | + +#### 软件需求 + +| 软件 | 版本 | +|---|---| +| Java | 1.8 及以上 | +| GCC | 4.8.2 及以上 | + +#### 开发测试环境 + +| 模块 | CPU | 内存 | 磁盘 | 网络 | 实例数量 | +|---|---|---|---|---|---| +| Frontend | 8核+ | 8GB+ | SSD 或 SATA,10GB+ * | 千兆网卡 | 1 | +| Backend | 8核+ | 16GB+ | SSD 或 SATA,50GB+ * | 千兆网卡 | 1-3 * | + +#### 生产环境 + +| 模块 | CPU | 内存 | 磁盘 | 网络 | 实例数量(最低要求) | +|---|---|---|---|---|---| +| Frontend | 16核+ | 64GB+ | SSD 或 RAID 卡,100GB+ * | 万兆网卡 | 1-5 * | +| Backend | 16核+ | 64GB+ | SSD 或 SATA,100G+ * | 万兆网卡 | 10-100 * | + +> 注1: +> 1. FE 的磁盘空间主要用于存储元数据,包括日志和 image。通常从几百 MB 到几个 GB 不等。 +> 2. BE 的磁盘空间主要用于存放用户数据,总磁盘空间按用户总数据量 * 3(3副本)计算,然后再预留额外 40% 的空间用作后台 compaction 以及一些中间数据的存放。 +> 3. 一台机器上可以部署多个 BE 实例,但是**只能部署一个 FE**。如果需要 3 副本数据,那么至少需要 3 台机器各部署一个 BE 实例(而不是1台机器部署3个BE实例)。**多个FE所在服务器的时钟必须保持一致(允许最多5秒的时钟偏差)** +> 4. 
测试环境也可以仅适用一个 BE 进行测试。实际生产环境,BE 实例数量直接决定了整体查询延迟。 +> 5. 所有部署节点关闭 Swap。 + +> 注2:FE 节点的数量 +> 1. FE 角色分为 Follower 和 Observer,(Leader 为 Follower 组中选举出来的一种角色,以下统称 Follower,具体含义见 [元数据设计文档](../internal/metadata-design))。 +> 2. FE 节点数据至少为1(1 个 Follower)。当部署 1 个 Follower 和 1 个 Observer 时,可以实现读高可用。当部署 3 个 Follower 时,可以实现读写高可用(HA)。 +> 3. Follower 的数量**必须**为奇数,Observer 数量随意。 +> 4. 根据以往经验,当集群可用性要求很高是(比如提供在线业务),可以部署 3 个 Follower 和 1-3 个 Observer。如果是离线业务,建议部署 1 个 Follower 和 1-3 个 Observer。 + +* **通常我们建议 10 ~ 100 台左右的机器,来充分发挥 Doris 的性能(其中 3 台部署 FE(HA),剩余的部署 BE)** +* **当然,Doris的性能与节点数量及配置正相关。在最少4台机器(一台 FE,三台 BE,其中一台 BE 混部一个 Observer FE 提供元数据备份),以及较低配置的情况下,依然可以平稳的运行 Doris。** +* **如果 FE 和 BE 混部,需注意资源竞争问题,并保证元数据目录和数据目录分属不同磁盘。** + +#### Broker 部署 + +Broker 是用于访问外部数据源(如 hdfs)的进程。通常,在每台机器上部署一个 broker 实例即可。 + +#### 网络需求 + +Doris 各个实例直接通过网络进行通讯。以下表格展示了所有需要的端口 + +| 实例名称 | 端口名称 | 默认端口 | 通讯方向 | 说明 | +|---|---|---|---| ---| +| BE | be_port | 9060 | FE --> BE | BE 上 thrift server 的端口,用于接收来自 FE 的请求 | +| BE | be\_rpc_port | 9070 | BE <--> BE | BE 之间 rpc 使用的端口 | +| BE | webserver_port | 8040 | BE <--> BE | BE 上的 http server 的端口 | +| BE | heartbeat\_service_port | 9050 | FE --> BE | BE 上心跳服务端口(thrift),用户接收来自 FE 的心跳 | +| BE | brpc\_port* | 8060 | FE<-->BE, BE <--> BE | BE 上的 brpc 端口,用于 BE 之间通讯 | +| FE | http_port * | 8030 | FE <--> FE,用户 |FE 上的 http server 端口 | +| FE | rpc_port | 9020 | BE --> FE, FE <--> FE | FE 上的 thrift server 端口 | +| FE | query_port | 9030 | 用户 | FE 上的 mysql server 端口 | +| FE | edit\_log_port | 9010 | FE <--> FE | FE 上的 bdbje 之间通信用的端口 | +| Broker | broker\_ipc_port | 8000 | FE --> Broker, BE --> Broker | Broker 上的 thrift server,用于接收请求 | + +> 注: +> 1. 当部署多个 FE 实例时,要保证 FE 的 http\_port 配置相同。 +> 2. 部署前请确保各个端口在应有方向上的访问权限。 +> 3. brpc\_port 在 0.8.2 版本后替代了 be\_rpc_port + +#### IP 绑定 + +因为有多网卡的存在,或因为安装过 docker 等环境导致的虚拟网卡的存在,同一个主机可能存在多个不同的 ip。当前 Doris 并不能自动识别可用 IP。所以当遇到部署主机上有多个 IP 时,必须通过 priority\_networks 配置项来强制指定正确的 IP。 + +priority\_networks 是 FE 和 BE 都有的一个配置,配置项需写在 fe.conf 和 be.conf 中。该配置项用于在 FE 或 BE 启动时,告诉进程应该绑定哪个IP。示例如下: + +`priority_networks=10.1.3.0/24` + +这是一种 [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) 的表示方法。FE 或 BE 会根据这个配置项来寻找匹配的IP,作为自己的 localIP。 + +**注意**:当配置完 priority\_networks 并启动 FE 或 BE 后,只是保证了 FE 或 BE 自身的 IP 进行了正确的绑定。而在使用 ADD BACKEND 或 ADD FRONTEND 语句中,也需要指定和 priority\_networks 配置匹配的 IP,否则集群无法建立。举例: + +BE 的配置为:`priority_networks=10.1.3.0/24` + +但是在 ADD BACKEND 时使用的是:`ALTER SYSTEM ADD BACKEND "192.168.0.1:9050";` + +则 FE 和 BE 将无法正常通信。 + +这时,必须 DROP 掉这个添加错误的 BE,重新使用正确的 IP 执行 ADD BACKEND。 + +FE 同理。 + +BROKER 当前没有,也不需要 priority\_networks 这个选项。Broker 的服务默认绑定在 0.0.0.0 上。只需在 ADD BROKER 时,执行正确可访问的 BROKER IP 即可。 + +## 集群部署 + +### 手动部署 + +#### FE 部署 + +* 拷贝 FE 部署文件到指定节点 + + 将源码编译生成的 output 下的 fe 文件夹拷贝到 FE 的节点指定部署路径下。 + +* 配置 FE + + 1. 配置文件为 conf/fe.conf。其中注意:`meta_dir`:元数据存放位置。默认在 fe/palo-meta/ 下。需**手动创建**该目录。 + 2. 
fe.conf 中 JAVA_OPTS 默认 java 最大堆内存为 2GB,建议生产环境调整至 8G 以上。 + +* 启动FE + + `sh bin/start_fe.sh --daemon` + + FE进程启动进入后台执行。日志默认存放在 fe/log/ 目录下。如启动失败,可以通过查看 fe/log/fe.log 或者 fe/log/fe.out 查看错误信息。 + +* 如需部署多 FE,请参见 "FE 扩容和缩容" 章节 + +#### BE 部署 + +* 拷贝 BE 部署文件到所有要部署 BE 的节点 + + 将源码编译生成的 output 下的 be 文件夹拷贝到 BE 的节点的指定部署路径下。 + +* 修改所有 BE 的配置 + + 修改 be/conf/be.conf。主要是配置 `storage_root_path`:数据存放目录,使用 `;` 分隔(最后一个目录后不要加 `;`),其它可以采用默认值。 + +* 在 FE 中添加所有 BE 节点 + + BE 节点需要先在 FE 中添加,才可加入集群。可以使用 mysql-client 连接到 FE: + + `./mysql-client -h host -P port -uroot` + + 其中 host 为 FE 所在节点 ip;port 为 fe/conf/fe.conf 中的 query_port;默认使用 root 账户,无密码登录。 + + 登录后,执行以下命令来添加每一个 BE: + + `ALTER SYSTEM ADD BACKEND "host:port";` + + 如果使用多租户功能,则执行以下命令添加 BE: + + `ALTER SYSTEM ADD FREE BACKEND "host:port";` + + 其中 host 为 BE 所在节点 ip;port 为 be/conf/be.conf 中的 heartbeat_service_port。 + + 如果不添加 FREE 关键字,BE 默认进入自动生成的 cluster,添加了 FREE 关键字后新的 BE 不属于任何 cluster,这样创建新 cluster 的时候就可以从这些空闲的be中选取,详细见[多租户设计文档](../administrator-guide/operation/multi-tenant.md) + +* 启动 BE + + `sh bin/start_be.sh --daemon` + + BE 进程将启动并进入后台执行。日志默认存放在 be/log/ 目录下。如启动失败,可以通过查看 be/log/be.log 或者 be/log/be.out 查看错误信息。 + +* 查看BE状态 + + 使用 mysql-client 连接到 FE,并执行 `SHOW PROC '/backends';` 查看 BE 运行情况。如一切正常,`isAlive` 列应为 `true`。 + +#### (可选)FS_Broker 部署 + +Broker 以插件的形式,独立于 Doris 部署。如果需要从第三方存储系统导入数据,需要部署相应的 Broker,默认提供了读取 HDFS 和百度云 BOS 的 fs_broker。fs_broker 是无状态的,建议每一个 FE 和 BE 节点都部署一个 Broker。 + +* 拷贝源码 fs_broker 的 output 目录下的相应 Broker 目录到需要部署的所有节点上。建议和 BE 或者 FE 目录保持同级。 + +* 修改相应 Broker 配置 + + 在相应 broker/conf/ 目录下对应的配置文件中,可以修改相应配置。 + + * 启动 Broker + + `sh bin/start_broker.sh --daemon` 启动 Broker。 + +* 添加 Broker + + 要让 Doris 的 FE 和 BE 知道 Broker 在哪些节点上,通过 sql 命令添加 Broker 节点列表。 + + 使用 mysql-client 连接启动的 FE,执行以下命令: + + `ALTER SYSTEM ADD BROKER broker_name "host1:port1","host2:port2",...;` + + 其中 host 为 Broker 所在节点 ip;port 为 Broker 配置文件中的 broker\_ipc\_port。 + +* 查看 Broker 状态 + + 使用 mysql-client 连接任一已启动的 FE,执行以下命令查看 Broker 状态:`SHOW PROC "/brokers";` + +**注:在生产环境中,所有实例都应使用守护进程启动,以保证进程退出后,会被自动拉起,如 [Supervisor](http://supervisord.org/)。如需使用守护进程启动,在 0.9.0 及之前版本中,需要修改各个 start_xx.sh 脚本,去掉最后的 & 符号**。从 0.10.0 版本开始,直接调用 `sh start_xx.sh` 启动即可。也可参考 [这里](https://www.cnblogs.com/lenmom/p/9973401.html) + +## 扩容缩容 + +Doris 可以很方便的扩容和缩容 FE、BE、Broker 实例。 + +### FE 扩容和缩容 + +可以通过将 FE 扩容至 3 个一上节点来实现 FE 的高可用。 + +用户可以通过 mysql 客户端登陆 Master FE。通过: + +`SHOW PROC '/frontends';` + +来查看当前 FE 的节点情况。 + +也可以通过前端页面连接:```http://fe_hostname:fe_http_port/frontend``` 或者 ```http://fe_hostname:fe_http_port/system?path=//frontends``` 来查看 FE 节点的情况。 + +以上方式,都需要 Doris 的 root 用户权限。 + +FE 节点的扩容和缩容过程,不影响当前系统运行。 + +#### 增加 FE 节点 + +FE 分为 Leader,Follower 和 Observer 三种角色。 默认一个集群,只能有一个 Leader,可以有多个 Follower 和 Observer。其中 Leader 和 Follower 组成一个 Paxos 选择组,如果 Leader 宕机,则剩下的 Follower 会自动选出新的 Leader,保证写入高可用。Observer 同步 Leader 的数据,但是不参加选举。如果只部署一个 FE,则 FE 默认就是 Leader。 + +第一个启动的 FE 自动成为 Leader。在此基础上,可以添加若干 Follower 和 Observer。 + +添加 Follower 或 Observer。使用 mysql-client 连接到已启动的 FE,并执行: + +`ALTER SYSTEM ADD FOLLOWER "host:port";` + +或 + +`ALTER SYSTEM ADD OBSERVER "host:port";` + +其中 host 为 Follower 或 Observer 所在节点 ip,port 为其配置文件 fe.conf 中的 edit\_log\_port。 + +配置及启动 Follower 或 Observer。Follower 和 Observer 的配置同 Leader 的配置。第一次启动时,需执行以下命令: + +`./bin/start_fe.sh --helper host:port --daemon` + +查看 Follower 或 Observer 运行状态。使用 mysql-client 连接到任一已启动的 FE,并执行:SHOW PROC '/frontends'; 可以查看当前已加入集群的 FE 及其对应角色。 + +> FE 扩容注意事项: +> 1. Follower FE(包括 Leader)的数量必须为奇数,建议最多部署 3 个组成高可用(HA)模式即可。 +> 2. 
当 FE 处于高可用部署时(1个 Leader,2个 Follower),我们建议通过增加 Observer FE 来扩展 FE 的读服务能力。当然也可以继续增加 Follower FE,但几乎是不必要的。 +> 3. 通常一个 FE 节点可以应对 10-20 台 BE 节点。建议总的 FE 节点数量在 10 个以下。而通常 3 个即可满足绝大部分需求。 + +#### 删除 FE 节点 + +使用以下命令删除对应的 FE 节点: + +```ALTER SYSTEM DROP FOLLOWER[OBSERVER] "fe_host:edit_log_port";``` + +> FE 缩容注意事项: +> 1. 删除 Follower FE 时,确保最终剩余的 Follower(包括 Leader)节点为奇数。 + +### BE 扩容和缩容 + +用户可以通过 mysql-client 登陆 Leader FE。通过: + +```SHOW PROC '/backends';``` + +来查看当前 BE 的节点情况。 + +也可以通过前端页面连接:```http://fe_hostname:fe_http_port/backend``` 或者 ```http://fe_hostname:fe_http_port/system?path=//backends``` 来查看 BE 节点的情况。 + +以上方式,都需要 Doris 的 root 用户权限。 + +BE 节点的扩容和缩容过程,不影响当前系统运行以及正在执行的任务,并且不会影响当前系统的性能。数据均衡会自动进行。根据集群现有数据量的大小,集群会在几个小时到1天不等的时间内,恢复到负载均衡的状态。集群负载情况,可以参见 [Tablet 负载均衡文档](../administrator-guide/operation/tablet-repair-and-balance.md)。 + +#### 增加 BE 节点 + +BE 节点的增加方式同 **BE 部署** 一节中的方式,通过 `ALTER SYSTEM ADD BACKEND` 命令增加 BE 节点。 + +> BE 扩容注意事项: +> 1. BE 扩容后,Doris 会自动根据负载情况,进行数据均衡,期间不影响使用。 + +#### 删除 BE 节点 + +删除 BE 节点有两种方式:DROP 和 DECOMMISSION + +DROP 语句如下: + +```ALTER SYSTEM DROP BACKEND "be_host:be_heartbeat_service_port";``` + +**注意:DROP BACKEND 会直接删除该 BE,并且其上的数据将不能再恢复!!!所以我们强烈不推荐使用 DROP BACKEND 这种方式删除 BE 节点。当你使用这个语句时,会有对应的防误操作提示。** + +DECOMMISSION 语句如下: + +```ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` + +> DECOMMISSION 命令说明: +> 1. 该命令用于安全删除 BE 节点。命令下发后,Doris 会尝试将该 BE 上的数据向其他 BE 节点迁移,当所有数据都迁移完成后,Doris 会自动删除该节点。 +> 2. 该命令是一个异步操作。执行后,可以通过 ```SHOW PROC '/backends';``` 看到该 BE 节点的 isDecommission 状态为 true。表示该节点正在进行下线。 +> 3. 该命令**不一定执行成功**。比如剩余 BE 存储空间不足以容纳下线 BE 上的数据,或者剩余机器数量不满足最小副本数时,该命令都无法完成,并且 BE 会一直处于 isDecommission 为 true 的状态。 +> 4. DECOMMISSION 的进度,可以通过 ```SHOW PROC '/backends';``` 中的 TabletNum 查看,如果正在进行,TabletNum 将不断减少。 +> 5. 该操作可以通过: +> ```CANCEL ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` +> 命令取消。取消后,该 BE 上的数据将维持当前剩余的数据量。后续 Doris 重新进行负载均衡 + +**对于多租户部署环境下,BE 节点的扩容和缩容,请参阅 [多租户设计文档](../administrator-guide/operation/multi-tenant.md)。** + +### Broker 扩容缩容 + +Broker 实例的数量没有硬性要求。通常每台物理机部署一个即可。Broker 的添加和删除可以通过以下命令完成: + +```ALTER SYSTEM ADD BROKER broker_name "broker_host:broker_ipc_port";``` +```ALTER SYSTEM DROP BROKER broker_name "broker_host:broker_ipc_port";``` +```ALTER SYSTEM DROP ALL BROKER broker_name;``` + +Broker 是无状态的进程,可以随意启停。当然,停止后,正在其上运行的作业会失败,重试即可。 + +## 常见问题 + +### 进程相关 + +1. 如何确定 FE 进程启动成功 + + FE 进程启动后,会首先加载元数据,根据 FE 角色的不同,在日志中会看到 ```transfer from UNKNOWN to MASTER/FOLLOWER/OBSERVER```。最终会看到 ```thrift server started``` 日志,并且可以通过 mysql 客户端连接到 FE,则表示 FE 启动成功。 + + 也可以通过如下连接查看是否启动成功: + `http://fe_host:fe_http_port/api/bootstrap` + + 如果返回: + `{"status":"OK","msg":"Success"}` + + 则表示启动成功,其余情况,则可能存在问题。 + + > 注:如果在 fe.log 中查看不到启动失败的信息,也许在 fe.out 中可以看到。 + +2. 如何确定 BE 进程启动成功 + + BE 进程启动后,如果之前有数据,则可能有数分钟不等的数据索引加载时间。 + + 如果是 BE 的第一次启动,或者该 BE 尚未加入任何集群,则 BE 日志会定期滚动 ```waiting to receive first heartbeat from frontend``` 字样。表示 BE 还未通过 FE 的心跳收到 Master 的地址,正在被动等待。这种错误日志,在 FE 中 ADD BACKEND 并发送心跳后,就会消失。如果在接到心跳后,又重复出现 ``````master client, get client from cache failed.host: , port: 0, code: 7`````` 字样,说明 FE 成功连接了 BE,但 BE 无法主动连接 FE。可能需要检查 BE 到 FE 的 rpc_port 的连通性。 + + 如果 BE 已经被加入集群,日志中应该每隔 5 秒滚动来自 FE 的心跳日志:```get heartbeat, host: xx.xx.xx.xx, port: 9020, cluster id: xxxxxx```,表示心跳正常。 + + 其次,日志中应该每隔 10 秒滚动 ```finish report task success. 
return code: 0``` 的字样,表示 BE 向 FE 的通信正常。 + + 同时,如果有数据查询,应该能看到不停滚动的日志,并且有 ```execute time is xxx``` 日志,表示 BE 启动成功,并且查询正常。 + + 也可以通过如下连接查看是否启动成功: + `http://be_host:be_http_port/api/health` + + 如果返回: + `{"status": "OK","msg": "To Be Added"}` + + 则表示启动成功,其余情况,则可能存在问题。 + + > 注:如果在 be.INFO 中查看不到启动失败的信息,也许在 be.out 中可以看到。 + +3. 搭建系统后,如何确定 FE、BE 连通性正常 + + 首先确认 FE 和 BE 进程都已经单独正常启动,并确认已经通过 `ADD BACKEND` 或者 `ADD FOLLOWER/OBSERVER` 语句添加了所有节点。 + + 如果心跳正常,BE 的日志中会显示 ```get heartbeat, host: xx.xx.xx.xx, port: 9020, cluster id: xxxxxx```。如果心跳失败,在 FE 的日志中会出现 ```backend[10001] got Exception: org.apache.thrift.transport.TTransportException``` 类似的字样,或者其他 thrift 通信异常日志,表示 FE 向 10001 这个 BE 的心跳失败。这里需要检查 FE 向 BE host 的心跳端口的连通性。 + + 如果 BE 向 FE 的通信正常,则 BE 日志中会显示 ```finish report task success. return code: 0``` 的字样。否则会出现 ```master client, get client from cache failed``` 的字样。这种情况下,需要检查 BE 向 FE 的 rpc_port 的连通性。 + +4. Doris 各节点认证机制 + + 除了 Master FE 以外,其余角色节点(Follower FE,Observer FE,Backend),都需要通过 `ALTER SYSTEM ADD` 语句先注册到集群,然后才能加入集群。 + + Master FE 在第一次启动时,会在 palo-meta/image/VERSION 文件中生成一个 cluster_id。 + + FE 在第一次加入集群时,会首先从 Master FE 获取这个文件。之后每次 FE 之间的重新连接(FE 重启),都会校验自身 cluster id 是否与已存在的其它 FE 的 cluster id 相同。如果不同,则该 FE 会自动退出。 + + BE 在第一次接收到 Master FE 的心跳时,会从心跳中获取到 cluster id,并记录到数据目录的 `cluster_id` 文件中。之后的每次心跳都会比对 FE 发来的 cluster id。如果 cluster id 不相等,则 BE 会拒绝响应 FE 的心跳。 + + 心跳中同时会包含 Master FE 的 ip。当 FE 切主时,新的 Master FE 会携带自身的 ip 发送心跳给 BE,BE 会更新自身保存的 Master FE 的 ip。 + + > **priority\_network** + > + > priority\_network 是 FE 和 BE 都有一个配置,其主要目的是在多网卡的情况下,协助 FE 或 BE 识别自身 ip 地址。priority\_network 采用 CIDR 表示法:[RFC 4632](https://tools.ietf.org/html/rfc4632) + > + > 当确认 FE 和 BE 连通性正常后,如果仍然出现建表 Timeout 的情况,并且 FE 的日志中有 `backend does not found. host: xxx.xxx.xxx.xxx` 字样的错误信息。则表示 Doris 自动识别的 IP 地址有问题,需要手动设置 priority\_network 参数。 + > + > 出现这个问题的主要原因是:当用户通过 `ADD BACKEND` 语句添加 BE 后,FE 会识别该语句中指定的是 hostname 还是 IP。如果是 hostname,则 FE 会自动将其转换为 IP 地址并存储到元数据中。当 BE 在汇报任务完成信息时,会携带自己的 IP 地址。而如果 FE 发现 BE 汇报的 IP 地址和元数据中不一致时,就会出现如上错误。 + > + > 这个错误的解决方法:1)分别在 FE 和 BE 设置 **priority\_network** 参数。通常 FE 和 BE 都处于一个网段,所以该参数设置为相同即可。2)在 `ADD BACKEND` 语句中直接填写 BE 正确的 IP 地址而不是 hostname,以避免 FE 获取到错误的 IP 地址。 diff --git a/docs/documentation/en/installing/install-deploy_EN.md b/docs/documentation/en/installing/install-deploy_EN.md new file mode 100644 index 00000000000000..d5597f007f8e76 --- /dev/null +++ b/docs/documentation/en/installing/install-deploy_EN.md @@ -0,0 +1,391 @@ +# Installation and deployment + +This document mainly introduces the hardware and software environment needed to deploy Doris, the proposed deployment mode, cluster expansion and scaling, and common problems in the process of cluster building and running. +Before reading this document, compile Doris according to the compiled document. + +## Software and hardware requirements + +### Overview + +Doris, as an open source MPP architecture OLAP database, can run on most mainstream commercial servers. 
In order to make full use of the concurrency advantages of the MPP architecture and the high-availability features of Doris, we recommend that the deployment of Doris follow the requirements below:
+
+#### Linux Operating System Version Requirements
+
+| Linux System | Version |
+|---|---|
+| CentOS | 7.1 and above |
+| Ubuntu | 16.04 and above |
+
+#### Software Requirements
+
+| Software | Version |
+|---|---|
+| Java | 1.8 and above |
+| GCC | 4.8.2 and above |
+
+#### Development and Test Environment
+
+| Module | CPU | Memory | Disk | Network | Number of Instances |
+|---|---|---|---|---|---|
+| Frontend | 8 core+ | 8GB+ | SSD or SATA, 10GB+ * | Gigabit network card | 1 |
+| Backend | 8 core+ | 16GB+ | SSD or SATA, 50GB+ * | Gigabit network card | 1-3 * |
+
+#### Production Environment
+
+| Module | CPU | Memory | Disk | Network | Number of Instances (Minimum Requirement) |
+|---|---|---|---|---|---|
+| Frontend | 16 core+ | 64GB+ | SSD or RAID card, 100GB+ * | 10 Gb network card | 1-5 * |
+| Backend | 16 core+ | 64GB+ | SSD or SATA, 100GB+ * | 10 Gb network card | 10-100 * |
+
+> Note 1:
+>
+> 1. The disk space of FE is mainly used to store metadata, including logs and images. It usually ranges from several hundred MB to several GB.
+> 2. The disk space of BE is mainly used to store user data. The total disk space is calculated as the user's total data volume * 3 (3 replicas), plus an additional 40% of space reserved for background compaction and intermediate data.
+> 3. Multiple BE instances can be deployed on a single machine, but **only one FE instance can be deployed per machine**. If 3 replicas of the data are required, at least 3 machines are needed, each with one BE instance (rather than one machine with 3 BE instances). **The clocks of the servers hosting multiple FEs must be consistent (a clock deviation of at most 5 seconds is allowed).**
+> 4. The test environment can also be tested with only one BE. In the actual production environment, the number of BE instances directly determines the overall query latency.
+> 5. Disable swap on all deployment nodes.
+
+> Note 2: Number of FE nodes
+>
+> 1. FE roles are divided into Follower and Observer. (Leader is a role elected from the Follower group, hereinafter collectively referred to as Follower; for the specific meaning, see the [Metadata Design Document](../internal/metadata-design).)
+> 2. The number of FE nodes is at least 1 (1 Follower). With 1 Follower and 1 Observer deployed, high read availability can be achieved. With 3 Followers deployed, read-write high availability (HA) can be achieved.
+> 3. The number of Followers **must be** odd; the number of Observers is arbitrary.
+> 4. According to past experience, when cluster availability requirements are high (e.g. serving online business), 3 Followers and 1-3 Observers can be deployed. For offline business, deploying 1 Follower and 1-3 Observers is recommended.
+
+* **Usually we recommend about 10 to 100 machines to give full play to Doris's performance (3 of them deploy FE (HA) and the rest deploy BE).**
+* **Of course, Doris's performance is positively correlated with the number and configuration of nodes.
With a minimum of four machines (one FE, three BEs, one of which is co-located with an Observer FE to provide metadata backup) and a lower configuration, Doris can still run smoothly.**
+* **If FE and BE are co-located, pay attention to resource competition and make sure the metadata directory and the data directory are on different disks.**
+
+#### Broker deployment
+
+Broker is a process for accessing external data sources, such as hdfs. Usually, one broker instance is deployed on each machine.
+
+#### Network Requirements
+
+Doris instances communicate directly over the network. The following table shows all the required ports
+
+| Instance Name | Port Name | Default Port | Communication Direction | Description |
+|---|---|---|---|---|
+| BE | be_port | 9060 | FE --> BE | thrift server port on BE, used to receive requests from FE |
+| BE | be\_rpc_port | 9070 | BE <--> BE | port used for RPC between BEs |
+| BE | webserver_port | 8040 | BE <--> BE | http server port on BE |
+| BE | heartbeat\_service_port | 9050 | FE --> BE | heartbeat service port (thrift) on BE, used to receive heartbeats from FE |
+| BE | brpc\_port* | 8060 | FE <--> BE, BE <--> BE | brpc port on BE, used for communication between BEs |
+| FE | http_port * | 8030 | FE <--> FE, user | http server port on FE |
+| FE | rpc_port | 9020 | BE --> FE, FE <--> FE | thrift server port on FE |
+| FE | query_port | 9030 | user | mysql server port on FE |
+| FE | edit\_log_port | 9010 | FE <--> FE | port used for communication between the bdbje instances on FEs |
+| Broker | broker\_ipc_port | 8000 | FE --> Broker, BE --> Broker | thrift server on Broker, used to receive requests |
+
+> Note:
+>
+> 1. When deploying multiple FE instances, make sure that the http\_port configuration of all FEs is the same.
+> 2. Before deployment, make sure that each port is reachable in the required direction.
+> 3. brpc\_port replaced be\_rpc_port after version 0.8.2
+
+#### IP binding
+
+Because of multiple network cards, or virtual network cards created by installing docker and similar environments, the same host may have several different IPs. Currently Doris cannot automatically identify the available IP. So when the deployment host has multiple IPs, the correct IP must be forced through the priority\_networks configuration item.
+
+priority\_networks is a configuration item that both FE and BE have; it needs to be written in fe.conf and be.conf. It tells the process which IP to bind when FE or BE starts. Example:
+
+`priority_networks=10.1.3.0/24`
+
+This is a [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) notation. FE or BE uses this configuration item to find the matching IP and uses it as its own local IP.
+
+**Note**: Configuring priority\_networks and starting FE or BE only ensures that FE or BE binds the correct IP itself. The ADD BACKEND or ADD FRONTEND statements must also use an IP that matches the priority\_networks configuration, otherwise the cluster cannot be established. For example:
+
+BE is configured as: `priority_networks=10.1.3.0/24`
+
+But the statement used in ADD BACKEND is: `ALTER SYSTEM ADD BACKEND "192.168.0.1:9050";`
+
+Then FE and BE will not be able to communicate properly.
+
+In this case, the incorrectly added BE must be DROPped, and ADD BACKEND must be executed again with the correct IP.
+
+The same applies to FE.
+
+BROKER currently does not have, and does not need, the priority\_networks option. Broker's service is bound to 0.0.0.0 by default. Simply specify a reachable BROKER IP when executing ADD BROKER.
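+
+A minimal sketch of the note above, with a hypothetical BE host in the 10.1.3.0/24 subnet (the IP is an example only; 9050 is the default heartbeat\_service_port from the port table):
+
+```
+# In be.conf (and similarly in fe.conf) on host 10.1.3.12:
+priority_networks=10.1.3.0/24
+```
+
+```
+ALTER SYSTEM ADD BACKEND "10.1.3.12:9050";
+```
+
+Here the IP used in ADD BACKEND matches the configured subnet, so FE and BE can communicate properly.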
+ +## Cluster deployment + +### Manual deployment + +#### FE 部署 + +* Copy the FE deployment file to the specified node + + Copy the Fe folder under output generated by source code compilation to the node specified deployment path of FE. + +* Configure FE + + 1. The configuration file is conf/fe.conf. Note: `meta_dir`: Metadata storage location. The default is fe/palo-meta/. The directory needs to be **created manually** by. + 2. JAVA_OPTS in fe.conf defaults to a maximum heap memory of 2GB for java, and it is recommended that the production environment be adjusted to more than 8G. + +* Start FE + + `sh bin/start_fe.sh --daemon` + + The FE process starts and enters the background execution. Logs are stored in the fe/log/directory by default. If startup fails, you can view error messages by looking at fe/log/fe.log or fe/log/fe.out. + +* For deployment of multiple FEs, see the section "FE scaling and downsizing" + +#### BE 部署 + +* Copy BE deployment files to all nodes to deploy BE + + Copy the be folder under output generated by source code compilation to the specified deployment path of the BE node. + +* Modify all BE configurations + + Modify be/conf/be.conf. Mainly configure `storage_root_path`: data storage directory, using `;` separation (do not add `;` after the last directory), others can use default values. + +* Add all BE nodes to FE + + BE nodes need to be added in FE before they can join the cluster. You can use mysql-client to connect to FE: + + `./mysql-client -h host -P port -uroot` + + The host is the node IP where FE is located; the port is the query_port in fe/conf/fe.conf; the root account is used by default and no password is used to login. + + After login, execute the following commands to add each BE: + + `ALTER SYSTEM ADD BACKEND "host:port";` + + If the multi-tenant function is used, the following command is executed to add BE: + + `ALTER SYSTEM ADD FREE BACKEND "host:port";` + + The host is the node IP where BE is located; the port is heartbeat_service_port in be/conf/be.conf. + + If the FREE keyword is not added, BE defaults to the automatically generated cluster, and the new BE does not belong to any cluster after adding the FREE keyword, so that when creating a new cluster, it can be selected from these free be, as detailed in [Multi-tenant Design Document] (./administrator-guide/operation/multi-tenant.md) + +* Start BE + + `sh bin/start_be.sh --daemon` + + The BE process will start and go into the background for execution. Logs are stored in be/log/directory by default. If startup fails, you can view error messages by looking at be/log/be.log or be/log/be.out. + +* View BE status + + Connect to FE using mysql-client and execute `SHOW PROC'/ backends'; `View BE operation. If everything is normal, the `isAlive`column should be `true`. + +#### (Optional) FS_Broker deployment + +Broker is deployed as a plug-in, independent of Doris. If you need to import data from a third-party storage system, you need to deploy the corresponding Broker. By default, it provides fs_broker to read HDFS and Baidu cloud BOS. Fs_broker is stateless and it is recommended that each FE and BE node deploy a Broker. + +* Copy the corresponding Broker directory in the output directory of the source fs_broker to all the nodes that need to be deployed. It is recommended to maintain the same level as the BE or FE directories. + +* Modify the corresponding Broker configuration + + In the corresponding broker/conf/directory configuration file, you can modify the corresponding configuration. 
+ +* Start Broker + + `sh bin /start'u broker.sh --daemon ` start Broker + +* Add Broker + + To let Doris FE and BE know which nodes Broker is on, add a list of Broker nodes by SQL command. + + Use mysql-client to connect the FE started, and execute the following commands: + + `ALTER SYSTEM ADD BROKER broker_name "host1:port1","host2:port2",...;` + + The host is Broker's node ip; the port is brokeripcport in the Broker configuration file. + +* View Broker status + + Connect any booted FE using mysql-client and execute the following command to view Broker status: `SHOW PROC '/brokers';` + +**Note: In production environments, daemons should be used to start all instances to ensure that processes are automatically pulled up after they exit, such as [Supervisor] (http://supervisord.org/). For daemon startup, in 0.9.0 and previous versions, you need to modify the start_xx.sh scripts to remove the last & symbol**. Starting with version 0.10.0, call `sh start_xx.sh` directly to start. Also refer to [here] (https://www.cnblogs.com/lenmom/p/9973401.html) + +## Expansion and contraction + +Doris can easily expand and shrink FE, BE, Broker instances. + +### FE Expansion and Compression + +High availability of FE can be achieved by expanding FE to three top-one nodes. + +Users can login to Master FE through MySQL client. By: + +`SHOW PROC '/frontends';` + +To view the current FE node situation. + +You can also view the FE node through the front-end page connection: ``http://fe_hostname: fe_http_port/frontend`` or ```http://fe_hostname: fe_http_port/system? Path=//frontends```. + +All of the above methods require Doris's root user rights. + +The process of FE node expansion and contraction does not affect the current system operation. + +#### Adding FE nodes + +FE is divided into three roles: Leader, Follower and Observer. By default, a cluster can have only one Leader and multiple Followers and Observers. Leader and Follower form a Paxos selection group. If the Leader goes down, the remaining Followers will automatically select a new Leader to ensure high write availability. Observer synchronizes Leader data, but does not participate in the election. If only one FE is deployed, FE defaults to Leader. + +The first FE to start automatically becomes Leader. On this basis, several Followers and Observers can be added. + +Add Follower or Observer. Connect to the started FE using mysql-client and execute: + +`ALTER SYSTEM ADD FOLLOWER "host:port";` + +or + +`ALTER SYSTEM ADD OBSERVER "host:port";` + +The host is the node IP of Follower or Observer, and the port is edit\_log\_port in its configuration file fe.conf. + +Configure and start Follower or Observer. Follower and Observer are configured with Leader. The following commands need to be executed at the first startup: + +`./bin/start_fe.sh --helper host:port --daemon` + +View the status of Follower or Observer. Connect to any booted FE using mysql-client and execute: SHOW PROC'/frontends'; you can view the FE currently joined the cluster and its corresponding roles. + +> Notes for FE expansion: +> +> 1. The number of Follower FEs (including Leaders) must be odd. It is recommended that a maximum of three constituent high availability (HA) modes be deployed. +> 2. When FE is in a highly available deployment (1 Leader, 2 Follower), we recommend that the reading service capability of FE be extended by adding Observer FE. Of course, you can continue to add Follower FE, but it's almost unnecessary. +> 3. Usually a FE node can handle 10-20 BE nodes. 
It is suggested that the total number of FE nodes should be less than 10. Usually three can meet most of the needs. + +#### Delete FE nodes + +Delete the corresponding FE node using the following command: + +```ALTER SYSTEM DROP FOLLOWER[OBSERVER] "fe_host:edit_log_port";``` + +> Notes for FE contraction: +> +> 1. When deleting Follower FE, make sure that the remaining Follower (including Leader) nodes are odd. + +### BE Expansion and Compression + +Users can login to Leader FE through mysql-client. By: + +```SHOW PROC '/backends';``` + +To see the current BE node situation. + +You can also view the BE node through the front-end page connection: ``http://fe_hostname: fe_http_port/backend`` or ``http://fe_hostname: fe_http_port/system? Path=//backends``. + +All of the above methods require Doris's root user rights. + +The expansion and scaling process of BE nodes does not affect the current system operation and the tasks being performed, and does not affect the performance of the current system. Data balancing is done automatically. Depending on the amount of data available in the cluster, the cluster will be restored to load balancing in a few hours to a day. For cluster load, see the [Tablet Load Balancing Document] (../administrator-guide/operation/tablet-repair-and-balance.md). + +#### 增加 BE 节点 + +The BE node is added in the same way as in the **BE deployment** section. The BE node is added by the `ALTER SYSTEM ADD BACKEND` command. + +> Notes for BE expansion: +> +> 1. After BE expansion, Doris will automatically balance the data according to the load, without affecting the use during the period. + +#### Delete BE nodes + +There are two ways to delete BE nodes: DROP and DECOMMISSION + +The DROP statement is as follows: + +```ALTER SYSTEM DROP BACKEND "be_host:be_heartbeat_service_port";``` + +**Note: DROP BACKEND will delete the BE directly and the data on it will not be recovered!!! So we strongly do not recommend DROP BACKEND to delete BE nodes. When you use this statement, there will be corresponding error-proof operation hints.** + +DECOMMISSION 语句如下: + +```ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` + +> DECOMMISSION 命令说明: +> +> 1. This command is used to safely delete BE nodes. After the command is issued, Doris attempts to migrate the data on the BE to other BE nodes, and when all data is migrated, Doris automatically deletes the node. +> 2. The command is an asynchronous operation. After execution, you can see that the BE node's isDecommission status is true through ``SHOW PROC '/backends';` Indicates that the node is offline. +> 3. The order **does not necessarily carry out successfully**. For example, when the remaining BE storage space is insufficient to accommodate the data on the offline BE, or when the number of remaining machines does not meet the minimum number of replicas, the command cannot be completed, and the BE will always be in the state of isDecommission as true. +> 4. The progress of DECOMMISSION can be viewed through `SHOW PROC '/backends';` Tablet Num, and if it is in progress, Tablet Num will continue to decrease. +> 5. The operation can be carried out by: +> ```CANCEL ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` +> The order was cancelled. When cancelled, the data on the BE will maintain the current amount of data remaining. 
Follow-up Doris re-load balancing + +**For expansion and scaling of BE nodes in multi-tenant deployment environments, please refer to the [Multi-tenant Design Document] (./administrator-guide/operation/multi-tenant.md).** + +### Broker Expansion and Shrinkage + +There is no rigid requirement for the number of Broker instances. Usually one physical machine is deployed. Broker addition and deletion can be accomplished by following commands: + +```ALTER SYSTEM ADD BROKER broker_name "broker_host:broker_ipc_port";``` +```ALTER SYSTEM DROP BROKER broker_name "broker_host:broker_ipc_port";``` +```ALTER SYSTEM DROP ALL BROKER broker_name;``` + +Broker is a stateless process that can be started or stopped at will. Of course, when it stops, the job running on it will fail. Just try again. + +## Common Questions + +### Process correlation + +1. How to determine the success of FE process startup + + After the FE process starts, metadata is loaded first. According to the different roles of FE, you can see ```transfer from UNKNOWN to MASTER/FOLLOWER/OBSERVER```in the log. Eventually, you will see the ``thrift server started`` log and connect to FE through MySQL client, which indicates that FE started successfully. + + You can also check whether the startup was successful by connecting as follows: + + `http://fe_host:fe_http_port/api/bootstrap` + + If returned: + + `{"status":"OK","msg":"Success"}` + + The startup is successful, there may be problems in other cases. + + > Note: If you can't see the information of boot failure in fe. log, you may see it in fe. out. + +2. How to determine the success of BE process startup + + After the BE process starts, if there is data before, there may be several minutes of data index loading time. + + If BE is started for the first time or the BE has not joined any cluster, the BE log will periodically scroll the words `waiting to receive first heartbeat from frontend`. BE has not received Master's address through FE's heartbeat and is waiting passively. This error log will disappear after ADD BACKEND in FE sends the heartbeat. If the word `````master client', get client from cache failed. host:, port: 0, code: 7````` master client'appears again after receiving heartbeat, it indicates that FE has successfully connected BE, but BE cannot actively connect FE. It may be necessary to check the connectivity of rpc_port from BE to FE. + + If BE has been added to the cluster, the heartbeat log from FE should be scrolled every five seconds: ```get heartbeat, host:xx. xx.xx.xx, port:9020, cluster id:xxxxxxx```, indicating that the heartbeat is normal. + + Secondly, the word `finish report task success. return code: 0` should be scrolled every 10 seconds in the log to indicate that BE's communication to FE is normal. + + At the same time, if there is a data query, you should see the rolling logs, and have `execute time is xxx` logs, indicating that BE started successfully, and the query is normal. + + You can also check whether the startup was successful by connecting as follows: + + `http://be_host:be_http_port/api/health` + + If returned: + + `{"status": "OK","msg": "To Be Added"}` + + If the startup is successful, there may be problems in other cases. + + > Note: If you can't see the information of boot failure in be.INFO, you may see it in be.out. + +3. 
How to determine the normal connectivity of FE and BE after building the system + + Firstly, confirm that FE and BE processes have been started separately and normally, and confirm that all nodes have been added through `ADD BACKEND` or `ADD FOLLOWER/OBSERVER` statements. + + If the heartbeat is normal, BE logs will show ``get heartbeat, host:xx.xx.xx.xx, port:9020, cluster id:xxxxx`` If the heartbeat fails, the words ```backend [10001] get Exception: org.apache.thrift.transport.TTransportException``` will appear in FE's log, or other thrift communication abnormal log, indicating that the heartbeat fails from FE to 10001 BE. Here you need to check the connectivity of FE to BE host's heart-beating port. + + If BE's communication to FE is normal, the BE log will display the words `finish report task success. return code: 0`. Otherwise, the words `master client`, get client from cache failed` will appear. In this case, the connectivity of BE to the rpc_port of FE needs to be checked. + +4. Doris Node Authentication Mechanism + + In addition to Master FE, the other role nodes (Follower FE, Observer FE, Backend) need to register to the cluster through the `ALTER SYSTEM ADD` statement before joining the cluster. + + When Master FE is first started, a cluster_id is generated in the palo-meta/image/VERSION file. + + When FE first joins the cluster, it first retrieves the file from Master FE. Each subsequent reconnection between FEs (FE reboot) checks whether its cluster ID is the same as that of other existing FEs. If different, the FE will exit automatically. + + When BE first receives the heartbeat of Master FE, it gets the cluster ID from the heartbeat and records it in the `cluster_id` file of the data directory. Each heartbeat after that compares to the cluster ID sent by FE. If cluster IDs are not equal, BE will refuse to respond to FE's heartbeat. + + The heartbeat also contains Master FE's ip. When FE cuts the master, the new Master FE will carry its own IP to send the heartbeat to BE, BE will update its own saved Master FE ip. + + > **priority\_network** + > + > priority network is that both FE and BE have a configuration. Its main purpose is to assist FE or BE to identify their own IP addresses in the case of multi-network cards. Priority network is represented by CIDR: [RFC 4632] (https://tools.ietf.org/html/rfc4632) + > + > When the connectivity of FE and BE is confirmed to be normal, if the table Timeout still occurs, and the FE log has an error message with the words `backend does not find. host:xxxx.xxx.XXXX`. This means that there is a problem with the IP address that Doris automatically identifies and that priority\_network parameters need to be set manually. + > + > The main reason for this problem is that when the user adds BE through the `ADD BACKEND` statement, FE recognizes whether the statement specifies hostname or IP. If it is hostname, FE automatically converts it to an IP address and stores it in metadata. When BE reports on the completion of the task, it carries its own IP address. If FE finds that BE reports inconsistent IP addresses and metadata, it will make the above error. + > + > Solutions to this error: 1) Set **priority\_network** parameters in FE and BE respectively. Usually FE and BE are in a network segment, so this parameter can be set to the same. 2) Fill in the `ADD BACKEND` statement directly with the correct IP address of BE instead of hostname to avoid FE getting the wrong IP address. 
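+
+As a quick sanity check that ties the process checks above together, the HTTP interfaces mentioned there can also be probed directly from a shell. A minimal sketch (the hosts are hypothetical; 8030 and 8040 are the default FE http\_port and BE webserver\_port from the port table, assuming they serve the bootstrap and health APIs):
+
+```
+# FE bootstrap status; expect {"status":"OK","msg":"Success"}
+curl http://10.1.3.10:8030/api/bootstrap
+
+# BE health status; expect {"status": "OK","msg": "To Be Added"}
+curl http://10.1.3.12:8040/api/health
+```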
diff --git a/docs/documentation/en/installing/upgrade.md b/docs/documentation/en/installing/upgrade.md new file mode 100644 index 00000000000000..44456c11e1a699 --- /dev/null +++ b/docs/documentation/en/installing/upgrade.md @@ -0,0 +1,37 @@ +# 集群升级 + +Doris 可以通过滚动升级的方式,平滑进行升级。建议按照以下步骤进行安全升级。 + +> 注: +> 1. 以下方式均建立在高可用部署的情况下。即数据 3 副本,FE 高可用情况下。 + +## 测试 BE 升级正确性 + +1. 任意选择一个 BE 节点,部署最新的 palo_be 二进制文件。 +2. 重启 BE 节点,通过 BE 日志 be.INFO,查看是否启动成功。 +3. 如果启动失败,可以先排查原因。如果错误不可恢复,可以直接通过 DROP BACKEND 删除该 BE、清理数据后,使用上一个版本的 palo_be 重新启动 BE。然后重新 ADD BACKEND。(**该方法会导致丢失一个数据副本,请务必确保3副本完整的情况下,执行这个操作!!!**) + +## 测试 FE 元数据兼容性 + +0. **重要!!元数据兼容性异常很可能导致数据无法恢复!!** +1. 单独使用新版本部署一个测试用的 FE 进程(比如自己本地的开发机)。 +2. 修改测试用的 FE 的配置文件 fe.conf,将所有端口设置为**与线上不同**。 +3. 在 fe.conf 添加配置:cluster_id=123456 +4. 在 fe.conf 添加配置:metadata\_failure_recovery=true +5. 拷贝线上环境 Master FE 的元数据目录 palo-meta 到测试环境 +6. 将拷贝到测试环境中的 palo-meta/image/VERSION 文件中的 cluster_id 修改为 123456(即与第3步中相同) +7. 在测试环境中,运行 sh bin/start_fe.sh 启动 FE +8. 通过 FE 日志 fe.log 观察是否启动成功。 +9. 如果启动成功,运行 sh bin/stop_fe.sh 停止测试环境的 FE 进程。 +10. **以上 2-6 步的目的是防止测试环境的FE启动后,错误连接到线上环境中。** + +## 升级准备 + +1. 在完成数据正确性验证后,将 BE 和 FE 新版本的二进制文件分发到各自目录下。 +2. 通常小版本升级,BE 只需升级 palo_be;而 FE 只需升级 palo-fe.jar。如果是大版本升级,则可能需要升级其他文件(包括但不限于 bin/ lib/ 等等)如果你不清楚是否需要替换其他文件,建议全部替换。 + +## 滚动升级 + +1. 确认新版本的文件部署完成后。逐台重启 FE 和 BE 实例即可。 +2. 建议逐台重启 BE 后,再逐台重启 FE。因为通常 Doris 保证 FE 到 BE 的向后兼容性,即老版本的 FE 可以访问新版本的 BE。但可能不支持老版本的 BE 访问新版本的 FE。 +3. 建议确认前一个实例启动成功后,在重启下一个实例。实例启动成功的标识,请参阅安装部署文档。 diff --git a/docs/documentation/en/installing/upgrade_EN.md b/docs/documentation/en/installing/upgrade_EN.md new file mode 100644 index 00000000000000..84516105562cb0 --- /dev/null +++ b/docs/documentation/en/installing/upgrade_EN.md @@ -0,0 +1,37 @@ +# Cluster upgrade + +Doris can upgrade smoothly by rolling upgrades. The following steps are recommended for security upgrade. + +> Note: +> 1. The following approaches are based on highly available deployments. That is, data 3 replicas, FE high availability. + +## Test the correctness of BE upgrade + +1. Arbitrarily select a BE node and deploy the latest palo_be binary file. +2. Restart the BE node and check the BE log be.INFO to see if the boot was successful. +3. If the startup fails, you can check the reason first. If the error is not recoverable, you can delete the BE directly through DROP BACKEND, clean up the data, and restart the BE using the previous version of palo_be. Then re-ADD BACKEND. (**This method will result in the loss of a copy of the data, please make sure that three copies are complete, and perform this operation!!!** + +## Testing FE Metadata Compatibility + +0. **Important! Exceptional metadata compatibility is likely to cause data can not be restored!!** +1. Deploy a test FE process (such as your own local developer) using the new version alone. +2. Modify the FE configuration file fe.conf for testing and set all ports to **different from online**. +3. Add configuration in fe.conf: cluster_id=123456 +4. Add the configuration in fe.conf: metadatafailure_recovery=true +5. Copy the metadata directory palo-meta of the online environment Master FE to the test environment +6. Modify the cluster_id in the palo-meta/image/VERSION file copied into the test environment to 123456 (that is, the same as in Step 3) +7. "27979;" "35797;" "3681616;" sh bin /start fe.sh "21551;" FE +8. Observe whether the start-up is successful through FE log fe.log. +9. If the startup is successful, run sh bin/stop_fe.sh to stop the FE process of the test environment. +10. 
**The purpose of the above 2-6 steps is to prevent the FE of the test environment from being misconnected to the online environment after it starts.** + +## Upgrade preparation + +1. After data validation, the new version of BE and FE binary files are distributed to their respective directories. +2. Usually small version upgrade, BE only needs to upgrade palo_be; FE only needs to upgrade palo-fe.jar. If it is a large version upgrade, you may need to upgrade other files (including but not limited to bin / lib / etc.) If you are not sure whether you need to replace other files, it is recommended to replace all of them. + +## rolling upgrade + +1. Confirm that the new version of the file is deployed. Restart FE and BE instances one by one. +2. It is suggested that BE be restarted one by one and FE be restarted one by one. Because Doris usually guarantees backward compatibility between FE and BE, that is, the old version of FE can access the new version of BE. However, the old version of BE may not be supported to access the new version of FE. +3. It is recommended to restart the next instance after confirming that the previous instance started successfully. Refer to the Installation Deployment Document for the identification of successful instance startup. diff --git a/docs/documentation/en/internal/doris_storage_optimization_EN.md b/docs/documentation/en/internal/doris_storage_optimization_EN.md new file mode 100644 index 00000000000000..ef7721e8dcfe63 --- /dev/null +++ b/docs/documentation/en/internal/doris_storage_optimization_EN.md @@ -0,0 +1,206 @@ +# Doris Storage File Format Optimization # + +## File format ## + +![](../../../resources/images/segment_v2.png) +
1. Doris segment
+ +Documents include: +- The file starts with an 8-byte magic code to identify the file format and version +- Data Region: Used to store data information for each column, where the data is loaded on demand by pages. +- Index Region: Doris stores the index data of each column in Index Region, where the data is loaded according to column granularity, so the data information of the following column is stored separately. +- Footer信息 + - FileFooterPB: Metadata Information for Definition Files + - Chesum of 4 bytes of footer Pb content + - Four bytes FileFooterPB message length for reading FileFooterPB + - The 8 byte MAGIC CODE is stored in the last bit to facilitate the identification of file types in different scenarios. + +The data in the file is organized in the form of page, which is the basic unit of coding and compression. Current page types include the following: + +### DataPage ### + +Data Page is divided into two types: nullable and non-nullable data pages. + +Nullable's data page includes: +``` + + +----------------+ + | value count | + |----------------| + | first row id | + |----------------| + | bitmap length | + |----------------| + | null bitmap | + |----------------| + | data | + |----------------| + | checksum | + +----------------+ +``` + +non -zero data page32467;- 26500;- 229140;- + +``` + |----------------| + | value count | + |----------------| + | first row id | + |----------------| + | data | + |----------------| + | checksum | + +----------------+ +``` + +The meanings of each field are as follows: + +- value count + - Represents the number of rows in a page +- First row id + - Line number of the first line in page +- bitmap length + - Represents the number of bytes in the next bitmap +- null bitmap + - bitmap representing null information +- Data + - Store data after encoding and compress + - You need to write in the header information of the data: is_compressed + - Various kinds of data encoded by different codes need to write some field information in the header information in order to achieve data parsing. + - TODO: Add header information for various encodings +- Checksum + - Store page granularity checksum, including page header and subsequent actual data + + +### Bloom Filter Pages ### + +For each bloom filter column, a page of the bloom filter is generated corresponding to the granularity of the page and saved in the bloom filter pages area. + +### Ordinal Index Page ### + +For each column, a sparse index of row numbers is established according to page granularity. The content is a pointer to the block (including offset and length) for the line number of the start line of the page + +### Short Key Index page ### + +We generate a sparse index of short key every N rows (configurable) with the contents of short key - > line number (ordinal) + +### Column's other indexes### + +The format design supports the subsequent expansion of other index information, such as bitmap index, spatial index, etc. It only needs to write the required data to the existing column data, and add the corresponding metadata fields to FileFooterPB. 
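+
+As a rough illustration of how the ordinal index described above is used, the following Python sketch looks up the page that contains a given row number through a sparse (first row id -> page pointer) list. The concrete offsets, lengths and the helper name are made-up values for illustration; the real implementation is part of the C++ storage engine.
+
+```
+import bisect
+
+# Sparse ordinal index: one entry per page, keyed by the first row id stored in that page.
+# Each entry points to the page's location in the segment file: (offset, length).
+ordinal_index = [
+    (0,    (4096, 1024)),   # page 0 starts at row 0
+    (1000, (5120,  980)),   # page 1 starts at row 1000
+    (2000, (6100, 1010)),   # page 2 starts at row 2000
+]
+
+def locate_page(row_id):
+    """Return the (offset, length) of the page that contains row_id."""
+    first_rows = [first for first, _ in ordinal_index]
+    idx = bisect.bisect_right(first_rows, row_id) - 1
+    return ordinal_index[idx][1]
+
+print(locate_page(1500))  # -> (5120, 980): row 1500 lives in the page starting at row 1000
+```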
+ +### Metadata Definition### +FileFooterPB is defined as: + +``` +message ColumnPB { + optional uint32 column_id = 1; // 这里使用column id,不使用column name是因为计划支持修改列名 + optional string type = 2; // 列类型 + optional string aggregation = 3; // 是否聚合 + optional uint32 length = 4; // 长度 + optional bool is_key = 5; // 是否是主键列 + optional string default_value = 6; // 默认值 + optional uint32 precision = 9 [default = 27]; // 精度 + optional uint32 frac = 10 [default = 9]; + optional bool is_nullable = 11 [default=false]; // 是否有null + optional bool is_bf_column = 15 [default=false]; // 是否有bf词典 + optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引 +} + +// page偏移 +message PagePointerPB { + required uint64 offset; // page在文件中的偏移 + required uint32 length; // page的大小 +} + +message MetadataPairPB { + optional string key = 1; + optional bytes value = 2; +} + +message ColumnMetaPB { + optional ColumnMessage encoding; // 编码方式 + + optional PagePointerPB dict_page // 词典page + repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息 + optional PagePointerPB ordinal_index_page; // 行号索引数据 + optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据 + + optional PagePointerPB bitmap_index_page; // bitmap索引数据 + + optional uint64 data_footprint; // 列中索引的大小 + optional uint64 index_footprint; // 列中数据的大小 + optional uint64 raw_data_footprint; // 原始列数据大小 + + optional CompressKind compress_kind; // 列的压缩方式 + + optional ZoneMapPB column_zone_map; //文件级别的过滤条件 + repeated MetadataPairPB column_meta_datas; +} + +message FileFooterPB { + optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用 + repeated ColumnPB schema = 5; // 列Schema + optional uint64 num_values = 4; // 文件中保存的行数 + optional uint64 index_footprint = 7; // 索引大小 + optional uint64 data_footprint = 8; // 数据大小 + optional uint64 raw_data_footprint = 8; // 原始数据大小 + + optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式 + repeated ColumnMetaPB column_metas = 10; // 列元数据 + optional PagePointerPB key_index_page; // short key索引page +} + +``` + +## Read-write logic## + +### Write ### + +The general writing process is as follows: +1. Write magic +2. Generate corresponding Column Writer according to schema information. Each Column Writer obtains corresponding encoding information (configurable) according to different types, and generates corresponding encoder according to encoding. +3. Call encoder - > add (value) for data writing. Each K line generates a short key index entry, and if the current page satisfies certain conditions (the size exceeds 1M or the number of rows is K), a new page is generated and cached in memory. +4. Continuous cycle step 3 until data writing is completed. Brush the data of each column into the file in sequence +5. Generate FileFooterPB information and write it to the file. + +Relevant issues: + +- How does the index of short key be generated? + - Now we still generate a short key sparse index according to how many rows are sparse, and keep a short sparse index generated every 1024 rows. The specific content is: short key - > ordinal + +- What should be stored in the ordinal index? + - Store the first ordinal to page pointer mapping information for pages +- What are stored in pages of different encoding types? + - Dictionary Compression + - plain + - rle + - bshuf + +### Read### + +1. Read the magic of the file and judge the type and version of the file. +2. Read FileFooterPB and check sum +3. Read short key index and data ordinal index information of corresponding columns according to required columns +4. 
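+
+To tie the write flow above together, here is a deliberately simplified Python sketch of a column writer: it buffers values, cuts a new page when a row-count or size threshold is reached, and records one sparse short key entry every N rows. All names, constants and the string-based "encoder" are illustrative assumptions, not the actual C++ implementation.
+
+```
+PAGE_MAX_ROWS = 1024          # cut a page every K rows (assumed value)
+PAGE_MAX_BYTES = 1024 * 1024  # ... or when the encoded page reaches about 1MB
+SHORT_KEY_EVERY = 1024        # one sparse short key entry every N rows
+
+class ToyColumnWriter:
+    def __init__(self):
+        self.pages = []             # finished pages: (first_row_id, encoded_bytes)
+        self.short_key_index = []   # sparse entries: (short_key, ordinal)
+        self.current = []           # values buffered for the page being built
+        self.next_row_id = 0
+
+    def add(self, value):
+        row_id = self.next_row_id
+        if row_id % SHORT_KEY_EVERY == 0:
+            self.short_key_index.append((str(value)[:36], row_id))
+        self.current.append(value)
+        self.next_row_id += 1
+        encoded = repr(self.current).encode()   # stand-in for a real encoder
+        if len(self.current) >= PAGE_MAX_ROWS or len(encoded) >= PAGE_MAX_BYTES:
+            self.flush_page()
+
+    def flush_page(self):
+        if self.current:
+            first_row_id = self.next_row_id - len(self.current)
+            self.pages.append((first_row_id, repr(self.current).encode()))
+            self.current = []
+
+w = ToyColumnWriter()
+for i in range(3000):
+    w.add(i)
+w.flush_page()
+print(len(w.pages), len(w.short_key_index))   # -> 3 3
+```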
Use start key and end key, locate the row number to be read through short key index, then determine the row ranges to be read through ordinal index, and filter the row ranges to be read through statistics, bitmap index and so on. +5. Then read row data through ordinal index according to row ranges + +Relevant issues: +1. How to quickly locate a row within the page? + + The data inside the page is encoding, so it can not locate the row-level data quickly. Different encoding methods have different schemes for fast line number positioning in-house, which need to be analyzed concretely: + - If it is rle-coded, skip is performed by resolving the head of RLE until the RLE block containing the row is reached, and then the reverse solution is performed. + - binary plain encoding: offset information will be stored in the page, and offset information will be specified in the page header. When reading, offset information will be parsed into the array first, so that you can quickly locate the data of a row of block through offset data information of each row. +2. How to achieve efficient block reading? Consider merging adjacent blocks while they are being read, one-time reading? +This requires judging whether the block is continuous at the time of reading, and if it is continuous, reading it once. + +## Coding## + +In the existing Doris storage, plain encoding is adopted for string type encoding, which is inefficient. After comparison, it is found that in Baidu statistics scenario, data will expand more than twice because of string type coding. Therefore, it is planned to introduce dictionary-based coding compression. + +## Compression## + +It implements a scalable compression framework, supports a variety of compression algorithms, facilitates the subsequent addition of new compression algorithms, and plans to introduce zstd compression. + +## TODO ## +1. How to implement nested types? How to locate line numbers in nested types? +2. How to optimize the downstream bitmap and column statistics statistics caused by ScanRange splitting? diff --git a/docs/documentation/en/internal/index.rst b/docs/documentation/en/internal/index.rst new file mode 100644 index 00000000000000..2d2aee33b2665d --- /dev/null +++ b/docs/documentation/en/internal/index.rst @@ -0,0 +1,8 @@ +=========== +设计文档 +=========== + +.. toctree:: + :glob: + + * diff --git a/docs/documentation/en/internal/metadata-design_EN.md b/docs/documentation/en/internal/metadata-design_EN.md new file mode 100644 index 00000000000000..b6e85bfa47e5ec --- /dev/null +++ b/docs/documentation/en/internal/metadata-design_EN.md @@ -0,0 +1,100 @@ +# Metadata Design Document + +## Noun Interpretation + +* FE: Frontend, the front-end node of Doris. Mainly responsible for receiving and returning client requests, metadata, cluster management, query plan generation and so on. +* BE: Backend, the back-end node of Doris. Mainly responsible for data storage and management, query plan execution and other work. +* bdbje: [Oracle Berkeley DB Java Edition] (http://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html). In Doris, we use bdbje to persist metadata operation logs and high availability of FE. + +## Overall architecture +![](../../../resources/images/palo_architecture.jpg) + +As shown above, Doris's overall architecture is divided into two layers. Multiple FEs form the first tier, providing lateral expansion and high availability of FE. Multiple BEs form the second layer, which is responsible for data storage and management. 
This document mainly introduces the design and implementation of metadata in the FE layer.
+
+1. FE nodes are divided into two categories: follower and observer. The FEs use bdbje ([BerkeleyDB Java Edition](http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/overview/index-093405.html)) among themselves for leader election, data synchronization and other work.
+
+2. One of the follower nodes is elected as the leader node, which is responsible for writing metadata. When the leader node goes down, the other follower nodes re-elect a new leader to ensure high availability of the service.
+
+3. The observer node only synchronizes metadata from the leader node and does not participate in the election. It can be scaled horizontally to provide better scalability for metadata read services.
+
+> Note: In bdbje, the concepts corresponding to follower and observer are replica and observer. Both sets of names may be used below.
+
+## Metadata structure
+
+Doris's metadata is kept entirely in memory. A complete metadata image is maintained in the memory of each FE. Within Baidu, a cluster with 2,500 tables and 1 million shards (3 million replicas) occupies only about 2GB of metadata memory. (Of course, the memory overhead of intermediate objects used by queries and of various job information needs to be estimated according to the actual situation. Overall, however, the memory footprint stays low.)
+
+At the same time, the metadata is stored in memory as a whole in a tree-like hierarchical structure. With some auxiliary structures, metadata at all levels can be accessed quickly.
+
+The following figure shows the contents stored in Doris metadata.
+
+![](../../../resources/images/metadata_contents.png)
+
+As shown above, Doris's metadata mainly stores four types of data:
+
+1. User data information, including databases, table schemas, sharding information, etc.
+2. All kinds of job information, for example import jobs, Clone jobs, SchemaChange jobs, etc.
+3. User and permission information.
+4. Cluster and node information.
+
+## Data stream
+
+![](../../../resources/images/metadata_stream.png)
+
+The data flow of metadata is as follows:
+
+1. Only the leader FE can write metadata. After modifying the leader's memory, the write operation is serialized into a log and written to bdbje as a key-value pair. The key is a continuously increasing integer used as the log id, and the value is the serialized operation log.
+
+2. After the log is written to bdbje, bdbje copies the log to the other non-leader FE nodes according to the configured policy (write majority / write all). The non-leader FE nodes modify their in-memory metadata images by replaying the log, thereby staying in sync with the metadata of the leader node.
+
+3. When the number of log entries on the leader node reaches a threshold (100,000 by default), the checkpoint thread starts. The checkpoint reads the existing image file and the subsequent logs and replays them into a new copy of the metadata image in memory. The copy is then written to disk to form a new image. The reason for regenerating a copy instead of dumping the existing in-memory image is that dumping would require holding a read lock on the metadata, which would block write operations for the duration of the dump. Therefore each checkpoint temporarily takes up twice the memory space.
+
+4. After the image file is generated, the leader node notifies the other non-leader nodes that a new image has been generated. Each non-leader node then actively pulls the latest image file over HTTP to replace its old local file.
+
+5. The logs in bdbje are deleted regularly after the image is completed.
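+
+A minimal Python sketch of the checkpoint logic in steps 1-3 above (the real implementation lives in the FE's Java code): edit logs newer than the current image are replayed onto a copy of the metadata, and a new image file is written once the number of pending logs reaches the threshold. The key-value "operation log" format and the helper names are illustrative assumptions; only the image.ckpt / image.[logid] file names and the default threshold come from this document.
+
+```
+import copy, json
+
+CHECKPOINT_THRESHOLD = 100000   # default threshold of pending log entries
+
+def replay(image, edit_logs, from_id):
+    """Apply edit logs with id > from_id onto a copy of the image, like the checkpoint thread."""
+    meta = copy.deepcopy(image["meta"])        # work on a copy, not on the serving metadata
+    last = image["last_log_id"]
+    for log_id in sorted(i for i in edit_logs if i > from_id):
+        key, value = edit_logs[log_id]         # toy "operation log": set key to value
+        meta[key] = value
+        last = log_id
+    return {"last_log_id": last, "meta": meta}
+
+def maybe_checkpoint(image, edit_logs):
+    pending = sum(1 for i in edit_logs if i > image["last_log_id"])
+    if pending < CHECKPOINT_THRESHOLD:
+        return image                           # not enough new logs yet
+    new_image = replay(image, edit_logs, image["last_log_id"])
+    with open("image.ckpt", "w") as f:         # written as image.ckpt, then renamed to image.[logid]
+        json.dump(new_image, f)
+    return new_image
+
+# Toy usage: only two logs after the last image, far below the threshold, so no new image is written.
+image = {"last_log_id": 10, "meta": {"db1.tbl1": "v1"}}
+logs = {11: ("db1.tbl2", "v1"), 12: ("db1.tbl1", "v2")}
+print(maybe_checkpoint(image, logs)["last_log_id"])   # -> 10
+```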
+
+## Implementation details
+
+### Metadata directory
+
+1. The metadata directory is specified by the FE configuration item `meta_dir`.
+
+2. The `bdb/` directory is the data storage directory of bdbje.
+
+3. The `image/` directory stores the image files.
+
+* `image.[logid]` is the latest image file. The suffix `logid` indicates the id of the last log contained in the image.
+* `image.ckpt` is the image file currently being written. If it is written successfully, it is renamed to `image.[logid]` and replaces the old image file.
+* The `cluster_id` is recorded in the `VERSION` file. `cluster_id` uniquely identifies a Doris cluster. It is a 32-bit integer randomly generated at the first startup of the leader. A cluster id can also be specified through the FE configuration item `cluster_id`.
+* The FE's own role is recorded in the `ROLE` file. There are only two values: `FOLLOWER` and `OBSERVER`, where `FOLLOWER` indicates that the FE is an electable node. (Note: even the leader node's role is `FOLLOWER`.)
+
+### Start-up process
+
+1. FE starts for the first time. If no extra parameters are passed to the startup script, it tries to start as the leader. You will eventually see `transfer from UNKNOWN to MASTER` in the FE startup log.
+
+2. FE starts for the first time. If the `-helper` parameter is specified in the startup script and points to the correct leader FE node, the FE first asks the leader node for its role (ROLE) and cluster_id over HTTP, and then pulls the latest image file. After reading the image file and generating the metadata image, it starts bdbje and begins bdbje log synchronization. After synchronization completes, the logs in bdbje that are newer than the image are replayed to finish generating the final metadata image.
+
+    > Note 1: When starting with the `-helper` parameter, you first need to add this FE through the leader via the MySQL client; otherwise, the start will report an error.
+
+    > Note 2: `-helper` can point to any follower node, not necessarily the leader.
+
+    > Note 3: While the logs are being synchronized, the FE log will show `xxx detached`. The log pull is still in progress at this point, which is normal.
+
+3. FE is not starting for the first time. If no extra parameters are passed to the startup script, it determines its identity from the locally stored ROLE information and obtains the leader information from the cluster information stored in the local bdbje. It then reads the local image file and the logs in bdbje to finish generating the metadata image. (If the role recorded in the local ROLE file is inconsistent with the one recorded in bdbje, an error is reported.)
+
+4. FE is not starting for the first time, and the `-helper` parameter is specified in the startup script. As in the first-start case, it asks the leader for the role first, but compares the answer with the locally stored ROLE. If they are inconsistent, an error is reported.
+
+#### Metadata Read-Write and Synchronization
+
+1. Users can connect to any FE node via MySQL to read and write metadata. If the connected node is a non-leader, it forwards write operations to the leader node. When the leader completes the write, it returns its current, latest log id. The non-leader node then waits until the log id it has replayed catches up with the returned log id before replying to the client that the command succeeded. This approach guarantees Read-Your-Write semantics on any FE node.
+ + > Note: Some non-write operations are also forwarded to leader for execution. For example, `SHOW LOAD` operation. Because these commands usually need to read the intermediate states of some jobs, which are not written to bdbje, there are no such intermediate states in the memory of the non-leader node. (FE's direct metadata synchronization depends entirely on bdbje's log playback. If a metadata modification operation does not write bdbje's log, the result of the modification of the operation will not be seen in other non-leader nodes.) + +2. The leader node starts a TimePrinter thread. This thread periodically writes a key-value entry for the current time to bdbje. The remaining non-leader nodes read the recorded time in the log by playback and compare it with the local time. If the lag between the local time and the local time is found to be greater than the specified threshold (configuration item: `meta_delay_toleration_second`). If the write interval is half of the configuration item, the node will be in the **unreadable** state. This mechanism solves the problem that non-leader nodes still provide outdated metadata services after a long time of leader disconnection. + +3. The metadata of each FE only guarantees the final consistency. Normally, inconsistent window periods are only milliseconds. We guarantee the monotonous consistency of metadata access in the same session. But if the same client connects different FEs, metadata regression may occur. (But for batch update systems, this problem has little impact.) + +### Downtime recovery + +1. When the leader node goes down, the rest of the followers will immediately elect a new leader node to provide services. +2. Metadata cannot be written when most follower nodes are down. When metadata is not writable, if a write operation request occurs, the current process is that the **FE process exits**. This logic will be optimized in the future, and read services will still be provided in the non-writable state. +3. The downtime of observer node will not affect the state of any other node. It also does not affect metadata reading and writing at other nodes. diff --git a/docs/documentation/en/sql-reference/index.rst b/docs/documentation/en/sql-reference/index.rst new file mode 100644 index 00000000000000..c30bebbc01c5f0 --- /dev/null +++ b/docs/documentation/en/sql-reference/index.rst @@ -0,0 +1,9 @@ +=========== +SQL 手册 +=========== + +.. toctree:: + :hidden: + + sql-functions/index + sql-statements/index diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/avg_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/avg_EN.md new file mode 100644 index 00000000000000..26e0aec307b900 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/avg_EN.md @@ -0,0 +1,31 @@ +'35; AVG +Description +'35;'35;' 35; Syntax + +`AVG([DISTINCT] expr)` + + +Used to return the average value of the selected field + +Optional field DISTINCT parameters can be used to return the weighted average + +'35;'35; example + +``` +mysql> SELECT datetime, AVG(cost_time) FROM log_statis group by datetime; ++---------------------+--------------------+ +"1. 
Article 124b; Article 124b; Article 124g (`cost u time'); ++---------------------+--------------------+ +| 2019-07-03 21:01:20 | 25.827794561933533 | ++---------------------+--------------------+ + +mysql> SELECT datetime, AVG(distinct cost_time) FROM log_statis group by datetime; ++---------------------+---------------------------+ +| datetime | avg(DISTINCT `cost_time`) | ++---------------------+---------------------------+ +| 2019-07-04 02:23:24 | 20.666666666666668 | ++---------------------+---------------------------+ + +``` +##keyword +AVG diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_EN.md new file mode 100644 index 00000000000000..95681930ebe7f1 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_EN.md @@ -0,0 +1,35 @@ +# COUNT +Description +'35;'35;' 35; Syntax + +`COUNT([DISTINCT] expr)` + + +Number of rows used to return the required rows + +'35;'35; example + +``` +MySQL > select count(*) from log_statis group by datetime; ++----------+ +| count(*) | ++----------+ +| 28515903 | ++----------+ + +MySQL > select count(datetime) from log_statis group by datetime; ++-------------------+ +count (`datetime') 1244; ++-------------------+ +| 28521682 | ++-------------------+ + +MySQL > select count(distinct datetime) from log_statis group by datetime; ++-------------------------------+ +| count(DISTINCT `datetime`) | ++-------------------------------+ +| 71045 | ++-------------------------------+ +``` +##keyword +COUNT diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_distinct_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_distinct_EN.md new file mode 100644 index 00000000000000..fe761721a4d60e --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/count_distinct_EN.md @@ -0,0 +1,21 @@ +# COUNT_DISTINCT +Description +'35;'35;' 35; Syntax + +`COUNT_DISTINCT(expr)` + + +The number of rows used to return the required number, or the number of non-NULL rows + +'35;'35; example + +``` +MySQL > select count_distinct(query_id) from log_statis group by datetime; ++----------------------------+ ++ 124. Calculate (`query `uid') separately; ++----------------------------+ +| 577 | ++----------------------------+ +``` +##keyword +COUNT_DISTINCT,COUNT,DISTINCT diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg_EN.md new file mode 100644 index 00000000000000..aac0b31364a66b --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg_EN.md @@ -0,0 +1,26 @@ +THE EUROPEAN UNION +Description +'35;'35;' 35; Syntax + +'HLL UNION'u AGG (hll)' + + +HLL is an engineering implementation based on HyperLog algorithm, which is used to save the intermediate results of HyperLog calculation process. + +It can only be used as the value column type of the table and reduce the amount of data through aggregation to achieve the purpose of speeding up the query. + +Based on this, we get an estimate with an error of about 1%. The HLL column is generated by other columns or data imported into the data. + +When importing, hll_hash function is used to specify which column in data is used to generate HLL column. 
It is often used to replace count distinct, and to calculate UV quickly in business by combining rollup. + +'35;'35; example +``` +MySQL > select HLL_UNION_AGG(uv_set) from test_uv;; ++-------------------------+ +THE COURT OF JUSTICE OF THE EUROPEAN COMMUNITIES, ++-------------------------+ +| 17721 | ++-------------------------+ +``` +##keyword +HLL_UNION_AGG,HLL,UNION,AGG diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/index.rst b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/index.rst new file mode 100644 index 00000000000000..10a1dceb92d1d7 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/index.rst @@ -0,0 +1,8 @@ +============= +聚合函数 +============= + +.. toctree:: + :glob: + + * diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/max_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/max_EN.md new file mode 100644 index 00000000000000..c26638790e5447 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/max_EN.md @@ -0,0 +1,20 @@ +# MAX +Description +'35;'35;' 35; Syntax + +'MAX (expr) + + +Returns the maximum value of an expr expression + +'35;'35; example +``` +MySQL > select max(scan_rows) from log_statis group by datetime; ++------------------+ +| max(`scan_rows`) | ++------------------+ +| 4671587 | ++------------------+ +``` +##keyword +MAX diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/min_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/min_EN.md new file mode 100644 index 00000000000000..497162ee895d24 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/min_EN.md @@ -0,0 +1,20 @@ +# MIN +Description +'35;'35;' 35; Syntax + +`MIN(expr)` + + +Returns the minimum value of an expr expression + +'35;'35; example +``` +MySQL > select min(scan_rows) from log_statis group by datetime; ++------------------+ +| min(`scan_rows`) | ++------------------+ +| 0 | ++------------------+ +``` +##keyword +MIN diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/ndv_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/ndv_EN.md new file mode 100644 index 00000000000000..724023d2609435 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/ndv_EN.md @@ -0,0 +1,22 @@ +'35; NDV +Description +'35;'35;' 35; Syntax + +'NDV (expr)' + + +Returns an approximate aggregation function similar to the result of COUNT (DISTINCT col). + +It combines COUNT and DISTINCT faster and uses fixed-size memory, so less memory can be used for columns with high cardinality. + +'35;'35; example +``` +MySQL > select ndv(query_id) from log_statis group by datetime; ++-----------------+ +| ndv(`query_id`) | ++-----------------+ +| 17721 | ++-----------------+ +``` +##keyword +NDV diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/percentile_approx_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/percentile_approx_EN.md new file mode 100644 index 00000000000000..da666749fd5666 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/percentile_approx_EN.md @@ -0,0 +1,21 @@ +# PERCENTILE_APPROX +Description +'35;'35;' 35; Syntax + +`PERCENTILE_APPROX(expr, DOUBLE p)` + + +Return the approximation of the point p, where the value of P is between 0 and 1. 
+ +This function uses fixed size memory, so less memory can be used for columns with high cardinality, and can be used to calculate statistics such as tp99. + +'35;'35; example +``` +MySQL > select `table`, percentile_approx(cost_time,0.99) from log_statis group by `table`; ++---------------------+---------------------------+ +| table | percentile_approx(`cost_time`, 0.99) | ++----------+--------------------------------------+ +| test | 54.22 | ++----------+--------------------------------------+ +##keyword +PERCENTILE_APPROX,PERCENTILE,APPROX diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_EN.md new file mode 100644 index 00000000000000..63e459e212cefa --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_EN.md @@ -0,0 +1,27 @@ +# STDDEV,STDDEV_POP +Description +'35;'35;' 35; Syntax + +"`stddev (expl)' + + +Returns the standard deviation of the expr expression + +'35;'35; example +``` +MySQL > select stddev(scan_rows) from log_statis group by datetime; ++---------------------+ +| stddev(`scan_rows`) | ++---------------------+ +| 2.3736656687790934 | ++---------------------+ + +MySQL > select stddev_pop(scan_rows) from log_statis group by datetime; ++-------------------------+ +| stddev_pop(`scan_rows`) | ++-------------------------+ +| 2.3722760595994914 | ++-------------------------+ +``` +##keyword +STDDEV,STDDEV_POP,POP diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_samp_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_samp_EN.md new file mode 100644 index 00000000000000..2309cb49be5364 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/stddev_samp_EN.md @@ -0,0 +1,20 @@ +"35; STDDEV SAMP +Description +'35;'35;' 35; Syntax + +'STDDEV SAMP (expr)' + + +Returns the sample standard deviation of the expr expression + +'35;'35; example +``` +MySQL > select stddev_samp(scan_rows) from log_statis group by datetime; ++--------------------------+ +| stddev_samp(`scan_rows`) | ++--------------------------+ +| 2.372044195280762 | ++--------------------------+ +``` +##keyword +STDDEVu SAMP,STDDEV,SAMP diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/sum_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/sum_EN.md new file mode 100644 index 00000000000000..a0ceb7beb9e85a --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/sum_EN.md @@ -0,0 +1,20 @@ +# SUM +Description +'35;'35;' 35; Syntax + +` Sum (Expr)' + + +Used to return the sum of all values of the selected field + +'35;'35; example +``` +MySQL > select sum(scan_rows) from log_statis group by datetime; ++------------------+ +| sum(`scan_rows`) | ++------------------+ +| 8217360135 | ++------------------+ +``` +##keyword +SUM diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/var_samp_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/var_samp_EN.md new file mode 100644 index 00000000000000..22384650e764da --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/var_samp_EN.md @@ -0,0 +1,19 @@ +"35; VARIANCE SAMP, VARIANCE SAMP +Description +'35;'35;' 35; Syntax + +'VAR SAMP (expr)' + + +Returns the sample variance of the expr expression + +'35;'35; example +``` +MySQL > 
select var_samp(scan_rows) from log_statis group by datetime; ++-----------------------+ +| var_samp(`scan_rows`) | ++-----------------------+ +| 5.6227132145741789 | ++-----------------------+ +##keyword +VAR SAMP, VARIANCE SAMP,VAR,SAMP,VARIANCE diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/variance_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/variance_EN.md new file mode 100644 index 00000000000000..bc28d18162640b --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/variance_EN.md @@ -0,0 +1,26 @@ +# VARIANCE,VAR_POP,VARIANCE_POP +Description +'35;'35;' 35; Syntax + +`VARIANCE(expr)` + + +Returns the variance of the expr expression + +'35;'35; example +``` +MySQL > select variance(scan_rows) from log_statis group by datetime; ++-----------------------+ +| variance(`scan_rows`) | ++-----------------------+ +| 5.6183332881176211 | ++-----------------------+ + +MySQL > select var_pop(scan_rows) from log_statis group by datetime; ++----------------------+ +| var_pop(`scan_rows`) | ++----------------------+ +| 5.6230744719006163 | ++----------------------+ +##keyword +VARIANCE,VAR_POP,VARIANCE_POP,VAR,POP diff --git a/docs/documentation/en/sql-reference/sql-functions/cast_EN.md b/docs/documentation/en/sql-reference/sql-functions/cast_EN.md new file mode 100644 index 00000000000000..d3e5fb93436a9f --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/cast_EN.md @@ -0,0 +1,56 @@ +'35; CAST +Description + + +``` +cast (input as type) +``` + +'35;'35;' 35; BIGINT type + +'35;'35;' 35; Syntax (BIGINT) + +``` cast (input as BIGINT) ``` + + +Converts input to the specified type + + +Converting the current column input to BIGINT type + +'35;'35; example + +1. Turn constant, or a column in a table + +``` +mysql> select cast (1 as BIGINT); ++-------------------+ +| CAST(1 AS BIGINT) | ++-------------------+ +| 1 | ++-------------------+ +``` + +2. Transferred raw data + +``` +curl --location-trusted -u root: -T ~/user_data/bigint -H "columns: tmp_k1, k1=cast(tmp_k1 as BIGINT)" http://host:port/api/test/bigint/_stream_load +``` + +* Note: In the import, because the original type is String, when the original data with floating point value is cast, the data will be converted to NULL, such as 12.0. Doris is currently not truncating raw data. * + +If you want to force this type of raw data cast to int. 
Look at the following words: + +``` +curl --location-trusted -u root: -T ~/user_data/bigint -H "columns: tmp_k1, k1=cast(cast(tmp_k1 as DOUBLE) as BIGINT)" http://host:port/api/test/bigint/_stream_load + +mysql> select cast(cast ("11.2" as double) as bigint); ++----------------------------------------+ +| CAST(CAST('11.2' AS DOUBLE) AS BIGINT) | ++----------------------------------------+ +| 11 | ++----------------------------------------+ +1 row in set (0.00 sec) +``` +##keyword +CAST diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/current_timestamp_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/current_timestamp_EN.md new file mode 100644 index 00000000000000..54b16e35768ec1 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/current_timestamp_EN.md @@ -0,0 +1,20 @@ +# current_timestamp +Description +'35;'35;' 35; Syntax + +`DATETIME CURRENT_TIMESTAMP()` + + +Get the current time and return it in Datetime type + +'35;'35; example + +``` +mysql> select current_timestamp(); ++---------------------+ +| current_timestamp() | ++---------------------+ +| 2019-05-27 15:59:33 | ++---------------------+ +##keyword +CURRENT_TIMESTAMP,CURRENT,TIMESTAMP diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_add_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_add_EN.md new file mode 100644 index 00000000000000..fbaa7bd1463124 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_add_EN.md @@ -0,0 +1,26 @@ +Date 35; Date Add +Description +'35;'35;' 35; Syntax + +`INT DATE_ADD(DATETIME date,INTERVAL expr type)` + + +Adds a specified time interval to the date. + +The date parameter is a valid date expression. + +The expr parameter is the interval you want to add. + +Sweet, sweet, sweet + +'35;'35; example + +``` +mysql > select date to add ('2010 -11 -30 23:59:59', INTERVAL 2 DAY); ++-------------------------------------------------+ ++ 124; Date = U Add (= 2010-11-30 23:59', interval 2 days) ++-------------------------------------------------+ +| 2010-12-02 23:59:59 | ++-------------------------------------------------+ +##keyword +DATE_ADD,DATE,ADD diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md new file mode 100644 index 00000000000000..a3bf2a62e31f8c --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md @@ -0,0 +1,123 @@ +# date_format +Description +'35;'35;' 35; Syntax + +'WARCHAR DATE'U FORMAT (DATETIME DATE, WARCHAR Format)' + + +Convert the date type to a bit string according to the format type. +Currently supports a string with a maximum 128 bytes and returns NULL if the length of the return value exceeds 128 + +The date parameter is the valid date. Format specifies the date/time output format. 
+ +The formats available are: + +% a | Abbreviation for Sunday Name + +% B | Abbreviated Monthly Name + +% C | Month, numerical value + +% D | Sky in the Moon with English Prefix + +% d | Monthly day, numerical value (00-31) + +% e | Monthly day, numerical value (0-31) + +% f | microseconds + +% H | Hours (00-23) + +% h | hour (01-12) + +% I | Hours (01-12) + +% I | min, numerical value (00-59) + +% J | Days of Year (001-366) + +% k | hours (0-23) + +% L | Hours (1-12) + +% M | Moon Name + +% m | month, numerical value (00-12) + +%p%124; AM%25110PM + +% R | Time, 12 - hour (hh: mm: SS AM or PM) + +% S | seconds (00-59) + +% s | seconds (00-59) + +% T | Time, 24 - hour (hh: mm: ss) + +% U | Week (00-53) Sunday is the first day of the week + +% U | Week (00 - 53) Monday is the first day of the week + +% V | Week (01-53) Sunday is the first day of the week, and% X is used. + +% v | Week (01 - 53) Monday is the first day of the week, and% x is used + +% W | Sunday + +% w | Weekly day (0 = Sunday, 6 = Saturday) + +% X | Year, where Sunday is the first day of the week, 4 places, and% V use + +% X | year, of which Monday is the first day of the week, 4 places, and% V + +% Y | Year, 4 + +% Y | Year, 2 + +'35;'35; example + +``` +mysql > select date'u format ('2009 -10 -04 22:23:00','%W%M%Y'); ++------------------------------------------------+ ++ 124; Date = UFormat (-2009-10-04 22:23:00', w%M%Y); ++------------------------------------------------+ +| Sunday October 2009 | ++------------------------------------------------+ + +mysql > select date'u format ('2007 -10 -04 22:23:00','%H:%i:%s'); ++------------------------------------------------+ ++ 124; Date = UFormat (-2007-10-04 22:23:00', H:% I:% s)); ++------------------------------------------------+ +| 22:23:00 | ++------------------------------------------------+ + +mysql > select date'u format ('1900 -10 -04 22:23:00','%D%y%a%d%m%b%j'); ++------------------------------------------------------------+ ++ 124; Date = UFormat (+1900-10-04 22:23:00',%Y%A%D%M%B%J) ++------------------------------------------------------------+ ++ 124; 4th 00 THU 04 10 Oct 277; ++------------------------------------------------------------+ + +mysql > select date'u format ('1997 -10 -04 22:23:00','%H%k%I%r%T%S%w'); ++------------------------------------------------------------+ ++ 124; Date = UFormat ("1997-10-04 22:23:00",%H%K%I%R%T%S%W") = 124; ++------------------------------------------------------------+ +22,22,10,23:00 PM 22:23:00 00,6,1244; ++------------------------------------------------------------+ + +mysql > select date'u format ('1999 -01 -01 00:00','%X%V'); ++---------------------------------------------+ +Date of format ('1999 -01 -01 00:00','%X%V') ++---------------------------------------------+ +| 1998 52 | ++---------------------------------------------+ + +mysql> select date_format('2006-06-01', '%d'); ++------------------------------------------+ +date (2006 -06 -01 00:00','%d') 124date; ++------------------------------------------+ +| 01 | ++------------------------------------------+ +``` +##keyword +DATE_FORMAT,DATE,FORMAT diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_sub_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_sub_EN.md new file mode 100644 index 00000000000000..b794857073d2a7 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_sub_EN.md @@ -0,0 +1,26 @@ +3500; date sub +Description +'35;'35;' 35; Syntax + +`INT 
DATE_SUB(DATETIME date,INTERVAL expr type)` + + +Subtract the specified time interval from the date + +The date parameter is a valid date expression. + +The expr parameter is the interval you want to add. + +Sweet, sweet, sweet + +'35;'35; example + +``` +mysql > select date sub ('2010 -11 -30 23:59:59', INTERVAL 2 DAY); ++-------------------------------------------------+ ++ 124; date = USub (2010-11-30 23:59', interval 2 days); ++-------------------------------------------------+ +| 2010-11-28 23:59:59 | ++-------------------------------------------------+ +##keyword +Date, date, date diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/datediff_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/datediff_EN.md new file mode 100644 index 00000000000000..a085950c5adad6 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/datediff_EN.md @@ -0,0 +1,32 @@ +# datediff +Description +'35;'35;' 35; Syntax + +'DATETIME DATEDIFF (DATETIME expr1,DATETIME expr2)' + + +Expr1 - expr2 is calculated and the result is accurate to the sky. + +Expr1 and expr2 parameters are valid date or date/time expressions. + +Note: Only the date part of the value participates in the calculation. + +'35;'35; example + +``` +MySQL > select DateDiff (CAST ('2007 -12 -31 23:59:59 'AS DATETIME), CAST (2007 -12 -30' AS DATETIME)); ++-----------------------------------------------------------------------------------+ +;datediff (CAST ('2007 -12 -31 23:59:59 'AS DATETIME), CAST ('2007 -12 -30' THE DATETIME)) 124; ++-----------------------------------------------------------------------------------+ +| 1 | ++-----------------------------------------------------------------------------------+ + +mysql > select datediff (CAST ('2010 -11 -30 23:59:59 'AS DATETIME), CAST ('2010 -12 -31' AS DATETIME)); ++-----------------------------------------------------------------------------------+ +124th; DateDiff (CAST ('2010 -11 -30 23:59:59 'AS DATETIME), CAST ('2010 -12 -31' THE DATETIME)) ++-----------------------------------------------------------------------------------+ +| -31 | ++-----------------------------------------------------------------------------------+ +``` +##keyword +DATEDIFF diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/day_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/day_EN.md new file mode 100644 index 00000000000000..048abeb71dccd6 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/day_EN.md @@ -0,0 +1,22 @@ +# day +Description +'35;'35;' 35; Syntax + +`INT DAY(DATETIME date)` + + +Get the day information in the date, and return values range from 1 to 31. 
+ +The parameter is Date or Datetime type + +'35;'35; example + +``` +mysql> select day('1987-01-31'); ++----------------------------+ +Day ('1987 -01 -31 00:00:00') 124day; ++----------------------------+ +| 31 | ++----------------------------+ +##keyword +DAY diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayname_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayname_EN.md new file mode 100644 index 00000000000000..e363906ff55c38 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayname_EN.md @@ -0,0 +1,22 @@ +# dayname +Description +'35;'35;' 35; Syntax + +'VARCHAR DAYNAME (DATE)' + + +Date name corresponding to return date + +The parameter is Date or Datetime type + +'35;'35; example + +``` +mysql > select dayname ('2007 -02 -03 00:00'); ++--------------------------------+ +Dayname ('2007 -02 -03 00:00:00') 124name; ++--------------------------------+ +| Saturday | ++--------------------------------+ +##keyword +DAYNAME diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofmonth_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofmonth_EN.md new file mode 100644 index 00000000000000..f683c4f9b0e2fd --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofmonth_EN.md @@ -0,0 +1,22 @@ +Dayofmonth +Description +'35;'35;' 35; Syntax + +'INT DAYOFMONTH (DATETIME date)' + + +Get the day information in the date, and return values range from 1 to 31. + +The parameter is Date or Datetime type + +'35;'35; example + +``` +mysql> select dayofmonth('1987-01-31'); ++-----------------------------------+ +Dayofmonth ('1987 -01 -31 00:00') 1244; ++-----------------------------------+ +| 31 | ++-----------------------------------+ +##keyword +DAYOFMONTH diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofweek_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofweek_EN.md new file mode 100644 index 00000000000000..f543f1e80cf210 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofweek_EN.md @@ -0,0 +1,22 @@ +# dayofweek +Description +'35;'35;' 35; Syntax + +INT DayOfWeek (DATETIME date) + + +The DAYOFWEEK function returns the index value of the working day of the date, that is, 1 on Sunday, 2 on Monday, and 7 on Saturday. + +The parameter is Date or Datetime type + +'35;'35; example + +``` +mysql> select dayofweek('2019-06-25'); ++----------------------------------+ +Dayofweek ('2019 -06 -25 00:00:00') 124s; ++----------------------------------+ +| 3 | ++----------------------------------+ +##keyword +DAYOFWEEK diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofyear_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofyear_EN.md new file mode 100644 index 00000000000000..5124c831561719 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/dayofyear_EN.md @@ -0,0 +1,22 @@ +3500; Dayofyear +Description +'35;'35;' 35; Syntax + +'INT DAYOFYEAR (DATETIME date)' + + +The date of acquisition is the date of the corresponding year. 
+ +The parameter is Date or Datetime type + +'35;'35; example + +``` +mysql > select dayofyear ('2007 -02 -03 00:00'); ++----------------------------------+ +Dayofyear ('2007 -02 -03 00:00') 124year; ++----------------------------------+ +| 34 | ++----------------------------------+ +##keyword +DAYOFYEAR diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_days_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_days_EN.md new file mode 100644 index 00000000000000..45f23313e39d8a --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_days_EN.md @@ -0,0 +1,20 @@ +# from_days +Description +'35;'35;' 35; Syntax + +`DATE FROM_DAYS(INT N)` + + +Calculate which day by the number of days from 0000-01-01 + +'35;'35; example + +``` +mysql > select from u days (730669); ++-------------------+ +| from_days(730669) | ++-------------------+ +| 2000-07-03 | ++-------------------+ +##keyword +FROM_DAYS,FROM,DAYS diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_unixtime_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_unixtime_EN.md new file mode 100644 index 00000000000000..012b4dc5bc629c --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/from_unixtime_EN.md @@ -0,0 +1,42 @@ +'35; from unixtime +Description +'35;'35;' 35; Syntax + +'DATETIME FROM UNIXTIME (INT unix timestamp [, VARCHAR string format]' + + +Convert the UNIX timestamp to the corresponding time format of bits, and the format returned is specified by string_format + +Default yyyy-MM-dd HH:mm:ss + +Input is an integer and return is a string type + +Currently string_format supports only two types of formats: yyyy-MM-dd, yyyy-MM-dd HH: mm:ss. + +The rest of the string_format format is illegal and returns NULL + +'35;'35; example + +``` +mysql> select from_unixtime(1196440219); ++---------------------------+ +| from_unixtime(1196440219) | ++---------------------------+ +| 2007-12-01 00:30:19 | ++---------------------------+ + +mysql> select from_unixtime(1196440219, 'yyyy-MM-dd'); ++-----------------------------------------+ +| from_unixtime(1196440219, 'yyyy-MM-dd') | ++-----------------------------------------+ +| 2007-12-01 | ++-----------------------------------------+ + +mysql> select from_unixtime(1196440219, 'yyyy-MM-dd HH:mm:ss'); ++--------------------------------------------------+ +From unixtime (1196440219,'yyyy -MM -dd HH:mm:ss') ++--------------------------------------------------+ +| 2007-12-01 00:30:19 | ++--------------------------------------------------+ +##keyword +FROM_UNIXTIME,FROM,UNIXTIME diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/index.rst b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/index.rst new file mode 100644 index 00000000000000..9841d7ea1d124d --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/index.rst @@ -0,0 +1,8 @@ +============= +日期函数 +============= + +.. 
toctree:: + :glob: + + * diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/month_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/month_EN.md new file mode 100644 index 00000000000000..c3ae6899313f3c --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/month_EN.md @@ -0,0 +1,22 @@ +month +Description +'35;'35;' 35; Syntax + +INT MONTH (DATETIME date) + + +Returns month information in the time type, ranging from 1,12 + +The parameter is Date or Datetime type + +'35;'35; example + +``` +mysql> select month('1987-01-01'); ++-----------------------------+ +month ('1987 -01 -01 00:00:00') 1244; ++-----------------------------+ +| 1 | ++-----------------------------+ +##keyword +MONTH diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/monthname_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/monthname_EN.md new file mode 100644 index 00000000000000..edf03909516f53 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/monthname_EN.md @@ -0,0 +1,22 @@ +\ 35; Monthname +Description +'35;'35;' 35; Syntax + +'VARCHAR MONTHNAME (DATE)' + + +Month name corresponding to return date + +The parameter is Date or Datetime type + +'35;'35; example + +``` +mysql > select monthname ('2008 -02 -03 00:00'); ++----------------------------------+ +Monthname ('2008 -02 -03 00:00') 124me; ++----------------------------------+ +February 124th; ++----------------------------------+ +##keyword +MONTHNAME diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/now_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/now_EN.md new file mode 100644 index 00000000000000..0c4685bc713a92 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/now_EN.md @@ -0,0 +1,20 @@ +# now +Description +'35;'35;' 35; Syntax + +'DATETIME NOW ()' + + +Get the current time and return it in Datetime type + +'35;'35; example + +``` +mysql> select now(); ++---------------------+ +124; now. 
++---------------------+ +| 2019-05-27 15:58:25 | ++---------------------+ +##keyword +NOW diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md new file mode 100644 index 00000000000000..6201ccabc0d410 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md @@ -0,0 +1,29 @@ +3500; Str_to_date +Description +'35;'35;' 35; Syntax + +'DATETIME STR TWO DATES (VARCHAR STR, VARCHAR format)' + + +Convert STR to DATE type by format specified, if the conversion result does not return NULL + +The format format supported is consistent with date_format + +'35;'35; example + +``` +mysql > select str to u date ('2014 -12 -21 12:34:56','%Y -%m -%d%H:%i:%s'); ++---------------------------------------------------------+ +Date to date ('2014 -12 -21 12:34:56','%Y -%m -%d%H:%i:%s') ++---------------------------------------------------------+ +| 2014-12-21 12:34:56 | ++---------------------------------------------------------+ + +mysql> select str_to_date('200442 Monday', '%X%V %W'); ++-----------------------------------------+ +Date to date ('200442 Monday','%X%V%W') ++-----------------------------------------+ +| 2004-10-18 | ++-----------------------------------------+ +##keyword +STR_TO_DATE,STR,TO,DATE diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/timediff_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/timediff_EN.md new file mode 100644 index 00000000000000..134cd39df4daf6 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/timediff_EN.md @@ -0,0 +1,46 @@ +Time diff +Description +'35;'35;' 35; Syntax + +'TIME TIMEDIFF (DATETIME expr1, DATETIME expr2)' + + +TIMEDIFF returns the difference between two DATETIMEs + +The TIMEDIFF function returns the result of expr1 - expr2 expressed as a time value, with a return value of TIME type + +The results are limited to TIME values ranging from - 838:59:59 to 838:59:59. + +'35;'35; example + +``` +mysql> SELECT TIMEDIFF(now(),utc_timestamp()); ++----------------------------------+ +(now (), utc -u timestamp ()124; ++----------------------------------+ +| 08:00:00 | ++----------------------------------+ + +mysql> SELECT TIMEDIFF('2019-07-11 16:59:30','2019-07-11 16:59:21'); ++--------------------------------------------------------+ +Time diff ('2019 -07 -11 16:59:30','2019 -07 -11 16:59:21') ++--------------------------------------------------------+ +| 00:00:09 | ++--------------------------------------------------------+ + +mysql > SELECT TIMEDIFF ('2019 -01 -01 00:00','2009 -01 -01 00:00'); ++--------------------------------------------------------+ +Time diff ('2019 -01 -01 00:00','2009 -01 -01 00:00') ++--------------------------------------------------------+ +| 838:59:59 | ++--------------------------------------------------------+ + +mysql > SELECT TIMEDIFF ('2019 -01 -01 00:00:00', NULL); ++---------------------------------------+ +Time diff ('2019 -01 -01 00:00','NULL') ++---------------------------------------+ +No. No. No. 
++---------------------------------------+
+```
+##keyword
+TIMEDIFF
diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/to_days_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/to_days_EN.md
new file mode 100644
index 00000000000000..efea2bbeb0f65b
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/to_days_EN.md
@@ -0,0 +1,22 @@
+# to_days
+Description
+### Syntax
+
+`INT TO_DAYS(DATETIME date)`
+
+
+Returns the number of days from 0000-01-01 to the given date
+
+The parameter is Date or Datetime type
+
+## example
+
+```
+mysql> select to_days('2007-10-07');
++-----------------------+
+| to_days('2007-10-07') |
++-----------------------+
+| 733321 |
++-----------------------+
+##keyword
+TO_DAYS,TO,DAYS
diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md
new file mode 100644
index 00000000000000..df74247c00d2e4
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md
@@ -0,0 +1,31 @@
+# unix_timestamp
+Description
+### Syntax
+
+`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date)`
+
+
+Converts a Date or Datetime value to a UNIX timestamp
+
+If there are no parameters, the current time is converted into a timestamp
+
+The parameter needs to be Date or Datetime type
+
+## example
+
+```
+mysql> select unix_timestamp();
++------------------+
+| unix_timestamp() |
++------------------+
+| 1558589570 |
++------------------+
+
+mysql> select unix_timestamp('2007-11-30 10:30:19');
++---------------------------------------+
+| unix_timestamp('2007-11-30 10:30:19') |
++---------------------------------------+
+| 1196389819 |
++---------------------------------------+
+##keyword
+UNIX_TIMESTAMP,UNIX,TIMESTAMP
diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/utc_timestamp_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/utc_timestamp_EN.md
new file mode 100644
index 00000000000000..fe7442083a0d93
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/utc_timestamp_EN.md
@@ -0,0 +1,24 @@
+# utc_timestamp
+Description
+### Syntax
+
+`DATETIME UTC_TIMESTAMP()`
+
+
+Returns the current UTC date and time in "YYYY-MM-DD HH:MM:SS" or
+"YYYYMMDDHHMMSS" format,
+depending on whether the function is used in a string or numeric context
+
+## example
+
+```
+mysql> select utc_timestamp(),utc_timestamp() + 1;
++---------------------+---------------------+
+| utc_timestamp() | utc_timestamp() + 1 |
++---------------------+---------------------+
+| 2019-07-10 12:31:18 | 20190710123119 |
++---------------------+---------------------+
+##keyword
+UTC_TIMESTAMP,UTC,TIMESTAMP
diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/workofyear_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/workofyear_EN.md
new file mode 100644
index 00000000000000..977957de6684b0
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/workofyear_EN.md
@@ -0,0 +1,23 @@
+# weekofyear
+Description
+### Syntax
+
+`INT WEEKOFYEAR(DATETIME date)`
+
+
+
+Returns the week number of the year for the given date
+
+The parameter is Date or Datetime type
+
+## example
+
+```
+mysql> select weekofyear('2008-02-20
00:00:00');
++-----------------------------------+
+| weekofyear('2008-02-20 00:00:00') |
++-----------------------------------+
+| 8 |
++-----------------------------------+
+##keyword
+WEEKOFYEAR
diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/year_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/year_EN.md
new file mode 100644
index 00000000000000..cba5e28614eab7
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/year_EN.md
@@ -0,0 +1,22 @@
+# year
+Description
+### Syntax
+
+`INT YEAR(DATETIME date)`
+
+
+Returns the year part of the date type, ranging from 1000 to 9999
+
+The parameter is Date or Datetime type
+
+## example
+
+```
+mysql> select year('1987-01-01');
++-----------------------------+
+| year('1987-01-01 00:00:00') |
++-----------------------------+
+| 1987 |
++-----------------------------+
+##keyword
+YEAR
diff --git a/docs/documentation/en/sql-reference/sql-functions/index.rst b/docs/documentation/en/sql-reference/sql-functions/index.rst
new file mode 100644
index 00000000000000..4f929c43878d62
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/index.rst
@@ -0,0 +1,16 @@
+=============
+SQL Functions
+=============
+
+.. toctree::
+   :glob:
+
+   *
+
+.. toctree::
+   :hidden:
+
+   date-time-functions/index
+   spatial-functions/index
+   string-functions/index
+   aggregate-functions/index
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/index.rst b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/index.rst
new file mode 100644
index 00000000000000..f09712d02b8b01
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/index.rst
@@ -0,0 +1,8 @@
+=================
+Spatial Functions
+=================
+
+.. toctree::
+   :glob:
+
+   *
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_astext_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_astext_EN.md
new file mode 100644
index 00000000000000..2f35ac03f4c653
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_astext_EN.md
@@ -0,0 +1,21 @@
+# `ST_AsText`,`ST_AsWKT`
+Description
+### Syntax
+
+`VARCHAR ST_AsText(GEOMETRY geo)`
+
+
+Converts a geometric figure into its WKT (Well Known Text) representation
+
+## example
+
+```
+mysql> SELECT ST_AsText(ST_Point(24.7, 56.7));
++---------------------------------+
+| st_astext(st_point(24.7, 56.7)) |
++---------------------------------+
+| POINT (24.7 56.7) |
++---------------------------------+
+```
+##keyword
+ST_ASTEXT,ST_ASWKT,ST,ASTEXT,ASWKT
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_circle_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_circle_EN.md
new file mode 100644
index 00000000000000..55578980dfd5fd
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_circle_EN.md
@@ -0,0 +1,22 @@
+# `ST_Circle`
+Description
+### Syntax
+
+`GEOMETRY ST_Circle(DOUBLE center_lng, DOUBLE center_lat, DOUBLE radius)`
+
+
+Creates a circle on the Earth's sphere. `center_lng` denotes the longitude of the center of the circle,
+`center_lat` denotes the latitude of the center of the circle, and `radius` denotes the radius of the circle in meters.
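+
+To make the argument order concrete (the coordinates and radius below are made-up values), a 1000-meter circle around a point at longitude 116.4, latitude 39.9 would presumably be written as:
+
+```
+-- assumed example: ST_Circle(center_lng, center_lat, radius_in_meters)
+SELECT ST_AsText(ST_Circle(116.4, 39.9, 1000));
+```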
+
+## example
+
+```
+mysql> SELECT ST_AsText(ST_Circle(111, 64, 10000));
++--------------------------------------------+
+| st_astext(st_circle(111.0, 64.0, 10000.0)) |
++--------------------------------------------+
+| CIRCLE ((111 64), 10000) |
++--------------------------------------------+
+```
+##keyword
+ST_CIRCLE,ST,CIRCLE
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_contains_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_contains_EN.md
new file mode 100644
index 00000000000000..95162d4443bf4a
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_contains_EN.md
@@ -0,0 +1,28 @@
+# `ST_Contains`
+Description
+### Syntax
+
+`BOOL ST_Contains(GEOMETRY shape1, GEOMETRY shape2)`
+
+
+Determines whether the geometry shape1 completely contains the geometry shape2
+
+## example
+
+```
+mysql> SELECT ST_Contains(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"), ST_Point(5, 5));
++----------------------------------------------------------------------------------------+
+| st_contains(st_polygon('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))'), st_point(5.0, 5.0))  |
++----------------------------------------------------------------------------------------+
+| 1 |
++----------------------------------------------------------------------------------------+
+
+mysql> SELECT ST_Contains(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"), ST_Point(50, 50));
++------------------------------------------------------------------------------------------+
+| st_contains(st_polygon('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))'), st_point(50.0, 50.0))  |
++------------------------------------------------------------------------------------------+
+| 0 |
++------------------------------------------------------------------------------------------+
+```
+##keyword
+ST_CONTAINS,ST,CONTAINS
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere_EN.md
new file mode 100644
index 00000000000000..3592e61dbb16fb
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere_EN.md
@@ -0,0 +1,21 @@
+# `ST_Distance_Sphere`
+Description
+### Syntax
+
+`DOUBLE ST_Distance_Sphere(DOUBLE x_lng, DOUBLE x_lat, DOUBLE y_lng, DOUBLE y_lat)`
+
+
+Calculates the spherical distance between two points on the Earth, in meters. The arguments are the longitude of point X, the latitude of point X, the longitude of point Y and the latitude of point Y.
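+
+As a quick illustration of the argument order (the coordinates below are made up), the distance between two points would presumably be queried as:
+
+```
+-- assumed example: ST_Distance_Sphere(x_lng, x_lat, y_lng, y_lat), result in meters
+SELECT ST_Distance_Sphere(116.35, 39.93, 116.42, 39.90);
+```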
+
+## example
+
+```
+mysql> select st_distance_sphere(116.35620117, 39.939093, 116.4274406433, 39.9020987219);
++----------------------------------------------------------------------------+
+| st_distance_sphere(116.35620117, 39.939093, 116.4274406433, 39.9020987219) |
++----------------------------------------------------------------------------+
+| 7336.9135549995917 |
++----------------------------------------------------------------------------+
+```
+##keyword
+ST_DISTANCE_SPHERE,ST,DISTANCE,SPHERE
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext_EN.md
new file mode 100644
index 00000000000000..3af66a9f5bd4c8
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext_EN.md
@@ -0,0 +1,21 @@
+# `ST_GeometryFromText`,`ST_GeomFromText`
+Description
+### Syntax
+
+`GEOMETRY ST_GeometryFromText(VARCHAR wkt)`
+
+
+Converts a WKT (Well Known Text) into the corresponding in-memory geometry
+
+## example
+
+```
+mysql> SELECT ST_AsText(ST_GeometryFromText("LINESTRING (1 1, 2 2)"));
++---------------------------------------------------------+
+| st_astext(st_geometryfromtext('LINESTRING (1 1, 2 2)')) |
++---------------------------------------------------------+
+| LINESTRING (1 1, 2 2) |
++---------------------------------------------------------+
+```
+##keyword
+ST_GEOMETRYFROMTEXT,ST_GEOMFROMTEXT,ST,GEOMETRYFROMTEXT,GEOMFROMTEXT
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_linefromtext_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_linefromtext_EN.md
new file mode 100644
index 00000000000000..c3925278879655
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_linefromtext_EN.md
@@ -0,0 +1,21 @@
+# `ST_LineFromText`,`ST_LineStringFromText`
+Description
+### Syntax
+
+`GEOMETRY ST_LineFromText(VARCHAR wkt)`
+
+
+Converts a WKT (Well Known Text) into a Line-type in-memory representation
+
+## example
+
+```
+mysql> SELECT ST_AsText(ST_LineFromText("LINESTRING (1 1, 2 2)"));
++---------------------------------------------------------+
+| st_astext(st_linefromtext('LINESTRING (1 1, 2 2)')) |
++---------------------------------------------------------+
+| LINESTRING (1 1, 2 2) |
++---------------------------------------------------------+
+```
+##keyword
+ST_LINEFROMTEXT,ST_LINESTRINGFROMTEXT,ST,LINEFROMTEXT,LINESTRINGFROMTEXT
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_point_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_point_EN.md
new file mode 100644
index 00000000000000..a13359bdc38226
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_point_EN.md
@@ -0,0 +1,22 @@
+# `ST_Point`
+Description
+### Syntax
+
+`POINT ST_Point(DOUBLE x, DOUBLE y)`
+
+
+Returns the corresponding Point for the given X and Y coordinate values.
+Currently this value is only meaningful on a spherical coordinate system, where X/Y correspond to longitude/latitude.
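+
+For instance (the longitude/latitude values here are illustrative), a point can be built and inspected as:
+
+```
+-- assumed example: x = longitude, y = latitude
+SELECT ST_AsText(ST_Point(116.4, 39.9));
+```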
+
+## example
+
+```
+mysql> SELECT ST_AsText(ST_Point(24.7, 56.7));
++---------------------------------+
+| st_astext(st_point(24.7, 56.7)) |
++---------------------------------+
+| POINT (24.7 56.7) |
++---------------------------------+
+```
+##keyword
+ST_POINT,ST,POINT
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_polygon_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_polygon_EN.md
new file mode 100644
index 00000000000000..b088ab8095a26e
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_polygon_EN.md
@@ -0,0 +1,21 @@
+# `ST_Polygon`,`ST_PolyFromText`,`ST_PolygonFromText`
+Description
+### Syntax
+
+`GEOMETRY ST_Polygon(VARCHAR wkt)`
+
+
+Converts a WKT (Well Known Text) into the corresponding polygon in-memory form
+
+## example
+
+```
+mysql> SELECT ST_AsText(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"));
++------------------------------------------------------------------+
+| st_astext(st_polygon('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))')) |
++------------------------------------------------------------------+
+| POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0)) |
++------------------------------------------------------------------+
+```
+##keyword
+ST_POLYGON,ST_POLYFROMTEXT,ST_POLYGONFROMTEXT,ST,POLYGON,POLYFROMTEXT,POLYGONFROMTEXT
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_x_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_x_EN.md
new file mode 100644
index 00000000000000..854655d32c677f
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_x_EN.md
@@ -0,0 +1,21 @@
+# `ST_X`
+Description
+### Syntax
+
+`DOUBLE ST_X(POINT point)`
+
+
+When point is a valid POINT type, the corresponding X coordinate value is returned.
+
+## example
+
+```
+mysql> SELECT ST_X(ST_Point(24.7, 56.7));
++----------------------------+
+| st_x(st_point(24.7, 56.7)) |
++----------------------------+
+| 24.7 |
++----------------------------+
+```
+##keyword
+ST_X,ST,X
diff --git a/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_y_EN.md b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_y_EN.md
new file mode 100644
index 00000000000000..1de6f29b42788c
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/spatial-functions/st_y_EN.md
@@ -0,0 +1,21 @@
+# `ST_Y`
+Description
+### Syntax
+
+`DOUBLE ST_Y(POINT point)`
+
+
+When point is a valid POINT type, the corresponding Y coordinate value is returned.
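+
+For example (the point coordinates are illustrative), extracting both coordinates from the same point might look like:
+
+```
+-- assumed example: ST_X returns the first argument of ST_Point, ST_Y the second
+SELECT ST_X(ST_Point(24.7, 56.7)), ST_Y(ST_Point(24.7, 56.7));
+```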
+
+## example
+
+```
+mysql> SELECT ST_Y(ST_Point(24.7, 56.7));
++----------------------------+
+| st_y(st_point(24.7, 56.7)) |
++----------------------------+
+| 56.7 |
++----------------------------+
+```
+##keyword
+ST_Y,ST,Y
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/ascii_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/ascii_EN.md
new file mode 100644
index 00000000000000..dcb94363402899
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/ascii_EN.md
@@ -0,0 +1,28 @@
+# ascii
+Description
+### Syntax
+
+`INT ascii(VARCHAR str)`
+
+
+Returns the ASCII code corresponding to the first character of the string
+
+## example
+
+```
+mysql> select ascii('1');
++------------+
+| ascii('1') |
++------------+
+| 49 |
++------------+
+
+mysql> select ascii('234');
++--------------+
+| ascii('234') |
++--------------+
+| 50 |
++--------------+
+```
+##keyword
+ASCII
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/concat_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/concat_EN.md
new file mode 100644
index 00000000000000..df844d58f0fbe7
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/concat_EN.md
@@ -0,0 +1,35 @@
+# concat
+Description
+### Syntax
+
+`VARCHAR concat(VARCHAR,...)`
+
+
+Concatenates multiple strings; returns NULL if any of the arguments is NULL
+
+## example
+
+```
+mysql> select concat("a", "b");
++------------------+
+| concat('a', 'b') |
++------------------+
+| ab |
++------------------+
+
+mysql> select concat("a", "b", "c");
++-----------------------+
+| concat('a', 'b', 'c') |
++-----------------------+
+| abc |
++-----------------------+
+
+mysql> select concat("a", null, "c");
++------------------------+
+| concat('a', NULL, 'c') |
++------------------------+
+| NULL |
++------------------------+
+```
+##keyword
+CONCAT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/concat_ws_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/concat_ws_EN.md
new file mode 100644
index 00000000000000..3a559b964478de
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/concat_ws_EN.md
@@ -0,0 +1,37 @@
+# concat_ws
+Description
+### Syntax
+
+`VARCHAR concat_ws(VARCHAR sep, VARCHAR str,...)`
+
+
+Joins the second and all subsequent arguments into one string, using the first argument sep as the separator.
+If the separator is NULL, NULL is returned.
+The `concat_ws` function does not skip empty strings, but it does skip NULL values.
+
+## example
+
+```
+mysql> select concat_ws("or", "d", "is");
++----------------------------+
+| concat_ws('or', 'd', 'is') |
++----------------------------+
+| doris |
++----------------------------+
+
+mysql> select concat_ws(NULL, "d", "is");
++----------------------------+
+| concat_ws(NULL, 'd', 'is') |
++----------------------------+
+| NULL |
++----------------------------+
+
+mysql> select concat_ws("or", "d", NULL, "is");
++---------------------------------+
+| concat_ws('or', 'd', NULL, 'is') |
++---------------------------------+
+| doris |
++---------------------------------+
+```
+##keyword
+CONCAT_WS,CONCAT,WS
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/find_in_set_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/find_in_set_EN.md
new file mode 100644
index 00000000000000..eb20c4bb743441
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/find_in_set_EN.md
@@ -0,0 +1,21 @@
+# find_in_set
+Description
+### Syntax
+
+`INT find_in_set(VARCHAR str, VARCHAR strlist)`
+
+
+Returns the position of the first occurrence of str in strlist (counting from 1). strlist is a comma-separated string. If str is not found, returns 0. If any argument is NULL, returns NULL.
+
+## example
+
+```
+mysql> select find_in_set("b", "a,b,c");
++---------------------------+
+| find_in_set('b', 'a,b,c') |
++---------------------------+
+| 2 |
++---------------------------+
+```
+##keyword
+FIND_IN_SET,FIND,IN,SET
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_double_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_double_EN.md
new file mode 100644
index 00000000000000..5362d230fd8c3c
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_double_EN.md
@@ -0,0 +1,48 @@
+# get_json_double
+Description
+### Syntax
+
+`DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path)`
+
+
+Parses and returns the floating-point content at the specified path in the JSON string.
+json_path must start with the $ symbol and use . as the path separator. If a path key itself contains a ., it can be surrounded by double quotation marks.
+Use [] to denote array subscripts, starting at 0.
+The content of path cannot contain ", [ or ].
+If the json_string format is incorrect, or the json_path format is incorrect, or no match is found, NULL is returned.
+
+## example
+
+1. Get the value whose key is "k1"
+
+```
+mysql> SELECT get_json_double('{"k1":1.3, "k2":"2"}', "$.k1");
++-------------------------------------------------+
+| get_json_double('{"k1":1.3, "k2":"2"}', '$.k1') |
++-------------------------------------------------+
+| 1.3 |
++-------------------------------------------------+
+```
+
+2. Get the second element of the array whose key is "my.key"
+
+```
+mysql> SELECT get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]');
++---------------------------------------------------------------------------+
+| get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]') |
++---------------------------------------------------------------------------+
+| 2.2 |
++---------------------------------------------------------------------------+
+```
+
+3. Get the first element in an array whose secondary path is k1.
key -> k2
+```
+mysql> SELECT get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]');
++---------------------------------------------------------------------+
+| get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]') |
++---------------------------------------------------------------------+
+| 1.1 |
++---------------------------------------------------------------------+
+```
+##keyword
+GET_JSON_DOUBLE,GET,JSON,DOUBLE
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_int_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_int_EN.md
new file mode 100644
index 00000000000000..0f6bf0cb2527d7
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_int_EN.md
@@ -0,0 +1,48 @@
+# get_json_int
+Description
+### Syntax
+
+`INT get_json_int(VARCHAR json_str, VARCHAR json_path)`
+
+
+Parses and returns the integer content at the specified path in the JSON string.
+json_path must start with the $ symbol and use . as the path separator. If a path key itself contains a ., it can be surrounded by double quotation marks.
+Use [] to denote array subscripts, starting at 0.
+The content of path cannot contain ", [ or ].
+If the json_string format is incorrect, or the json_path format is incorrect, or no match is found, NULL is returned.
+
+## example
+
+1. Get the value whose key is "k1"
+
+```
+mysql> SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1");
++--------------------------------------------+
+| get_json_int('{"k1":1, "k2":"2"}', '$.k1') |
++--------------------------------------------+
+| 1 |
++--------------------------------------------+
+```
+
+2. Get the second element of the array whose key is "my.key"
+
+```
+mysql> SELECT get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]');
++------------------------------------------------------------------+
+| get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]') |
++------------------------------------------------------------------+
+| 2 |
++------------------------------------------------------------------+
+```
+
+3. Get the first element in an array whose secondary path is k1.key -> k2
+```
+mysql> SELECT get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]');
++--------------------------------------------------------------+
+| get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]') |
++--------------------------------------------------------------+
+| 1 |
++--------------------------------------------------------------+
+```
+##keyword
+GET_JSON_INT,GET,JSON,INT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_string_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_string_EN.md
new file mode 100644
index 00000000000000..e2d5455ac8ab26
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/get_json_string_EN.md
@@ -0,0 +1,58 @@
+# get_json_string
+Description
+### Syntax
+
+`VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path)`
+
+
+Parses and returns the string content at the specified path in the JSON string.
+json_path must start with the $ symbol and use . as the path separator. If a path key itself contains a ., it can be surrounded by double quotation marks.
+Use [] to denote array subscripts, starting at 0.
+The content of path cannot contain ", [ or ].
+If the json_string format is incorrect, or the json_path format is incorrect, or no match is found, NULL is returned.
+
+## example
+
+1. Get the value whose key is "k1"
+
+```
+mysql> SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1");
++---------------------------------------------------+
+| get_json_string('{"k1":"v1", "k2":"v2"}', '$.k1') |
++---------------------------------------------------+
+| v1 |
++---------------------------------------------------+
+```
+
+2. Get the second element of the array whose key is "my.key"
+
+```
+mysql> SELECT get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]');
++------------------------------------------------------------------------------+
+| get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]') |
++------------------------------------------------------------------------------+
+| e2 |
++------------------------------------------------------------------------------+
+```
+
+3. Get the first element in an array whose secondary path is k1.key -> k2
+```
+mysql> SELECT get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]');
++-----------------------------------------------------------------------+
+| get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]') |
++-----------------------------------------------------------------------+
+| v1 |
++-----------------------------------------------------------------------+
+```
+
+4. Get all the values in the array where the key is "k1"
+```
+mysql> SELECT get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.k1');
++---------------------------------------------------------------------------------+
+| get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.k1') |
++---------------------------------------------------------------------------------+
+| ["v1","v3","v4"] |
++---------------------------------------------------------------------------------+
+```
+##keyword
+GET_JSON_STRING,GET,JSON,STRING
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/group_concat_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/group_concat_EN.md
new file mode 100644
index 00000000000000..c3c06d05468676
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/group_concat_EN.md
@@ -0,0 +1,37 @@
+# group_concat
+Description
+### Syntax
+
+`VARCHAR group_concat(VARCHAR str[, VARCHAR sep])`
+
+
+This function is an aggregate function similar to sum(). group_concat concatenates multiple rows of the result set into one string. The second argument sep is the separator placed between the strings and can be omitted. This function usually needs to be used together with a group by statement.
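+
+As a sketch of the typical pattern (the table and column names below are hypothetical), group_concat is usually combined with group by, for example:
+
+```
+-- assumed schema: a table orders(user_id, item)
+SELECT user_id, group_concat(item, ', ')
+FROM orders
+GROUP BY user_id;
+```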
+
+## example
+
+```
+mysql> select value from test;
++-------+
+| value |
++-------+
+| a |
+| b |
+| c |
++-------+
+
+mysql> select group_concat(value) from test;
++-----------------------+
+| group_concat(`value`) |
++-----------------------+
+| a, b, c |
++-----------------------+
+
+mysql> select group_concat(value, " ") from test;
++----------------------------+
+| group_concat(`value`, ' ') |
++----------------------------+
+| a b c |
++----------------------------+
+```
+##keyword
+GROUP_CONCAT,GROUP,CONCAT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/index.rst b/docs/documentation/en/sql-reference/sql-functions/string-functions/index.rst
new file mode 100644
index 00000000000000..7d372f8a93a082
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/index.rst
@@ -0,0 +1,8 @@
+================
+String Functions
+================
+
+.. toctree::
+   :glob:
+
+   *
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/instr_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/instr_EN.md
new file mode 100644
index 00000000000000..36c299a471d5da
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/instr_EN.md
@@ -0,0 +1,28 @@
+# instr
+Description
+### Syntax
+
+`INT instr(VARCHAR str, VARCHAR substr)`
+
+
+Returns the position of the first occurrence of substr in str (counting from 1). If substr does not appear in str, returns 0.
+
+## example
+
+```
+mysql> select instr("abc", "b");
++-------------------+
+| instr('abc', 'b') |
++-------------------+
+| 2 |
++-------------------+
+
+mysql> select instr("abc", "d");
++-------------------+
+| instr('abc', 'd') |
++-------------------+
+| 0 |
++-------------------+
+```
+##keyword
+INSTR
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/lcase_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/lcase_EN.md
new file mode 100644
index 00000000000000..b1d23e0059a722
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/lcase_EN.md
@@ -0,0 +1,11 @@
+# lcase
+Description
+### Syntax
+
+`VARCHAR lcase(VARCHAR str)`
+
+
+Consistent with `lower`.
+
+##keyword
+LCASE
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/left_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/left_EN.md
new file mode 100644
index 00000000000000..ad071bf789d79f
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/left_EN.md
@@ -0,0 +1,21 @@
+# left
+Description
+### Syntax
+
+`VARCHAR left(VARCHAR str, INT len)`
+
+
+Returns the left part of the string, of the specified length
+
+## example
+
+```
+mysql> select left("Hello doris",5);
++------------------------+
+| left('Hello doris', 5) |
++------------------------+
+| Hello |
++------------------------+
+```
+##keyword
+LEFT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/length_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/length_EN.md
new file mode 100644
index 00000000000000..b24ea0c4a3e8aa
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/length_EN.md
@@ -0,0 +1,28 @@
+# length
+Description
+### Syntax
+
+`INT length(VARCHAR str)`
+
+
+Returns the length of the string; for multi-byte characters, the number of bytes is returned.
For example, five two-byte-wide characters return a length of 10.
+
+## example
+
+```
+mysql> select length("abc");
++---------------+
+| length('abc') |
++---------------+
+| 3 |
++---------------+
+
+mysql> select length("中国");
++------------------+
+| length('中国') |
++------------------+
+| 6 |
++------------------+
+```
+##keyword
+LENGTH
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/locate_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/locate_EN.md
new file mode 100644
index 00000000000000..fcee2e13ef0161
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/locate_EN.md
@@ -0,0 +1,35 @@
+# locate
+Description
+### Syntax
+
+`INT locate(VARCHAR substr, VARCHAR str[, INT pos])`
+
+
+Returns the position of substr in str (counting from 1). If the third argument pos is specified, the search for substr starts at position pos in str. If substr is not found, returns 0.
+
+## example
+
+```
+mysql> SELECT LOCATE('bar', 'foobarbar');
++----------------------------+
+| locate('bar', 'foobarbar') |
++----------------------------+
+| 4 |
++----------------------------+
+
+mysql> SELECT LOCATE('xbar', 'foobar');
++--------------------------+
+| locate('xbar', 'foobar') |
++--------------------------+
+| 0 |
++--------------------------+
+
+mysql> SELECT LOCATE('bar', 'foobarbar', 5);
++-------------------------------+
+| locate('bar', 'foobarbar', 5) |
++-------------------------------+
+| 7 |
++-------------------------------+
+```
+##keyword
+LOCATE
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/lower_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/lower_EN.md
new file mode 100644
index 00000000000000..94f56bb49b1526
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/lower_EN.md
@@ -0,0 +1,21 @@
+# lower
+Description
+### Syntax
+
+`VARCHAR lower(VARCHAR str)`
+
+
+Converts all characters of the argument string to lowercase
+
+## example
+
+```
+mysql> SELECT lower("AbC123");
++-----------------+
+| lower('AbC123') |
++-----------------+
+| abc123 |
++-----------------+
+```
+##keyword
+LOWER
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/lpad_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/lpad_EN.md
new file mode 100644
index 00000000000000..070a81b7734fc0
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/lpad_EN.md
@@ -0,0 +1,28 @@
+# lpad
+Description
+### Syntax
+
+`VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)`
+
+
+Returns a string of length len from str, counted from the first character. If len is greater than the length of str, pad characters are prepended to str until the length reaches len. If len is less than the length of str, the function truncates str and returns only the first len characters.
+
+## example
+
+```
+mysql> SELECT lpad("hi", 5, "xy");
++---------------------+
+| lpad('hi', 5, 'xy') |
++---------------------+
+| xyxhi |
++---------------------+
+
+mysql> SELECT lpad("hi", 1, "xy");
++---------------------+
+| lpad('hi', 1, 'xy') |
++---------------------+
+| h |
++---------------------+
+```
+##keyword
+LPAD
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/ltrim_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/ltrim_EN.md
new file mode 100644
index 00000000000000..bacd064c85cd92
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/ltrim_EN.md
@@ -0,0 +1,21 @@
+# ltrim
+Description
+### Syntax
+
+`VARCHAR ltrim(VARCHAR str)`
+
+
+Removes the consecutive spaces from the beginning of the argument str
+
+## example
+
+```
+mysql> SELECT ltrim('   ab d');
++------------------+
+| ltrim('   ab d') |
++------------------+
+| ab d |
++------------------+
+```
+##keyword
+LTRIM
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/money_format_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/money_format_EN.md
new file mode 100644
index 00000000000000..3b7c6036f1b5eb
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/money_format_EN.md
@@ -0,0 +1,35 @@
+# money_format
+Description
+### Syntax
+
+`VARCHAR money_format(Number)`
+
+
+Outputs the number in currency format: the integer part is separated by commas every three digits, and the decimal part is rounded to two digits.
+
+## example
+
+```
+mysql> select money_format(17014116);
++------------------------+
+| money_format(17014116) |
++------------------------+
+| 17,014,116.00 |
++------------------------+
+
+mysql> select money_format(1123.456);
++------------------------+
+| money_format(1123.456) |
++------------------------+
+| 1,123.46 |
++------------------------+
+
+mysql> select money_format(1123.4);
++----------------------+
+| money_format(1123.4) |
++----------------------+
+| 1,123.40 |
++----------------------+
+```
+##keyword
+MONEY_FORMAT,MONEY,FORMAT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_extract_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_extract_EN.md
new file mode 100644
index 00000000000000..4e5d90b632be4f
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_extract_EN.md
@@ -0,0 +1,28 @@
+# regexp_extract
+Description
+### Syntax
+
+`VARCHAR regexp_extract(VARCHAR str, VARCHAR pattern, int pos)`
+
+
+Matches the string str against the regular expression pattern and extracts the pos-th matching group. pattern must match some part of str; otherwise an empty string is returned. If there is no match, an empty string is returned.
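+
+As an illustration of the pos argument (the pattern and input below are illustrative), pos selects which capture group of the pattern is returned:
+
+```
+-- assumed example: group 1 captures the digits, group 2 the trailing letters
+SELECT regexp_extract('abc123def', '([0-9]+)([a-z]+)', 2);
+```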
+
+## example
+
+```
+mysql> SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 1);
++-------------------------------------------------------------+
+| regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 1) |
++-------------------------------------------------------------+
+| b |
++-------------------------------------------------------------+
+
+mysql> SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 2);
++-------------------------------------------------------------+
+| regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 2) |
++-------------------------------------------------------------+
+| d |
++-------------------------------------------------------------+
+```
+##keyword
+REGEXP_EXTRACT,REGEXP,EXTRACT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_replace_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_replace_EN.md
new file mode 100644
index 00000000000000..8913c4e5fe6584
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/regexp_replace_EN.md
@@ -0,0 +1,28 @@
+# regexp_replace
+Description
+### Syntax
+
+`VARCHAR regexp_replace(VARCHAR str, VARCHAR pattern, VARCHAR repl)`
+
+
+Matches the string str against the regular expression pattern and replaces the matching parts with repl
+
+## example
+
+```
+mysql> SELECT regexp_replace('a b c', " ", "-");
++-----------------------------------+
+| regexp_replace('a b c', ' ', '-') |
++-----------------------------------+
+| a-b-c |
++-----------------------------------+
+
+mysql> SELECT regexp_replace('a b c','(b)','<\\1>');
++----------------------------------------+
+| regexp_replace('a b c', '(b)', '<\1>') |
++----------------------------------------+
+| a <b> c |
++----------------------------------------+
+```
+##keyword
+REGEXP_REPLACE,REGEXP,REPLACE
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/repeat_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/repeat_EN.md
new file mode 100644
index 00000000000000..0630b9a711a163
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/repeat_EN.md
@@ -0,0 +1,28 @@
+# repeat
+Description
+### Syntax
+
+`VARCHAR repeat(VARCHAR str, INT count)`
+
+
+Repeats the string str count times. Returns an empty string when count is less than 1, and returns NULL when either str or count is NULL.
+
+## example
+
+```
+mysql> SELECT repeat("a", 3);
++----------------+
+| repeat('a', 3) |
++----------------+
+| aaa |
++----------------+
+
+mysql> SELECT repeat("a", -1);
++-----------------+
+| repeat('a', -1) |
++-----------------+
+| |
++-----------------+
+```
+##keyword
+REPEAT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/right_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/right_EN.md
new file mode 100644
index 00000000000000..3bce2e313fde61
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/right_EN.md
@@ -0,0 +1,21 @@
+# right
+Description
+### Syntax
+
+`VARCHAR right(VARCHAR str, INT len)`
+
+
+Returns the right part of the string, of the specified length
+
+## example
+
+```
+mysql> select right("Hello doris",5);
++-------------------------+
+| right('Hello doris', 5) |
++-------------------------+
+| doris |
++-------------------------+
+```
+##keyword
+RIGHT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/split_part_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/split_part_EN.md
new file mode 100644
index 00000000000000..ea173dac068d56
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/split_part_EN.md
@@ -0,0 +1,43 @@
+# split_part
+Description
+### Syntax
+
+`VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)`
+
+
+Splits the string according to the delimiter and returns the specified field (counting from the beginning).
+
+## example
+
+```
+mysql> select split_part("hello world", " ", 1);
++----------------------------------+
+| split_part('hello world', ' ', 1) |
++----------------------------------+
+| hello |
++----------------------------------+
+
+
+mysql> select split_part("hello world", " ", 2);
++----------------------------------+
+| split_part('hello world', ' ', 2) |
++----------------------------------+
+| world |
++----------------------------------+
+
+mysql> select split_part("2019年7月8号", "月", 1);
++-----------------------------------------+
+| split_part('2019年7月8号', '月', 1) |
++-----------------------------------------+
+| 2019年7 |
++-----------------------------------------+
+
+mysql> select split_part("abca", "a", 1);
++----------------------------+
+| split_part('abca', 'a', 1) |
++----------------------------+
+| |
++----------------------------+
+```
+##keyword
+SPLIT_PART,SPLIT,PART
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/strleft_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/strleft_EN.md
new file mode 100644
index 00000000000000..3e62b0da341f3d
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/strleft_EN.md
@@ -0,0 +1,21 @@
+# strleft
+Description
+### Syntax
+
+`VARCHAR strleft(VARCHAR str, INT len)`
+
+
+Returns the left part of the string, of the specified length
+
+## example
+
+```
+mysql> select strleft("Hello doris",5);
++------------------------+
+| strleft('Hello doris', 5) |
++------------------------+
+| Hello |
++------------------------+
+```
+##keyword
+STRLEFT
diff --git a/docs/documentation/en/sql-reference/sql-functions/string-functions/strright_EN.md b/docs/documentation/en/sql-reference/sql-functions/string-functions/strright_EN.md
new file mode 100644
index 00000000000000..96b4c164adfede
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-functions/string-functions/strright_EN.md
@@ -0,0 +1,21 @@
+# strright
+Description
+### Syntax
+
+`VARCHAR strright(VARCHAR str, INT len)`
+
+
+Returns the right part of the string, of the specified length
+
+## example
+
+```
+mysql> select strright("Hello doris",5);
++-------------------------+
+| strright('Hello doris', 5) |
++-------------------------+
+| doris |
++-------------------------+ +``` +##keyword +STRRIGHT diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE ROLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE ROLE_EN.md new file mode 100644 index 00000000000000..6d939d48e61eaf --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE ROLE_EN.md @@ -0,0 +1,19 @@ +# CREATE ROLE +Description +The statement user creates a role + +Grammar: +CREATE ROLE role1; + +This statement creates an unauthorized role that can be subsequently granted permission through the GRANT command. + +'35;'35; example + +1. Create a role + +CREATE ROLE role1; + +## keyword +CREATE, ROLE + + diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE USER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE USER_EN.md new file mode 100644 index 00000000000000..949534a2dfeca0 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/CREATE USER_EN.md @@ -0,0 +1,48 @@ +# CREATE USER +Description + +Syntax: + +CREATE USER user_identity [IDENTIFIED BY 'password'] [DEFAULT ROLE 'role_name'] + +user_identity: +'user_name'@'host' + +The CREATE USER command is used to create a Doris user. In Doris, a user_identity uniquely identifies a user. User_identity consists of two parts, user_name and host, where username is the user name. The host identifies the host address where the client connects. The host part can use% for fuzzy matching. If no host is specified, the default is'%', which means that the user can connect to Doris from any host. + +The host part can also be specified as a domain with the grammar:'user_name'@['domain']. Even if surrounded by brackets, Doris will think of it as a domain and try to parse its IP address. At present, it only supports BNS analysis within Baidu. + +If a role (ROLE) is specified, the permissions that the role has are automatically granted to the newly created user. If not specified, the user defaults to having no permissions. The specified ROLE must already exist. + +'35;'35; example + +1. Create a passwordless user (without specifying host, it is equivalent to Jack @'%') + +CREATE USER 'jack'; + +2. Create a password user that allows login from'172.10.1.10' + +CREATE USER jack@'172.10.1.10' IDENTIFIED BY '123456'; + +3. To avoid passing plaintext, use case 2 can also be created in the following way + +CREATE USER jack@'172.10.1.10' IDENTIFIED BY PASSWORD '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9'; + +Later encrypted content can be obtained through PASSWORD (), for example: + +SELECT PASSWORD('123456'); + +4. Create a user who is allowed to log in from the'192.168'subnet and specify its role as example_role + +CREATE USER 'jack'@'192.168.%' DEFAULT ROLE 'example_role'; + +5. Create a user who is allowed to log in from the domain name'example_domain'. + +CREATE USER 'jack'@['example_domain'] IDENTIFIED BY '12345'; + +6. 
Create a user and specify a role + +CREATE USER 'jack'@'%' IDENTIFIED BY '12345' DEFAULT ROLE 'my_role'; + +## keyword +CREATE, USER diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/DROP ROLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/DROP ROLE_EN.md new file mode 100644 index 00000000000000..5726b96f643f77 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/DROP ROLE_EN.md @@ -0,0 +1,17 @@ +# DROP ROLE +Description +The statement user deletes a role + +Grammar: +DROP ROLE role1; + +Deleting a role does not affect the permissions of users who previously belonged to that role. It is only equivalent to decoupling the role from the user. The permissions that the user has obtained from the role will not change. + +'35;'35; example + +1. Delete a role + +DROP ROLE role1; + +## keyword +DROP, ROLE diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/DROP USER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/DROP USER_EN.md new file mode 100644 index 00000000000000..ce34afc93ff269 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/DROP USER_EN.md @@ -0,0 +1,17 @@ +# DROP USER +Description + +Syntax: + +DROP USER 'user_name' + +The DROP USER command deletes a Palo user. Doris does not support deleting the specified user_identity here. When a specified user is deleted, all user_identities corresponding to that user are deleted. For example, two users, Jack @'192%'and Jack @['domain'] were created through the CREATE USER statement. After DROP USER'jack' was executed, Jack @'192%'and Jack @['domain'] would be deleted. + +'35;'35; example + +1. Delete user jack + +DROP USER 'jack' + +## keyword +DROP, USER diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/GRANT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/GRANT_EN.md new file mode 100644 index 00000000000000..a3ca675977bc91 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/GRANT_EN.md @@ -0,0 +1,55 @@ +Grant +Description + +The GRANT command is used to give the specified user or role the specified permissions. + +Syntax: + +GRANT privilege_list ON db_name[.tbl_name] TO user_identity [ROLE role_name] + + +Privilege_list is a list of permissions that need to be granted, separated by commas. Currently Doris supports the following permissions: + +NODE_PRIV: Operational privileges of cluster nodes, including operation of nodes'up and down lines. Only root users have this privilege and can not be given to other users. +ADMIN_PRIV: All rights except NODE_PRIV. +GRANT_PRIV: Permission to operate permissions. Including the creation and deletion of users, roles, authorization and revocation, password settings and so on. +SELECT_PRIV: Read permissions for specified libraries or tables +LOAD_PRIV: Import permissions for specified libraries or tables +ALTER_PRIV: schema change permissions for specified libraries or tables +CREATE_PRIV: Creation permissions for specified libraries or tables +DROP_PRIV: Delete permissions for specified libraries or tables + +旧版权限中的 ALL 和 READ_WRITE 会被转换成:SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV; +READ_ONLY is converted to SELECT_PRIV. + +Db_name [.tbl_name] supports the following three forms: + +1. *. * permissions can be applied to all libraries and all tables in them +2. db. 
* permissions can be applied to all tables under the specified library +3. db.tbl permissions can be applied to specified tables under specified Libraries + +The libraries or tables specified here can be non-existent libraries and tables. + +user_identity: + +The user_identity syntax here is the same as CREATE USER. And you must create user_identity for the user using CREATE USER. The host in user_identity can be a domain name. If it is a domain name, the validity time of permissions may be delayed by about one minute. + +You can also grant permissions to the specified ROLE, which is automatically created if the specified ROLE does not exist. + +'35;'35; example + +1. Grant permissions to all libraries and tables to users + +GRANT SELECT_PRIV ON *.* TO 'jack'@'%'; + +2. Grant permissions to specified library tables to users + +GRANT SELECT_PRIV,ALTER_PRIVS,LOAD_PRIV ON db1.tbl1 TO 'jack'@'192.8.%'; + +3. Grant permissions to specified library tables to roles + +GRANT LOAD_PRIV ON db1.* TO ROLE 'my_role'; + +## keyword +GRANT + diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/REVOKE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/REVOKE_EN.md new file mode 100644 index 00000000000000..fa217e166389c0 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/REVOKE_EN.md @@ -0,0 +1,22 @@ +# REVOKE +Description + +The REVOKE command is used to revoke the rights specified by the specified user or role. +Syntax +REVOKE privilege_list ON db_name[.tbl_name] FROM user_identity [ROLE role_name] + +user_identity: + +The user_identity syntax here is the same as CREATE USER. And you must create user_identity for the user using CREATE USER. The host in user_identity can be a domain name. If it is a domain name, the revocation time of permission may be delayed by about one minute. + +You can also revoke the permission of the specified ROLE, which must exist for execution. + +'35;'35; example + +1. Revoke the rights of user Jack database testDb + +REVOKE SELECT_PRIV ON db1.* FROM 'jack'@'192.%'; + +## keyword + +REVOKE diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/SET PASSWORD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/SET PASSWORD_EN.md new file mode 100644 index 00000000000000..a288ad1ca5a510 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/SET PASSWORD_EN.md @@ -0,0 +1,29 @@ +# SET PASSWORD +Description + +Syntax: + +SET PASSWORD [FOR user_identity] = +[PASSWORD('plain password')]|['hashed password'] + +The SET PASSWORD command can be used to modify a user's login password. If the [FOR user_identity] field does not exist, modify the password of the current user. + +Note that the user_identity here must match exactly the user_identity specified when creating a user using CREATE USER, otherwise the user will be reported as non-existent. If user_identity is not specified, the current user is'username'@'ip', which may not match any user_identity. The current user can be viewed through SHOW GRANTS. + +PASSWORD () input is a plaintext password, and direct use of strings, you need to pass the encrypted password. +If you change the password of other users, you need to have administrator privileges. + +'35;'35; example + +1. Modify the password of the current user + +SET PASSWORD = PASSWORD('123456') +SET PASSWORD = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' + +2. 
Modify the specified user password + +SET PASSWORD FOR 'jack'@'192.%' = PASSWORD('123456') +SET PASSWORD FOR 'jack'@['domain'] = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' + +## keyword +SET, PASSWORD diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/SET PROPERTY_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/SET PROPERTY_EN.md new file mode 100644 index 00000000000000..fa7f2f86659a44 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/SET PROPERTY_EN.md @@ -0,0 +1,56 @@ +# SET PROPERTY +Description + +Syntax: + +SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value'] + +Set user attributes, including resources allocated to users, import cluster, etc. The user attributes set here are for user, not user_identity. That is to say, if two users'jack'@'%' and'jack'@'192%'are created through the CREATE USER statement, the SET PROPERTY statement can only be used for the jack user, not'jack'@'%' or'jack'@'192%' + +Importing cluster is only applicable to Baidu internal users. + +key: + +Super user rights: +Max_user_connections: Maximum number of connections. +resource.cpu_share: cpu资源分配。 +Load_cluster. {cluster_name}. priority: assigns priority to a specified cluster, which can be HIGH or NORMAL + +Ordinary user rights: +Quota.normal: Resource allocation at the normal level. +Quota.high: Resource allocation at the high level. +Quota.low: Resource allocation at low level. + +Load_cluster. {cluster_name}. hadoop_palo_path: The Hadoop directory used by Palo needs to store ETL programs and intermediate data generated by ETL for Palo to import. After the import is completed, the intermediate data will be automatically cleaned up, and the ETL program will be automatically reserved for next use. +Load_cluster. {cluster_name}. hadoop_configs: configuration of hadoop, where fs. default. name, mapred. job. tracker, hadoop. job. UGI must be filled in. +Load ucluster. {cluster name}. hadoop port: Hadoop HDFS name node http} +Default_load_cluster: The default import cluster. + +'35;'35; example + +1. Modify the maximum number of user jacks to 1000 +SET PROPERTY FOR 'jack' 'max_user_connections' = '1000'; + +2. Modify the cpu_share of user Jack to 1000 +SET PROPERTY FOR 'jack' 'resource.cpu_share' = '1000'; + +3. Modify the weight of the normal group of Jack users +Set property for'jack''quota. normal' = 400'; + +4. Add import cluster for user jack +SET PROPERTY FOR 'jack' +'load 'cluster.{cluster name}.hadoop'u palo path' ='/user /palo /palo path', +'load_cluster.{cluster_name}.hadoop_configs' = 'fs.default.name=hdfs://dpp.cluster.com:port;mapred.job.tracker=dpp.cluster.com:port;hadoop.job.ugi=user,password;mapred.job.queue.name=job_queue_name_in_hadoop;mapred.job.priority=HIGH;'; + +5. Delete the import cluster under user jack. +SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}' = ''; + +6. Modify user jack's default import cluster +SET PROPERTY FOR 'jack' 'default_load_cluster' = '{cluster_name}'; + +7. 
+SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}.priority' = 'HIGH';
+
+## keyword
+SET, PROPERTY
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW GRANTS_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW GRANTS_EN.md
new file mode 100644
index 00000000000000..35295aff57b9b2
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW GRANTS_EN.md
@@ -0,0 +1,30 @@
+# SHOW GRANTS
+Description
+
+This statement is used to view user privileges.
+
+Grammar:
+SHOW [ALL] GRANTS [FOR user_identity];
+
+Explain:
+1. SHOW ALL GRANTS shows the privileges of all users.
+2. If user_identity is specified, the privileges of that user are shown. The user_identity must have been created with the CREATE USER command.
+3. If user_identity is not specified, the privileges of the current user are shown.
+
+
+## example
+
+1. View the privileges of all users
+
+SHOW ALL GRANTS;
+
+2. View the privileges of the specified user
+
+SHOW GRANTS FOR jack@'%';
+
+3. View the privileges of the current user
+
+SHOW GRANTS;
+
+## keyword
+SHOW, GRANTS
diff --git a/docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW ROLES_EN.md b/docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW ROLES_EN.md
new file mode 100644
index 00000000000000..b877c4b7b06671
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Account Management/SHOW ROLES_EN.md
@@ -0,0 +1,15 @@
+# SHOW ROLES
+Description
+This statement is used to display all created roles, including the role name, the users it contains, and its privileges.
+
+Grammar:
+SHOW ROLES;
+
+## example
+
+1. View the created roles:
+
+SHOW ROLES;
+
+## keyword
+SHOW,ROLES
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR_EN.md
new file mode 100644
index 00000000000000..a38293a56053e3
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR_EN.md
@@ -0,0 +1,21 @@
+# ADMIN CANCEL REPAIR
+Description
+
+This statement is used to cancel repairing the specified table or partition with high priority
+
+Grammar:
+
+ADMIN CANCEL REPAIR TABLE table_name[ PARTITION (p1,...)];
+
+Explain:
+
+1. This statement only indicates that the system will no longer repair the shard replicas of the specified table or partition with high priority. The replicas are still repaired by the default scheduling.
+
+## example
+
+1. Cancel the high-priority repair
+
+ADMIN CANCEL REPAIR TABLE tbl PARTITION(p1);
+
+## keyword
+ADMIN,CANCEL,REPAIR
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN REPAIR_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN REPAIR_EN.md
new file mode 100644
index 00000000000000..fe24057c3f7b1d
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN REPAIR_EN.md
@@ -0,0 +1,26 @@
+# ADMIN REPAIR
+Description
+
+This statement is used to repair the specified table or partition with high priority
+
+Grammar:
+
+ADMIN REPAIR TABLE table_name[ PARTITION (p1,...)]
+
+Explain:
+
+1. This statement only means that the system attempts to repair the shard replicas of the specified table or partition with high priority; success is not guaranteed. Users can view the repair status through the ADMIN SHOW REPLICA STATUS command.
+2. The default timeout is 14400 seconds (4 hours). After the timeout, the system will no longer repair the shard replicas of the specified table or partition with high priority, and the command needs to be issued again.
+
+## example
+
+1. Attempt to repair the specified table
+
+ADMIN REPAIR TABLE tbl1;
+
+2. Attempt to repair the specified partitions
+
+ADMIN REPAIR TABLE tbl1 PARTITION (p1, p2);
+
+## keyword
+ADMIN,REPAIR
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG_EN.md
new file mode 100644
index 00000000000000..1a17c5e1add684
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG_EN.md
@@ -0,0 +1,18 @@
+# ADMIN SET CONFIG
+Description
+
+This statement is used to set configuration items of the cluster (currently only FE configuration items are supported).
+The settable configuration items can be viewed with the ADMIN SHOW FRONTEND CONFIG; command.
+
+Grammar:
+
+ADMIN SET FRONTEND CONFIG ("key" = "value");
+
+## example
+
+1. Set "disable_balance" to true
+
+ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
+
+## keyword
+ADMIN,SET,CONFIG
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG_EN.md
new file mode 100644
index 00000000000000..7ff776e2b86040
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG_EN.md
@@ -0,0 +1,27 @@
+# ADMIN SHOW CONFIG
+Description
+
+This statement is used to show the configuration of the current cluster (currently only FE configuration items are supported)
+
+Grammar:
+
+ADMIN SHOW FRONTEND CONFIG;
+
+Explain:
+
+The meanings of the result columns are as follows:
+1. Key: Configuration item name
+2. Value: Configuration item value
+3. Type: Configuration item type
+4. IsMutable: Whether the item can be set with the ADMIN SET CONFIG command
+5. MasterOnly: Whether the item applies only to the Master FE
+6. Comment: Configuration item description
+
+## example
+
+1. View the configuration of the current FE node
+
+ADMIN SHOW FRONTEND CONFIG;
+
+## keyword
+ADMIN,SHOW,CONFIG
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION_EN.md
new file mode 100644
index 00000000000000..c34fb9a9a228d3
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION_EN.md
@@ -0,0 +1,25 @@
+# ADMIN SHOW REPLICA DISTRIBUTION
+Description
+
+This statement is used to show the replica distribution of a table or partition
+
+Grammar:
+
+ADMIN SHOW REPLICA DISTRIBUTION FROM [db_name.]tbl_name [PARTITION (p1, ...)];
+
+Explain:
+
+The Graph column in the result shows the replica distribution ratio graphically
+
+## example
+
+1. View the replica distribution of a table
+
+ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1;
+
+2. View the replica distribution of partitions of a table
+
+ADMIN SHOW REPLICA DISTRIBUTION FROM db1.tbl1 PARTITION(p1, p2);
+
+## keyword
+ADMIN,SHOW,REPLICA,DISTRIBUTION
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS_EN.md
new file mode 100644
index 00000000000000..53525c09a829ec
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS_EN.md
@@ -0,0 +1,38 @@
+# ADMIN SHOW REPLICA STATUS
+Description
+
+This statement is used to display the replica status of a table or partition
+
+Grammar:
+
+ADMIN SHOW REPLICA STATUS FROM [db_name.]tbl_name [PARTITION (p1,...)]
+[where_clause];
+
+where_clause:
+WHERE STATUS [!]= "replica_status"
+
+replica_status:
+OK: The replica is healthy
+DEAD: The Backend of the replica is not available
+VERSION_ERROR: The replica data version is missing
+SCHEMA_ERROR: The schema hash of the replica is incorrect
+MISSING: The replica does not exist
+
+## example
+
+1. View the status of all replicas of a table
+
+ADMIN SHOW REPLICA STATUS FROM db1.tbl1;
+
+2. View replicas of the specified partitions of a table whose status is VERSION_ERROR
+
+ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2)
+WHERE STATUS = "VERSION_ERROR";
+
+3. View all unhealthy replicas of a table
+
+ADMIN SHOW REPLICA STATUS FROM tbl1
+WHERE STATUS != "OK";
+
+## keyword
+ADMIN,SHOW,REPLICA,STATUS
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ALTER CLUSTER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ALTER CLUSTER_EN.md
new file mode 100644
index 00000000000000..ad43bc711254b6
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ALTER CLUSTER_EN.md
@@ -0,0 +1,27 @@
+# ALTER CLUSTER
+Description
+
+This statement is used to update the logical cluster. Administrator privileges are required
+
+Grammar:
+
+ALTER CLUSTER cluster_name PROPERTIES ("key"="value", ...);
+
+1. Scale in or scale out (compared with the current number of BEs in the cluster, a larger instance_num means scaling out and a smaller one means scaling in). Scaling out is a synchronous operation and scaling in is an asynchronous operation; the state of the backends shows whether scaling in has completed.
+
+PROPERTIES ("instance_num" = "3")
+
+instance_num: the number of BE nodes in the logical cluster
+
+## example
+
+1. Scale in: reduce the number of BEs of the logical cluster test_cluster, which contains 3 BEs, to 2
+
+ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="2");
+
+2. Scale out: increase the number of BEs of the logical cluster test_cluster, which contains 3 BEs, to 4
+
+ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="4");
+
+## keyword
+ALTER,CLUSTER
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ALTER SYSTEM_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ALTER SYSTEM_EN.md
new file mode 100644
index 00000000000000..3b69dc6e1f1aee
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ALTER SYSTEM_EN.md
@@ -0,0 +1,93 @@
+# ALTER SYSTEM
+Description
+
+This statement is used to operate on the nodes of the system. (Administrator only!)
+Grammar: +1) Adding nodes (without multi-tenant functionality, add in this way) +ALTER SYSTEM ADD BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; +2) Adding idle nodes (that is, adding BACKEND that does not belong to any cluster) +ALTER SYSTEM ADD FREE BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; +3) Adding nodes to a cluster +ALTER SYSTEM ADD BACKEND TO cluster_name "host:heartbeat_port"[,"host:heartbeat_port"...]; +4) Delete nodes +ALTER SYSTEM DROP BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; +5) Node offline +ALTER SYSTEM DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; +6)226;- 21152;-Broker +ALTER SYSTEM ADD BROKER broker_name "host:port"[,"host:port"...]; +(7) 20943;"23569;" Broker +ALTER SYSTEM DROP BROKER broker_name "host:port"[,"host:port"...]; +8) Delete all Brokers +ALTER SYSTEM DROP ALL BROKER broker_name +9) Set up a Load error hub for centralized display of import error information +ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES ("key" = "value"[, ...]); + +Explain: +1) Host can be hostname or IP address +2) heartbeat_port is the heartbeat port of the node +3) Adding and deleting nodes are synchronous operations. These two operations do not take into account the existing data on the node, the node is directly deleted from the metadata, please use cautiously. +4) Node offline operations are used to secure offline nodes. This operation is asynchronous. If successful, the node will eventually be removed from the metadata. If it fails, the offline will not be completed. +5) The downline operation of the node can be cancelled manually. See CANCEL DECOMMISSION for details +6) Load error hub: +Currently, two types of Hub are supported: Mysql and Broker. You need to specify "type" = "mysql" or "type" = "broker" in PROPERTIES. +If you need to delete the current load error hub, you can set type to null. +1) When using the Mysql type, the error information generated when importing will be inserted into the specified MySQL library table, and then the error information can be viewed directly through the show load warnings statement. + +Hub of Mysql type needs to specify the following parameters: +guest guest +port:mysql port +user:mysql user +password:mysql password +database mysql database +table:mysql table + +2) When the Broker type is used, the error information generated when importing will form a file and be written to the designated remote storage system through the broker. Make sure that the corresponding broker is deployed +Hub of Broker type needs to specify the following parameters: +Broker: Name of broker +Path: Remote Storage Path +Other properties: Other information necessary to access remote storage, such as authentication information. + +'35;'35; example + +1. Add a node +ALTER SYSTEM ADD BACKEND "host:port"; + +2. Adding an idle node +ALTER SYSTEM ADD FREE BACKEND "host:port"; + +3. Delete two nodes +ALTER SYSTEM DROP BACKEND "host1:port", "host2:port"; + +4. Two downline nodes +ALTER SYSTEM DECOMMISSION BACKEND "host1:port", "host2:port"; + +5. Add two Hdfs Broker +ALTER SYSTEM ADD BROKER hdfs "host1:port", "host2:port"; + +6. Add a load error hub of Mysql type +ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES +("type"= "mysql", +"host" = "192.168.1.17" +"port" = "3306", +"User" = "my" name, +"password" = "my_passwd", +"database" = "doris_load", +"table" = "load_errors" +); + +7. 
+ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES
+("type"= "broker",
+"name" = "bos",
+"path" = "bos://backup-cmy/logs",
+"bos_endpoint" = "http://gz.bcebos.com",
+"bos_accesskey" = "069fc278xxxxxx24ddb522",
+"bos_secret_accesskey"="700adb0c6xxxxxx74d59eaa980a"
+);
+
+8. Delete the current load error hub
+ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES
+("type"= "null");
+
+## keyword
+ALTER,SYSTEM,BACKEND,BROKER,FREE
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION_EN.md
new file mode 100644
index 00000000000000..40a59122a65656
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION_EN.md
@@ -0,0 +1,14 @@
+# CANCEL DECOMMISSION
+Description
+
+This statement is used to cancel a node decommission operation. (Administrator only!)
+Grammar:
+CANCEL DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...];
+
+## example
+
+1. Cancel the decommission operation of two nodes:
+CANCEL DECOMMISSION BACKEND "host1:port", "host2:port";
+
+## keyword
+CANCEL,DECOMMISSION,BACKEND
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/CREATE CLUSTER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/CREATE CLUSTER_EN.md
new file mode 100644
index 00000000000000..425e5d676f50f4
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/CREATE CLUSTER_EN.md
@@ -0,0 +1,35 @@
+# CREATE CLUSTER
+Description
+
+This statement is used to create a new logical cluster and requires administrator privileges. If multi-tenancy is not used, create a cluster named default_cluster directly. Otherwise, create a cluster with a custom name.
+
+Grammar:
+
+CREATE CLUSTER [IF NOT EXISTS] cluster_name
+
+PROPERTIES ("key"="value", ...)
+
+IDENTIFIED BY 'password'
+
+1. PROPERTIES
+
+Specifies the attributes of the logical cluster
+
+PROPERTIES ("instance_num" = "3")
+
+instance_num: the number of BE nodes in the logical cluster
+
+2. IDENTIFIED BY 'password': each logical cluster contains a superuser whose password must be specified when the logical cluster is created
+
+## example
+
+1. Create a new logical cluster test_cluster with three BE nodes and specify its superuser password
+
+CREATE CLUSTER test_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test';
+
+2. Create a new logical cluster default_cluster with three BE nodes (multi-tenancy is not used) and specify its superuser password
+
+CREATE CLUSTER default_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test';
+
+## keyword
+CREATE,CLUSTER
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/CREATE FILE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/CREATE FILE_EN.md
new file mode 100644
index 00000000000000..25c03b350954c2
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/CREATE FILE_EN.md
@@ -0,0 +1,50 @@
+# CREATE FILE
+Description
+
+This statement is used to create and upload a file to the Doris cluster.
+This function is usually used to manage files that are needed by some other commands, such as certificates, public keys and private keys.
+
+This command can be executed only by users with admin privileges.
+A file belongs to a database and can be used by any user who has access to that database.
+
+The size of a single file is limited to 1MB.
+A Doris cluster can keep at most 100 uploaded files.
+ +Grammar: + +CREATE FILE "File name" [IN database] +[properties] + +Explain: +File_name: Custom file name. +Database: The file belongs to a db, and if not specified, the DB of the current session is used. +properties 支持以下参数: + +Url: Must. Specify a download path for a file. Currently only unauthenticated HTTP download paths are supported. When the command line succeeds, the file will be saved in Doris and the URL will no longer be required. +Catalog: Yes. The classification name of the file can be customized. But in some commands, files in the specified catalog are looked up. For example, in a routine import, when the data source is kafka, the file under the name of catalog is looked up. +Md5: Optional. MD5 of the file. If specified, it will be checked after downloading the file. + +'35;'35; example + +1. Create a file ca. pem, categorized as Kafka + +CREATE FILE "ca.pem" +PROPERTIES +( +"url" ="https://test.bj.bcebos.com /kafka -key /ca.pem", +"catalog" = "kafka" +); + +2. Create the file client. key, categorized as my_catalog + +CREATE FILE "client.key" +IN my database +PROPERTIES +( +"url" ="https://test.bj.bcebos.com /kafka -key /client.key", +"catalog" = "my_catalog", +"md5"= "b5bb901bf1099205b39a46ac3557dd9" +); + +## keyword +CREATE,FILE diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/DROP CLUSTER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/DROP CLUSTER_EN.md new file mode 100644 index 00000000000000..254a038dc70039 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Administration/DROP CLUSTER_EN.md @@ -0,0 +1,17 @@ +# DROP CLUSTER +Description + +This statement is used to delete logical cluster. Successful deletion of logical cluster requires first deleting dB in the cluster and administrator privileges. + +grammar + +DROP CLUSTER [IF EXISTS] cluster_name + +'35;'35; example + +Delete logical cluster test_cluster + +DROP CLUSTER test_cluster; + +## keyword +DROP,CLUSTER diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/DROP FILE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/DROP FILE_EN.md new file mode 100644 index 00000000000000..77614532f2605c --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Administration/DROP FILE_EN.md @@ -0,0 +1,25 @@ +# DROP FILE +Description + +This statement is used to delete an uploaded file. + +Grammar: + +DROP FILE "file_name" [FROM database] +[properties] + +Explain: +File_name: File name. +Database: A DB to which the file belongs, if not specified, uses the DB of the current session. +properties 支持以下参数: + +Catalog: Yes. Classification of documents. + +'35;'35; example + +1. Delete the file ca.pem + +DROP FILE "ca.pem" properties("catalog" = "kafka"); + +## keyword +DROP,FILE diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/ENTER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/ENTER_EN.md new file mode 100644 index 00000000000000..337ffb3ff62b18 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Administration/ENTER_EN.md @@ -0,0 +1,18 @@ +ENTER +Description + +This statement is used to enter a logical cluster. All users and databases created need to be executed in a logical cluster. After creation, they belong to the logic. + +Cluster, need administrator privileges + +ENTER cluster name + +'35;'35; example + +1. 
Enter the logical cluster test_cluster
+
+ENTER test_cluster;
+
+## keyword
+ENTER
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/LINK DATABASE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/LINK DATABASE_EN.md
new file mode 100644
index 00000000000000..478e78f5c1e8aa
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/LINK DATABASE_EN.md
@@ -0,0 +1,23 @@
+# LINK DATABASE
+Description
+
+This statement allows users to link a database of one logical cluster to another logical cluster. A database is only allowed to be linked once at a time.
+
+Deleting the linked database does not delete its data, and a database that has been linked cannot be deleted. Administrator privileges are required.
+
+Grammar:
+
+LINK DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name
+
+## example
+
+1. Link test_db in test_clusterA to test_clusterB and name it link_test_db
+
+LINK DATABASE test_clusterA.test_db test_clusterB.link_test_db;
+
+2. Delete the linked database link_test_db
+
+DROP DATABASE link_test_db;
+
+## keyword
+LINK,DATABASE
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE_EN.md
new file mode 100644
index 00000000000000..2b64b07f1d4a83
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE_EN.md
@@ -0,0 +1,19 @@
+# MIGRATE DATABASE
+Description
+
+This statement is used to migrate a database of one logical cluster to another logical cluster. Before performing this operation, the database must be in the linked state.
+
+Administrator privileges are required.
+
+Grammar:
+
+MIGRATE DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name
+
+## example
+
+1. Migrate test_db in test_clusterA to test_clusterB
+
+MIGRATE DATABASE test_clusterA.test_db test_clusterB.link_test_db;
+
+## keyword
+MIGRATE,DATABASE
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BACKENDS_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BACKENDS_EN.md
new file mode 100644
index 00000000000000..7c146f51bbf398
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BACKENDS_EN.md
@@ -0,0 +1,27 @@
+# SHOW BACKENDS
+Description
+This statement is used to view the BE nodes in the cluster
+Grammar:
+SHOW BACKENDS;
+
+Explain:
+1. LastStartTime indicates the last BE start-up time.
+2. LastHeartbeat indicates the latest heartbeat.
+3. Alive indicates whether the node is alive.
+4. If SystemDecommissioned is true, the node is being decommissioned safely.
+5. If ClusterDecommissioned is true, the node is being decommissioned from the current cluster.
+6. TabletNum indicates the number of tablets (data shards) on the node.
+7. DataUsedCapacity indicates the space occupied by actual user data.
+8. AvailCapacity indicates the available space on the disk.
+9. TotalCapacity indicates the total disk space. TotalCapacity = AvailCapacity + DataUsedCapacity + space occupied by other non-user data files.
+10. UsedPct indicates the percentage of disk usage.
+11. ErrMsg is used to display the error message when a heartbeat fails.
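+
+## example
+
+1. A minimal example; view all BE nodes in the current cluster (the output columns are described in Explain above):
+
+SHOW BACKENDS;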
+
+## keyword
+SHOW, BACKENDS
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BROKER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BROKER_EN.md
new file mode 100644
index 00000000000000..576ca117acf32c
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW BROKER_EN.md
@@ -0,0 +1,14 @@
+# SHOW BROKER
+Description
+This statement is used to view the existing brokers
+Grammar:
+SHOW BROKER;
+
+Explain:
+1. LastStartTime indicates the last broker start-up time.
+2. LastHeartbeat indicates the latest heartbeat.
+3. Alive indicates whether the node is alive.
+4. ErrMsg is used to display the error message when the heartbeat fails.
+
+## keyword
+SHOW, BROKER
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FILE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FILE_EN.md
new file mode 100644
index 00000000000000..239ef389b19808
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FILE_EN.md
@@ -0,0 +1,26 @@
+# SHOW FILE
+Description
+
+This statement is used to show the files created in a database
+
+Grammar:
+
+SHOW FILE [FROM database];
+
+Explain:
+
+FileId: File ID, globally unique
+DbName: The name of the database to which the file belongs
+Catalog: Custom category
+FileName: File name
+FileSize: File size, in bytes
+MD5: The MD5 checksum of the file
+
+## example
+
+1. View the uploaded files in my_database
+
+SHOW FILE FROM my_database;
+
+## keyword
+SHOW,FILE
diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS_EN.md
new file mode 100644
index 00000000000000..9cb7379fdde63c
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS_EN.md
@@ -0,0 +1,23 @@
+# SHOW FRONTENDS
+Description
+This statement is used to view the FE nodes
+Grammar:
+SHOW FRONTENDS;
+
+Explain:
+1. name denotes the name of the FE node in bdbje.
+2. If Join is true, the node has joined the cluster before. This does not mean that it is still in the cluster (it may have lost contact).
+3. Alive indicates whether the node is alive.
+4. ReplayedJournalId indicates the maximum metadata journal ID that the node has replayed so far.
+5. LastHeartbeat is the latest heartbeat.
+6. IsHelper indicates whether the node is a helper node in bdbje.
+7. ErrMsg is used to display the error message when the heartbeat fails.
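+
+## example
+
+1. A minimal example; view all FE nodes in the cluster (the output columns are described in Explain above):
+
+SHOW FRONTENDS;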
+ +## keyword +SHOW, FRONTENDS diff --git a/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS_EN.md b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS_EN.md new file mode 100644 index 00000000000000..dca606cff71635 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS_EN.md @@ -0,0 +1,11 @@ +# SHOW MIGRATIONS +Description + +This statement is used to view the progress of database migration + +grammar + +SHOW MIGRATIONS + +## keyword +SHOW,MIGRATIONS diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE_EN.md new file mode 100644 index 00000000000000..b4c436b2722cf9 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE_EN.md @@ -0,0 +1,29 @@ +'35; OLD DATABASE +Description +This statement is used to set the properties of the specified database. (Administrators only) +Grammar: +1) Setting database data quota in B/K/KB/M/MB/G/GB/T/TB/P/PB +OTHER DATABASE dbu name SET DATA QUOTA quota; + +2) Rename the database +ALTER DATABASE db_name RENAME new_db_name; + +Explain: +After renaming the database, use REVOKE and GRANT commands to modify the corresponding user rights if necessary. + +'35;'35; example +1. Setting the specified database data quota +ALTER DATABASE example_db SET DATA QUOTA 10995116277760; +The above units are bytes, equivalent to +ALTER DATABASE example_db SET DATA QUOTA 10T; + +ALTER DATABASE example_db SET DATA QUOTA 100G; + +ALTER DATABASE example_db SET DATA QUOTA 200M; + +2. Rename the database example_db to example_db2 +ALTER DATABASE example_db RENAME example_db2; + +## keyword +ALTER,DATABASE,RENAME + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER TABLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER TABLE_EN.md new file mode 100644 index 00000000000000..e8bec9f98c04fe --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/ALTER TABLE_EN.md @@ -0,0 +1,240 @@ +# ALTER TABLE +Description +This statement is used to modify an existing table. If no rollup index is specified, the default operation is base index. +该语句分为三种操作类型: schema change 、rollup 、partition +These three types of operations cannot appear in an ALTER TABLE statement at the same time. +Where schema change and rollup are asynchronous operations, task submission returns if it succeeds. You can then use the SHOW ALTER command to view progress. +Partition is a synchronous operation, and the return of the command indicates that the execution is complete. + +Grammar: +ALTER TABLE [database.]table +alter_clause1[, alter_clause2, ...]; + +alter_clause 分为 partition 、rollup、schema change 和 rename 四种。 + +partition 支持如下几种修改方式 +1. Adding partitions +Grammar: +ADD PARTITION [IF NOT EXISTS] partition_name VALUES LESS THAN [MAXVALUE|("value1")] ["key"="value"] +[DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]] +Be careful: +1) The partition is left-closed and right-open, the user specifies the right boundary, and the system automatically determines the left boundary. +2) If no bucket-dividing method is specified, the bucket-dividing method used in table-building will be used automatically. 
+3) If the barrel-dividing method is specified, only the number of barrels can be modified, but not the barrel-dividing method or the barrel-dividing column. +4) The ["key"= "value"] section can set some properties of the partition, as specified in CREATE TABLE + +2. Delete partitions +Grammar: +DROP PARTITION [IF EXISTS] partition_name +Be careful: +1) A partitioned table should have at least one partition. +2) During the execution of DROP PARTITION, deleted partitions can be restored through RECOVER statements. See RECOVER statement for details + +3. Modify partition attributes +Grammar: +MODIFY PARTITION partition u name SET ("key" ="value",...) +Explain: +1) Currently, three attributes, storage_medium, storage_cooldown_time and replication_num, are supported to modify partitions. +2) For a single partition table, partition_name is the same table name. + +Rollup supports the following ways of creation: +One. 1.2.1.1.1.1.1.1.1. +Grammar: +ADD ROLLUP rollup_name (column_name1, column_name2, ...) +[FROM from index name] +[PROPERTIES ("key"="value", ...)] +Be careful: +1) If no from_index_name is specified, it is created by default from base index +2) The column in the rollup table must be an existing column from_index +3) In properties, you can specify the storage format. See CREATE TABLE for details. + +2. 1.2.2.2.2.2.2.2.2. +Grammar: +DROP ROLLUP rollup_name +[PROPERTIES ("key"="value", ...)] +Be careful: +1) Base index cannot be deleted +2) During the execution of DROP ROLLUP, the deleted rollup index can be restored by RECOVER statement. See RECOVER statement for details + + +schema change 支持如下几种修改方式: +1. Add a column to the specified index location +Grammar: +ADD COLUMN column_name column_type [KEY | agg_type] [DEFAULT "default_value"] +[AFTER column_name|FIRST] +[TO rollup_index_name] +[PROPERTIES ("key"="value", ...)] +Be careful: +1) If the value column is added to the aggregation model, agg_type needs to be specified +2) If the key column is added to the non-aggregate model, KEY keywords need to be specified. +3) Cannot add columns already existing in base index in rollup index +If necessary, you can re-create a rollup index. + +2. Add multiple columns to the specified index +Grammar: +ADD COLUMN (column_name1 column_type [KEY | agg_type] DEFAULT "default_value", ...) +[TO rollup_index_name] +[PROPERTIES ("key"="value", ...)] +Be careful: +1) If the value column is added to the aggregation model, agg_type needs to be specified +2) If the key column is added to the non-aggregate model, KEY keywords need to be specified. +3) Cannot add columns already existing in base index in rollup index +(You can re-create a rollup index if you need to) + +3. Delete a column from the specified index +Grammar: +DROP COLUMN column_name +[FROM rollup_index_name] +Be careful: +1) Partition columns cannot be deleted +2) If a column is deleted from base index, it will also be deleted if it is included in rollup index + +4. 
Modify the column type and column location of the specified index +Grammar: +MODIFY COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] +[AFTER column_name|FIRST] +[FROM rollup_index_name] +[PROPERTIES ("key"="value", ...)] +Be careful: +1) The aggregation model needs to specify agg_type if it modifies the value column +2) If you modify the key column for a non-aggregated type, you need to specify the KEY keyword +3) The type of column can only be modified, and the other attributes of the column remain the same (that is, other attributes should be explicitly written in the statement according to the original attributes, see example 8). +4) Partition column cannot be modified +5) The following types of conversion are currently supported (accuracy loss is guaranteed by users) +TINYINT/SMALLINT/INT/BIGINT is converted to TINYINT/SMALLINT/INT/BIGINT/DOUBLE. +LARGEINT 转换成 DOUBLE +VARCHAR 25345;'20462;' 25913;'38271;' 24230s; +6) Conversion from NULL to NOT NULL is not supported + +5. Reordering columns with specified index +Grammar: +ORDER BY (column_name1, column_name2, ...) +[FROM rollup_index_name] +[PROPERTIES ("key"="value", ...)] +Be careful: +1) All columns in index should be written out +2) Value is listed after the key column + +6. Modify table attributes, which currently support modifying bloom filter columns and colocate_with attributes +Grammar: +PROPERTIES ("key"="value") +Be careful: +You can also incorporate it into the schema change operation above to modify it, as shown in the following example + + +Rename supports the modification of the following names: +1. Modify the table name +Grammar: +RENAME new_table_name; + +2. 1.2.2.5.5.5.5.;5.5.5.5.5.5. +Grammar: +RENAME ROLLUP old_rollup_name new_rollup_name; + +3. 修改 partition 名称 +Grammar: +Rename old partition name and new partition name + +'35;'35; example +[partition] +1. Increase partitions, existing partitions [MIN, 2013-01-01], increase partitions [2013-01-01, 2014-01-01], using default bucket partitioning +ALTER TABLE example_db.my_table +ADD PARTITION p1 VALUES LESS THAN ("2014-01-01"); + +2. Increase partitions and use new buckets +ALTER TABLE example_db.my_table +ADD PARTITION p1 VALUES LESS THAN ("2015-01-01") +DISTRIBUTED BY HASH(k1) BUCKETS 20; + +3. Delete partitions +ALTER TABLE example_db.my_table +DROP PARTITION p1; + +[rollup] +1. Create index: example_rollup_index, based on base index (k1, k2, k3, v1, v2). Formula storage. +ALTER TABLE example_db.my_table +ADD ROLLUP example_rollup_index(k1, k3, v1, v2) +PROPERTIES("storage_type"="column"); + +2. Create index: example_rollup_index2, based on example_rollup_index (k1, k3, v1, v2) +ALTER TABLE example_db.my_table +ADD ROLLUP example_rollup_index2 (k1, v1) +FROM example_rollup_index; + +3. Delete index: example_rollup_index2 +ALTER TABLE example_db.my_table +DROP ROLLUP example_rollup_index2; + +[schema change] +1. Add a key column new_col (non-aggregate model) to col1 of example_rollup_index +ALTER TABLE example_db.my_table +ADD COLUMN new_col INT KEY DEFAULT "0" AFTER col1 +TO example_rollup_index; + +2. Add a value column new_col (non-aggregate model) to col1 of example_rollup_index +ALTER TABLE example_db.my_table +ADD COLUMN new_col INT DEFAULT "0" AFTER col1 +TO example_rollup_index; + +3. Add a key column new_col (aggregation model) to col1 of example_rollup_index +ALTER TABLE example_db.my_table +ADD COLUMN new_col INT DEFAULT "0" AFTER col1 +TO example_rollup_index; + +4. 
Add a value column new_col SUM aggregation type (aggregation model) to col1 of example_rollup_index +ALTER TABLE example_db.my_table +ADD COLUMN new_col INT SUM DEFAULT "0" AFTER col1 +TO example_rollup_index; + +5. Add multiple columns to example_rollup_index (aggregation model) +ALTER TABLE example_db.my_table +ADD COLUMN (col1 INT DEFAULT "1", col2 FLOAT SUM DEFAULT "2.3") +TO example_rollup_index; + +6. Delete a column from example_rollup_index +ALTER TABLE example_db.my_table +DROP COLUMN col2 +FROM example_rollup_index; + +7. Modify the col1 column type of base index to BIGINT and move to the back of col2 column +ALTER TABLE example_db.my_table +MODIFY COLUMN col1 BIGINT DEFAULT "1" AFTER col2; + +8. 修改 base index 的 val1 列最大长度。原 val1 为 (val1 VARCHAR(32) REPLACE DEFAULT "abc") +ALTER TABLE example_db.my_table +MODIFY COLUMN val1 VARCHAR(64) REPLACE DEFAULT "abc"; + +9. Rearrange the columns in example_rollup_index (set the original column order to k1, k2, k3, v1, v2) +ALTER TABLE example_db.my_table +ORDER BY (k3,k1,k2,v2,v1) +FROM example_rollup_index; + +10. Perform two operations simultaneously +ALTER TABLE example_db.my_table +ADD COLUMN v2 INT MAX DEFAULT "0" AFTER k2 TO example_rollup_index, +ORDER BY (k3,k1,k2,v2,v1) FROM example_rollup_index; + +11. 20462;- 259130;-bloom filter -210151; +ALTER TABLE example_db.my_table SET ("bloom_filter_columns"="k1,k2,k3"); + +You can also incorporate it into the schema change operation above (note that the grammar of multiple clauses is slightly different) +ALTER TABLE example_db.my_table +DROP COLUMN col2 +PROPERTIES ("bloom_filter_columns"="k1,k2,k3"); + +12. Modify the Colocate property of the table +ALTER TABLE example_db.my_table set ("colocate_with"="t1"); + +[Rename] +1. Modify the table named Table 1 to table2 +ALTER TABLE table1 RENAME table2; + +2. 将表 example_table 中名为 rollup1 的 rollup index 修改为 rollup2 +ALTER TABLE example_table RENAME ROLLUP rollup1 rollup2; + +3. 将表 example_table 中名为 p1 的 partition 修改为 p2 +ALTER TABLE example_table RENAME PARTITION p1 p2; + +## keyword +ALTER,TABLE,ROLLUP,COLUMN,PARTITION,RENAME + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/BACKUP_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/BACKUP_EN.md new file mode 100644 index 00000000000000..da56070feb03ef --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/BACKUP_EN.md @@ -0,0 +1,39 @@ +# BACKUP +Description +This statement is used to backup data under the specified database. This command is an asynchronous operation. After successful submission, you need to check progress through the SHOW BACKUP command. Only tables of OLAP type are backed up. +Grammar: +BACKUP SNAPSHOT [db_name].{snapshot_name} +TO `repository_name` +ON ( +"`Table `U name'[Distriction (`P1',...)], +... +) +PROPERTIES ("key"="value", ...); + +Explain: +1. Only one BACKUP or RESTORE task can be performed under the same database. +2. The ON clause identifies the tables and partitions that need to be backed up. If no partition is specified, all partitions of the table are backed up by default. +3. PROPERTIES currently supports the following attributes: +"Type" = "full": means that this is a full update (default). +"Timeout" = "3600": Task timeout, default to one day. Unit seconds. + +'35;'35; example + +1. 
Back up the table example_tbl under example_db in full to the warehouse example_repo: +BACKUP SNAPSHOT example_db.snapshot_label1 +TO example repo +On (example tbl) +PROPERTIES ("type" = "full"); + +2. Under full backup example_db, the P1 and P2 partitions of table example_tbl, and table example_tbl2 to warehouse example_repo: +BACKUP SNAPSHOT example_db.snapshot_label2 +TO example repo +ON +( +example_tbl PARTITION (p1,p2), +Example: +); + +## keyword +BACKUP + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER_EN.md new file mode 100644 index 00000000000000..d731bdc8a54609 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER_EN.md @@ -0,0 +1,32 @@ +# CANCEL ALTER +Description +This statement is used to undo an ALTER operation. +1. 撤销 ALTER TABLE COLUMN 操作 +Grammar: +CANCEL ALTER TABLE COLUMN +FROM db_name.table_name + +2. 撤销 ALTER TABLE ROLLUP 操作 +Grammar: +CANCEL ALTER TABLE ROLLUP +FROM db_name.table_name + +2. OTHER CLUSTER +Grammar: +(To be realized... + + +'35;'35; example +[CANCEL ALTER TABLE COLUMN] +1. 撤销针对 my_table 的 ALTER COLUMN 操作。 +CANCEL ALTER TABLE COLUMN +FROM example_db.my_table; + +[CANCEL ALTER TABLE ROLLUP] +1. 撤销 my_table 下的 ADD ROLLUP 操作。 +CANCEL ALTER TABLE ROLLUP +FROM example_db.my_table; + +## keyword +CANCEL,ALTER,TABLE,COLUMN,ROLLUP + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP_EN.md new file mode 100644 index 00000000000000..80cc609169a7b9 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP_EN.md @@ -0,0 +1,13 @@ +# CANCEL BACKUP +Description +This statement is used to cancel an ongoing BACKUP task. +Grammar: +CANCEL BACKUP FROM db_name; + +'35;'35; example +1. Cancel the BACKUP task under example_db. +CANCEL BACKUP FROM example_db; + +## keyword +CANCEL, BACKUP + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE_EN.md new file mode 100644 index 00000000000000..d83050468929ad --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE_EN.md @@ -0,0 +1,16 @@ +# CANCEL RESTORE +Description +This statement is used to cancel an ongoing RESTORE task. +Grammar: +CANCEL RESTORE FROM db_name; + +Be careful: +When the recovery is abolished around the COMMIT or later stage, the restored tables may be inaccessible. At this point, data recovery can only be done by performing the recovery operation again. + +'35;'35; example +1. Cancel the RESTORE task under example_db. +CANCEL RESTORE FROM example_db; + +## keyword +CANCEL, RESTORE + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE_EN.md new file mode 100644 index 00000000000000..57d598f7832f5a --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE_EN.md @@ -0,0 +1,13 @@ +# CREATE DATABASE +Description +This statement is used to create a new database +Grammar: +CREATE DATABASE [IF NOT EXISTS] db_name; + +'35;'35; example +1. 
New database db_test +CREATE DATABASE db_test; + +## keyword +CREATE,DATABASE + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY_EN.md new file mode 100644 index 00000000000000..29b39513f60d12 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY_EN.md @@ -0,0 +1,49 @@ +# CREATE REPOSITORY +Description +This statement is used to create the warehouse. The warehouse is used for backup or recovery. Only root or superuser users can create warehouses. +Grammar: +CREATE [READ ONLY] REPOSITORY `repo_name` +WITH BROKER `broker_name` +ON LOCATION `repo_location` +PROPERTIES ("key"="value", ...); + +Explain: +1. The creation of warehouses depends on existing brokers +2. If it is a read-only warehouse, it can only be restored on the warehouse. If not, you can backup and restore operations. +3. According to the different types of broker, PROPERTIES is different, see the example. + +'35;'35; example +1. Create a warehouse named bos_repo, which relies on BOS broker "bos_broker", and the data root directory is: bos://palo_backup. +CREATE REPOSITORY `bos_repo` +WITH BROKER `bos_broker` +ON LOCATION "bos://palo_backup" +PROPERTIES +( +"bosu endpoint" ="http://gz.bcebos.com", +"bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22", +"bos_secret_accesskey"="70999999999999de274d59eaa980a" +); + +2. Create the same warehouse as in Example 1, but with read-only attributes: +CREATE READ ONLY REPOSITORY `bos_repo` +WITH BROKER `bos_broker` +ON LOCATION "bos://palo_backup" +PROPERTIES +( +"bosu endpoint" ="http://gz.bcebos.com", +"bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22", +"bos_secret_accesskey"="70999999999999de274d59eaa980a" +); + +3. Create a warehouse named hdfs_repo, which relies on Baidu HDFS broker "hdfs_broker", and the data root directory is: hdfs://hadoop-name-node:54310/path/to/repo./ +CREATE REPOSITORY `hdfs_repo` +WITH BROKER `hdfs_broker` +ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/" +PROPERTIES +( +"Username" = "User" +"password" = "password" +); + +## keyword +CREATE REPOSITORY diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md new file mode 100644 index 00000000000000..a39d07cbe601bf --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE TABLE_EN.md @@ -0,0 +1,334 @@ +# CREATE TABLE +Description +This statement is used to create a table. +Grammar: +CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name +(column_definition1[, column_definition2, ...]) +[ENGINE = [olap|mysql|broker]] +[key squeaks] +[Distriction & UDESC] +[Distribution & UDESC] +[PROPERTIES ("key"="value", ...)]; +[BROKER PROPERTIES ("key"="value", ...)]; + +1. 
column_definition +Grammar: +col_name col_type [agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] + +Explain: +Col_name: Column name +Col_type: Column type +TINYINT (1 byte) +Scope: - 2 ^ 7 + 1 - 2 ^ 7 - 1 +SMALLINT (2 bytes) +Scope: - 2 ^ 15 + 1 - 2 ^ 15 - 1 +INT (4 bytes) +Scope: - 2 ^ 31 + 1 - 2 ^ 31 - 1 +BIGINT (8 bytes) +Scope: - 2 ^ 63 + 1 - 2 ^ 63 - 1 +LARGEINT (16 bytes) +Scope: 0 - 2 ^ 127 - 1 +FLOAT (4 bytes) +Supporting scientific counting +DOUBLE (12 bytes) +Supporting scientific counting +Decima [(precision, scale)] (4023383; 33410) +The decimal type guaranteeing accuracy. Default is DECIMAL (10, 0) +Precision: 1 ~27 +scale: 0 ~ 9 +The integer part is 1 - 18 +No support for scientific counting +DATE (3 bytes) +Scope: 1900-01-01-9999-12-31 +DATETIME (8 bytes) +Scope: 1900-01:00:00-9999-12-31:23:59:59 +CHAR[(length)] +Fixed-length string. Length range: 1 - 255. Default 1 +VARCHAR[(length)] +Variable length string. Length range: 1 - 65533 +HLL (1 ~1638520010;* 33410s) +HLL column type, no need to specify length and default value, length aggregation based on data +Degree system internal control, and HLL columns can only be queried or used by matching hll_union_agg, Hll_cardinality, hll_hash + +Agg_type: The aggregation type, if not specified, is listed as the key column. Otherwise, it is listed as value column +SUM, MAX, MIN, REPLACE, HLL_UNION (only for HLL columns, unique aggregation of HLL) +This type is only useful for aggregation models (the type of key_desc is AGGREGATE KEY), and other models do not need to specify this. + +Whether NULL is allowed or not: NULL is not allowed by default. NULL values are represented in imported data byN + +2. ENGINE 类型 +The default is olap. Optional mysql, broker +1) If it is mysql, you need to provide the following information in properties: + +PROPERTIES ( +"host" ="mysql server" host, +"port" = "mysql_server_port", +"user" = "your_user_name", +"password" = "your_password", +"database" ="database" u name, +"table" = "table_name" +) + +Be careful: +The "table_name" in the "table" entry is the real table name in mysql. +The table_name in the CREATE TABLE statement is the name of the MySQL table in Palo, which can be different. + +The purpose of creating MySQL tables in Palo is to access the MySQL database through Palo. +Palo itself does not maintain or store any MySQL data. +2) If it is a broker, it means that the access to tables needs to be through the specified broker, and the following information needs to be provided in properties: +PROPERTIES ( +"broker"u name "="broker "u name", +"paths" = "file_path1[,file_path2]", +Columbus@U separator="value@u separator" +"line_delimiter" = "value_delimiter" +) +In addition, you need to provide the property information Broker needs to pass through BROKER PROPERTIES, such as HDFS needs to be imported. +BROKER PROPERTIES ( +"Username" = "name" +"password" = "password" +) +Depending on the Broker type, the content that needs to be passed in is different. +Be careful: +If there are multiple files in "paths", split them with commas [,]. If the file name contains commas, use% 2C instead. If the file name is included, use% 25 instead. +Now the file content format supports CSV, GZ, BZ2, LZ4, LZO (LZOP) compression format. + +THREE. Key u descu +Grammar: +Key type (k1 [,k2...]) +Explain: +The data is sorted according to the specified key column and has different characteristics according to different key_types. 
+Key_type supports some types: +AGGREGATE KEY: The key column has the same record, and the value column aggregates according to the specified aggregation type. +Suitable for business scenarios such as report, multi-dimensional analysis, etc. +UNIQUE KEY: The key column has the same record, and the value column is overwritten in the import order. +It is suitable for the point query business of adding, deleting and modifying by key column. +DUPLICATE KEY: Key column has the same record and exists in Palo. +Suitable for business scenarios where detailed data is stored or data is not aggregated. +Be careful: +Except for AGGREGATE KEY, other key_types do not require the value column to specify the aggregation type when building tables. + +Four Division +1) Range 分区 +Grammar: +PARTITION BY RANGE (k1, k2, ...) +( +PARTITION partition_name VALUES LESS THAN MAXVALUE|("value1", "value2", ...) +PARTITION partition_name VALUES LESS THAN MAXVALUE|("value1", "value2", ...) +... +) +Explain: +Partitioning using the specified key column and the specified range of values. +1) The partition name only supports letters at the beginning, letters, numbers, and underscores +2) Currently, only the following types of columns are supported as Range partition columns, and only one partition column can be specified. +Tinyint, smallint, int, bigint, largeinet, date, date +3) The partition is left-closed and right-open, and the left boundary of the first partition is the minimum. +4) NULL values are stored only in partitions containing minimum values. When the partition containing the minimum value is deleted, the NULL value cannot be imported. +5) You can specify one or more columns as partition columns. If the partition value is default, the minimum value is filled by default. + +Be careful: +1) Partitions are generally used for data management in time dimension +2) If there is a need for data backtracking, the first partition can be considered as an empty partition in order to increase the number of partitions in the future. + +Five distribution +(1) Hash -20998;` 26742; +Grammar: +DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num] +Explain: +Hash buckets using the specified key column. The default partition number is 10 + +Suggestion: Hash bucket dividing method is recommended. + +6. PROPERTIES +1) If the ENGINE type is olap, you can specify a column store in properties (currently we only support column store) + +PROPERTIES ( +"storage_type" = "[column]", +) + +2) If ENGINE type is OLAP +You can set the initial storage medium, storage expiration time, and number of copies of the table data in properties. + +PROPERTIES ( +"storage_medium" = "[SSD|HDD]", +["Storage = U cooldown = time" = "YYY-MM-DD HH: mm: ss"], +["Replication = Unum" = "3"] +) + +Storage_medium: The initial storage medium used to specify the partition can be SSD or HDD. The default is HDD. +Storage_cooldown_time: When the storage medium is set to SSD, specify the storage expiration time of the partition on SSD. +Store by default for 7 days. +The format is: "yyyy-MM-dd HH: mm: ss" +Replication_num: The number of copies of the specified partition. Default 3 + +When a table is a single partitioned table, these attributes are attributes of the table. +When tables are two-level partitions, these attributes are attached to each partition. +If you want different partitions to have different attributes. 
It can be operated through ADD PARTITION or MODIFY PARTITION + +3) 如果 Engine 类型为 olap, 并且 storage_type 为 column, 可以指定某列使用 bloom filter 索引 +Blooming filter index is only applicable to the case where the query conditions are in and equal. The more decentralized the values of the column, the better the effect. +Currently, only the following columns are supported: key columns other than TINYINT FLOAT DOUBLE type and value columns aggregated by REPLACE + +PROPERTIES ( +"bloom_filter_columns"="k1,k2,k3" +) +4) If you want to use the Colocate Join feature, you need to specify it in properties + +PROPERTIES ( +"colocate_with"="table1" +) + +'35;'35; example +1. Create an OLAP table, use HASH buckets, use column storage, aggregate records of the same key +CREATE TABLE example_db.table_hash +( +k1 DURATION, +K2 Decima (10,2) Default "10.5", +v1 CHAR(10) REPLACE, +v2 INT SUM +) +ENGINE=olap +AGGREGATE KEY (k1, k2) +DISTRIBUTED BY HASH(k1) BUCKETS 32 +PROPERTIES ("storage_type"="column"); + +2. Create an OLAP table, use Hash bucket, use column storage, and overwrite the same key record. +Setting initial storage medium and cooling time +CREATE TABLE example_db.table_hash +( +k1 BIGINT +k2 LARGEINT, +v1 VARCHAR(2048) REPLACE, +v2 SMALLINT SUM DEFAULT "10" +) +ENGINE=olap +KEY (k1, K2) UNIT +DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 +PROPERTIES( +"storage_type"="column", +"storage_medium" = "SSD", +"Storage = U cooldown time" = "2015-06-04:00:00:00:00: +); + +3. Create an OLAP table, use Key Range partition, use Hash bucket, default column storage. +Records of the same key coexist, setting the initial storage medium and cooling time +CREATE TABLE example_db.table_range +( +k1 DATE, +k2 INT +k3 SMALL +v1 VARCHAR (2048), +V2 DATETIME DEFAULT "2014 -02 -04 15:36:00" +) +ENGINE=olap +DUPLICATE KEY (k1, k2, k3) +PARTITION BY RANGE (k1) +( +The partition value of P1 is less than ("2014-01-01"). +The segmentation value of P2 is lower than that of ("2014-06-01"). +The partition value of P3 is less than ("2014-12-01") +) +DISTRIBUTED BY HASH(k2) BUCKETS 32 +PROPERTIES( +"Storage = U Medium"= "SSD", "Storage = U Cooldown = U Time"= "2015-06-04:00:00:00:00: +); + +Explain: +This statement divides the data into the following three partitions: +({MIN}, {"2014 -01 -01"}} +[ {"2014-01-01"}, {"2014-06-01"} ) +[ {"2014-06-01"}, {"2014-12-01"} ) + +Data that is not within these partitions will be treated as illegal data being filtered + +4. Create a MySQL table +CREATE TABLE example_db.table_mysql +( +k1 DATE, +k2 INT +k3 SMALL +k4 VARCHAR (2048), +K5 DATE +) +ENGINE=mysql +PROPERTIES +( +"host" = "127.0.0.1", +"port" = "8239", +"user" = "mysql_user", +"password" = "mysql_passwd", +"database" ="mysql" db test, +"table" = "mysql_table_test" +) + +5. Create a broker external table where the data file is stored on HDFS, and the data is split by "|" and "\ n" newline +CREATE EXTERNAL TABLE example_db.table_broker ( +k1 DATE, +k2 INT +k3 SMALL +k4 VARCHAR (2048), +K5 DATE +) +ENGINE=broker +PROPERTIES ( +"broker" u name ="hdfs", +"path" ="hdfs http://hdfs -u -host:hdfs" port /data1,hdfs http://hdfs -u -host:hdfs -u -port /data3%2c4 ", +"Column"= U separator"= 124;" +"line_delimiter" = "\n" +) +BROKER PROPERTIES ( +"Username" = "HDFS\\ user" +"password" = "hdfs_password" +) + +6. 
Create a table with HLL columns +CREATE TABLE example_db.example_table +( +k1 DURATION, +K2 Decima (10,2) Default "10.5", +THE EUROPEAN UNION, +V2 HLL HLL UNION +) +ENGINE=olap +AGGREGATE KEY (k1, k2) +DISTRIBUTED BY HASH(k1) BUCKETS 32 +PROPERTIES ("storage_type"="column"); + +7. Create two tables T1 and T2 that support Colocat Join +CREATE TABLE `t1` ( +`id` int(11) COMMENT "", +'value ` varchar (8) COMMENT "" +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 10 +PROPERTIES ( +"colocate_with" = "t1" +); + +CREATE TABLE `t2` ( +`id` int(11) COMMENT "", +'value ` varchar (8) COMMENT "" +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 10 +PROPERTIES ( +"colocate_with" = "t1" +); + +8. Create a broker external table with data files stored on BOS +CREATE EXTERNAL TABLE example_db.table_broker ( +date +) +ENGINE=broker +PROPERTIES ( +"broker_name" = "bos", +"path" = "bos://my_bucket/input/file", +) +BROKER PROPERTIES ( +"bosu endpoint" ="http://bj.bcebos.com", +"bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", +"bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy" +) + +## keyword +CREATE,TABLE + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE VIEW_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE VIEW_EN.md new file mode 100644 index 00000000000000..dd8c2707f281e6 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/CREATE VIEW_EN.md @@ -0,0 +1,22 @@ +# CREATE VIEW +Description +This statement is used to create a logical view +Grammar: +CREATE VIEW [IF NOT EXISTS] +[db_name.]view_name (column1[, column2, ...]) +AS query + +Explain: +1. Views are logical views without physical storage. All queries on views are equivalent to sub-queries corresponding to views. +2. Query_stmt is arbitrarily supported SQL + +'35;'35; example +1. Create view example_view on example_db +CREATE VIEW example_db.example_view (k1, k2, k3, v1) +AS +SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table +WHERE k1 = 20160112 GROUP BY k1,k2,k3; + +## keyword +CREATE,VIEW + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/Colocate Join_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/Colocate Join_EN.md new file mode 100644 index 00000000000000..7ddf1ee25bfb2f --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/Colocate Join_EN.md @@ -0,0 +1,72 @@ +# Colocate Join +Description +Colocate/Local Join means that when multiple nodes are Join, there is no data movement and network transmission, and each node is only Join locally. +The premise of Join locally is to import data from the same Join Key into a fixed node according to the same rules. + +1 How To Use: + +Simply add the property colocate_with when building a table. The value of colocate_with can be set to any one of the same set of colocate tables. +However, you need to ensure that tables in the colocate_with attribute are created first. 
+
+If you need to Colocate Join tables t1 and t2, you can create them with the following statements:
+
+CREATE TABLE `t1` (
+`id` int(11) COMMENT "",
+`value` varchar(8) COMMENT ""
+) ENGINE=OLAP
+DUPLICATE KEY(`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS 10
+PROPERTIES (
+"colocate_with" = "t1"
+);
+
+CREATE TABLE `t2` (
+`id` int(11) COMMENT "",
+`value` varchar(8) COMMENT ""
+) ENGINE=OLAP
+DUPLICATE KEY(`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS 10
+PROPERTIES (
+"colocate_with" = "t1"
+);
+
+2 Current limitations of Colocate Join:
+
+1. The colocate table must be an OLAP-type table
+2. Tables with the same colocate_with attribute must have the same number of BUCKETS
+3. Tables with the same colocate_with attribute must have the same replication number
+4. Tables with the same colocate_with attribute must have the same data types for their DISTRIBUTED BY columns
+
+3 Applicable scenarios for Colocate Join:
+
+Colocate Join is well suited for scenarios where tables are bucketed by the same field and frequently joined on that field.
+
+4 FAQ:
+
+Q: Is Colocate Join across more than two tables supported?
+
+A: Yes, it is supported.
+
+Q: Is a Join between a colocate table and a normal table supported?
+
+A: Yes, it is supported.
+
+Q: Does a colocate table support a Join on a non-bucket key?
+
+A: Yes. A Join that does not meet the Colocate Join criteria will use Shuffle Join or Broadcast Join instead.
+
+Q: How do I determine whether a Join is executed as a Colocate Join?
+
+A: In the EXPLAIN result, the Join is a Colocate Join if the child node of the Hash Join is an OlapScanNode directly, without an Exchange Node.
+
+Q: How do I modify the colocate_with attribute?
+
+A: ALTER TABLE example_db.my_table set ("colocate_with"="target_table");
+
+Q: How do I disable Colocate Join?
+
+A: set disable_colocate_join = true; disables Colocate Join, and queries will then use Shuffle Join and Broadcast Join.
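+
+The EXPLAIN check described in the FAQ above can be sketched as follows. This is only an illustrative fragment using the t1 and t2 tables created earlier; the exact plan text may differ between versions:
+
+```
+-- Assuming t1 and t2 belong to the same colocate group as defined above
+EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.id = t2.id;
+-- In a colocated plan, the children of the HASH JOIN node are OlapScanNodes directly,
+-- with no EXCHANGE node in between.
+```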
+## keyword
+
+COLOCATE, JOIN, CREATE TABLE
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP DATABASE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP DATABASE_EN.md
new file mode 100644
index 00000000000000..70c91025de9b3f
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP DATABASE_EN.md
@@ -0,0 +1,16 @@
+# DROP DATABASE
+Description
+This statement is used to delete a database
+Grammar:
+DROP DATABASE [IF EXISTS] db_name;
+
+Explain:
+For a period of time after executing DROP DATABASE, the deleted database can be restored through the RECOVER statement. See the RECOVER statement for details
+
+## example
+1. Delete database db_test
+DROP DATABASE db_test;
+
+## keyword
+DROP,DATABASE
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY_EN.md
new file mode 100644
index 00000000000000..6a40cefefe757f
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY_EN.md
@@ -0,0 +1,15 @@
+# DROP REPOSITORY
+Description
+This statement is used to delete a created repository. Only root or superuser users can delete a repository.
+Grammar:
+DROP REPOSITORY `repo_name`;
+
+Explain:
+1. Deleting a repository only deletes the mapping of the repository in Palo; the actual repository data is not deleted. After deletion, you can map to the repository again by specifying the same broker and LOCATION.
+
+## example
+1. Delete the repository named bos_repo:
+DROP REPOSITORY `bos_repo`;
+
+## keyword
+DROP REPOSITORY
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP TABLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP TABLE_EN.md
new file mode 100644
index 00000000000000..00b9e2261bb755
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP TABLE_EN.md
@@ -0,0 +1,19 @@
+# DROP TABLE
+Description
+This statement is used to delete a table.
+Grammar:
+DROP TABLE [IF EXISTS] [db_name.]table_name;
+
+Explain:
+For a period of time after executing DROP TABLE, the deleted table can be restored through the RECOVER statement. See the RECOVER statement for details
+
+## example
+1. Delete a table
+DROP TABLE my_table;
+
+2. If it exists, delete the table in the specified database
+DROP TABLE IF EXISTS example_db.my_table;
+
+## keyword
+DROP,TABLE
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP VIEW_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP VIEW_EN.md
new file mode 100644
index 00000000000000..697c9610ab9f1d
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/DROP VIEW_EN.md
@@ -0,0 +1,14 @@
+# DROP VIEW
+Description
+This statement is used to delete a logical view VIEW
+Grammar:
+DROP VIEW [IF EXISTS]
+[db_name.]view_name;
+
+## example
+1. If it exists, delete view example_view on example_db
+DROP VIEW IF EXISTS example_db.example_view;
+
+## keyword
+DROP,VIEW
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md
new file mode 100644
index 00000000000000..b66b040088f0c6
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/HLL_EN.md
@@ -0,0 +1,79 @@
+# HLL
+Description
+HLL is an engineering implementation based on the HyperLogLog algorithm. It is used to store the intermediate results of the HyperLogLog calculation process. It can only be used as the value column type of a table.
+It continuously reduces the amount of data through aggregation in order to speed up queries; based on it an estimated result is obtained, with an error of about 1%.
+The HLL column is generated from other columns in the table or from columns in the imported data. When importing, the hll_hash function is used to specify which column in the data is used to generate the HLL column.
+It is often used to replace count distinct, and to quickly calculate UV in business by combining with rollup.
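+
+As a rough comparison (a minimal sketch; the table `visits` and its columns are hypothetical, and the HLL result is an estimate with roughly 1% error as noted above):
+
+```
+-- Exact but expensive distinct count
+SELECT COUNT(DISTINCT user_id) FROM visits;
+
+-- Approximate count using a pre-aggregated HLL column uv_set (type hll hll_union)
+SELECT HLL_UNION_AGG(uv_set) FROM visits;
+```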
+
+Related functions:
+
+HLL_UNION_AGG(hll)
+This function is an aggregation function, used to calculate the cardinality estimate of all data satisfying the conditions. It can also be used as an analytic function; it only supports the default window and does not support a window clause.
+
+HLL_RAW_AGG(hll)
+This function is an aggregation function that aggregates HLL type fields and returns an HLL type.
+
+HLL_CARDINALITY(hll)
+This function is used to estimate the cardinality of a single HLL column
+
+HLL_HASH(column_name)
+Generates an HLL column type, used for insert or import; see the instructions for the use of imports
+
+## example
+1. First create a table with HLL columns
+create table test(
+dt date,
+id int,
+name char(10),
+province char(10),
+sex char(1),
+set1 hll hll_union,
+set2 hll hll_union)
+distributed by hash(id) buckets 32;
+
+2. Import data. See help curl for the way to import it.
+
+A. Generate HLL columns using columns in the table
+curl --location-trusted -uname:password -T data http://host/api/test_db/test/_load?label=load_1\&hll=set1,id:set2,name
+
+B. Generate HLL columns using columns in the data
+curl --location-trusted -uname:password -T data http://host/api/test_db/test/_load?label=load_1\&hll=set1,cuid:set2,os
+\&columns=dt,id,name,province,sex,cuid,os
+
+3. There are three common ways of aggregating data: (querying the base table directly without aggregation may be about as fast as using ndv directly)
+
+A. Create a rollup that lets the HLL column generate aggregation.
+alter table test add rollup test_rollup(dt, set1);
+
+B. Create another table dedicated to computing uv, and insert data
+
+create table test_uv(
+dt date,
+uv_set hll hll_union)
+distributed by hash(id) buckets 32;
+
+insert into test_uv select dt, set1 from test;
+
+C. Create another table dedicated to computing uv, then insert and generate HLL columns from other non-hll columns of test through hll_hash
+
+create table test_uv(
+dt date,
+id_set hll hll_union)
+distributed by hash(id) buckets 32;
+
+insert into test_uv select dt, hll_hash(id) from test;
+
+4. Querying. The original values of an HLL column cannot be queried directly; query it with the matching functions.
+
+a. Calculate the total uv
+select HLL_UNION_AGG(uv_set) from test_uv;
+
+b. Calculate the uv of each day
+select dt, HLL_CARDINALITY(uv_set) from test_uv;
+
+c. Calculate the aggregate value of set1 in the test table
+select dt, HLL_CARDINALITY(uv) from (select dt, HLL_RAW_AGG(set1) as uv from test group by dt) tmp;
+select dt, HLL_UNION_AGG(set1) as uv from test group by dt;
+
+## keyword
+HLL
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/RECOVER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/RECOVER_EN.md
new file mode 100644
index 00000000000000..a8f8eaa9696722
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/RECOVER_EN.md
@@ -0,0 +1,28 @@
+# RECOVER
+Description
+This statement is used to restore previously deleted databases, tables, or partitions
+Grammar:
+1) Restore a database
+RECOVER DATABASE db_name;
+2) Restore a table
+RECOVER TABLE [db_name.]table_name;
+3) Restore a partition
+RECOVER PARTITION partition_name FROM [db_name.]table_name;
+
+Explain:
+1. This operation can only recover meta-information deleted within the previous period of time. The default is 3600 seconds.
+2. If new meta-information of the same name and type is created after the meta-information is deleted, the previously deleted meta-information cannot be restored.
+
+## example
+1. Restore the database named example_db
+RECOVER DATABASE example_db;
+
+2. Restore the table named example_tbl
+RECOVER TABLE example_db.example_tbl;
+
+3. Restore partition p1 in example_tbl
+RECOVER PARTITION p1 FROM example_tbl;
+
+## keyword
+RECOVER
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/RESTORE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/RESTORE_EN.md
new file mode 100644
index 00000000000000..f3cc3a551e85e6
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/RESTORE_EN.md
@@ -0,0 +1,52 @@
+# RESTORE
+Description
+1. RESTORE
+This statement is used to restore data previously backed up with the BACKUP command to the specified database. This command is an asynchronous operation. After successful submission, you need to check progress through the SHOW RESTORE command. Only restoring tables of OLAP type is supported.
+Grammar:
+RESTORE SNAPSHOT [db_name].{snapshot_name}
+FROM `repository_name`
+ON (
+`table_name` [PARTITION (`p1`, ...)] [AS `tbl_alias`],
+...
+)
+PROPERTIES ("key"="value", ...);
+
+Explain:
+1. Only one BACKUP or RESTORE task can run under the same database at a time.
+2. The ON clause identifies the tables and partitions that need to be restored. If no partition is specified, all partitions of the table are restored by default. The specified tables and partitions must already exist in the repository backup.
+3. The backed-up tables in the repository can be restored to new tables through the AS clause. But the new table name must not already exist in the database. Partition names cannot be changed.
+4. The backed-up tables in the repository can be restored to replace same-name tables in the database, but the table structures of the two tables must be completely consistent. Table structure includes: table name, columns, partitions, rollups and so on.
+5. Partitions of the restored table can be specified, and the system checks whether the partition ranges match.
+6. PROPERTIES currently supports the following attributes:
+"backup_timestamp" = "2018-05-04-16-45-08": specifies which time version of the corresponding backup to restore; required. This information can be obtained through the `SHOW SNAPSHOT ON repo;` statement (see the sketch after this list).
+"replication_num" = "3": specifies the number of replicas of the restored table or partition. The default is 3. If an existing table or partition is restored, the number of replicas must be the same as that of the existing table or partition. At the same time, there must be enough hosts to accommodate the replicas.
+"timeout" = "3600": task timeout, default is one day. Unit: seconds.
+"meta_version" = 40: uses the specified meta_version to read the previously backed-up metadata. Note that, as a temporary solution, this parameter is only used to restore data backed up by an older version of Doris. The latest version of backup data already contains the meta version and does not need it specified.
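+
+The two auxiliary statements mentioned above can be sketched as follows (illustrative only; the repository and database names are taken from the examples below, and the exact output columns may vary by version):
+
+```
+-- List the snapshots in a repository, including their backup_timestamp values
+SHOW SNAPSHOT ON example_repo;
+
+-- Check the progress of a submitted restore job in a database
+SHOW RESTORE FROM example_db1;
+```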
+
+## example
+1. Restore the table backup_tbl in the backup snapshot_1 from example_repo to database example_db1, with the time version "2018-05-04-16-45-08". Restore as one replica:
+RESTORE SNAPSHOT example_db1.`snapshot_1`
+FROM `example_repo`
+ON ( `backup_tbl` )
+PROPERTIES
+(
+"backup_timestamp"="2018-05-04-16-45-08",
+"replication_num" = "1"
+);
+
+2. Restore partitions p1 and p2 of table backup_tbl in backup snapshot_2, and table backup_tbl2, from example_repo to database example_db1, renaming backup_tbl2 to new_tbl. The time version is "2018-05-04-17-11-01".
By default, three copies are restored: +RESTORE SNAPSHOT example_db1.`snapshot_2` +FROM `example 'u repo' +ON +( +`backup_tbl` PARTITION (`p1`, `p2`), +`backup_tbl2` AS `new_tbl` +) +PROPERTIES +( +"backup_timestamp"="2018-05-04-17-11-01" +); + +## keyword +RESTORE + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE_EN.md new file mode 100644 index 00000000000000..2d5057b29e7221 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE_EN.md @@ -0,0 +1,26 @@ +'35; TRUNCATE TABLES +Description +This statement is used to empty the data of the specified table and partition +Grammar: + +TRUNCATE TABLE [db.]tbl[ PARTITION(p1, p2, ...)]; + +Explain: +1. The statement empties the data, but retains the table or partition. +2. Unlike DELETE, this statement can only empty the specified tables or partitions as a whole, without adding filtering conditions. +3. Unlike DELETE, using this method to clear data will not affect query performance. +4. The data deleted by this operation is not recoverable. +5. When using this command, the table state should be NORMAL, i.e. SCHEMA CHANGE operations are not allowed. + +'35;'35; example + +1. Clear the table TBL under example_db + +TRUNCATE TABLE example_db.tbl; + +2. P1 and P2 partitions of clearing TABLE tbl + +TRUNCATE TABLE tbl PARTITION(p1, p2); + +## keyword +TRUNCATE,TABLE diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/create-function_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/create-function_EN.md new file mode 100644 index 00000000000000..8bca1e552c19e4 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/create-function_EN.md @@ -0,0 +1,71 @@ +# CREATE FUNCTION +Description +'35;'35;' 35; Syntax + +``` +CREATE [AGGREGATE] FUNCTION function_name +(angry type [...]) +RETURNS ret_type +[INTERMEDIATE inter_type] +[PROPERTIES ("key" = "value" [, ...]) ] +``` + +### Parameters + +>` AGGREGATE `: If this is the case, it means that the created function is an aggregate function, otherwise it is a scalar function. +> +>` Function_name': To create the name of the function, you can include the name of the database. For example: `db1.my_func'. +> +>` arg_type': The parameter type of the function is the same as the type defined at the time of table building. Variable-length parameters can be represented by `,...'. If it is a variable-length type, the type of the variable-length part of the parameters is the same as the last non-variable-length parameter type. +> +>` ret_type': Function return type. +> +>` Inter_type': A data type used to represent the intermediate stage of an aggregate function. +> +>` properties `: Used to set properties related to this function. Properties that can be set include +> +> "Object_file": Custom function dynamic library URL path, currently only supports HTTP/HTTPS protocol, this path needs to remain valid throughout the life cycle of the function. This option is mandatory +> +> "symbol": Function signature of scalar functions for finding function entries from dynamic libraries. This option is mandatory for scalar functions +> +> "init_fn": Initialization function signature of aggregate function. Necessary for aggregation functions +> +> "update_fn": Update function signature of aggregate function. 
Necessary for aggregation functions +> +> "merge_fn": Merge function signature of aggregate function. Necessary for aggregation functions +> +> "serialize_fn": Serialized function signature of aggregate function. For aggregation functions, it is optional, and if not specified, the default serialization function will be used +> +> "finalize_fn": A function signature that aggregates functions to obtain the final result. For aggregation functions, it is optional. If not specified, the default fetch result function will be used. +> +> "md5": The MD5 value of the function dynamic link library, which is used to verify that the downloaded content is correct. This option is optional + + +This statement creates a custom function. Executing this command requires that the user have `ADMIN'privileges. + +If the `function_name'contains the database name, the custom function will be created in the corresponding database, otherwise the function will be created in the database where the current session is located. The name and parameters of the new function cannot be the same as functions already existing in the current namespace, otherwise the creation will fail. But only with the same name and different parameters can the creation be successful. + +'35;'35; example + +1. Create a custom scalar function + +``` +CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES ( +"Symbol"=""\\\\\\\\ zn9doris\\\ udf6addudfepns\\ FunctionContexterkns\\ INTVales 4\, +"object file" ="http://host:port /libmyadd.so" +); +``` + +2. Create a custom aggregation function + +``` +CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES ( +"init u fn"= "ZN9doris, udf9CountInitEPNS -u 15FunctionContextEPNS, u 9BigIntValE", +"Update fn" = " zn9doris \ udf11Countupdateepns \ \ FunctionContexterkns \ Intvalepns bigintvale", +"Merge fn"="\ zn9doris\\ udf10CountMergeepns\ \ FunctionContexterkns\ Bigintvaleps2\\\\\\\\\\\\\ +"Finalize \ fn" = "\ zn9doris \ udf13Count Finalizepns \\ FunctionContexterkns \ Bigintvale", +"object" file ="http://host:port /libudasample.so" +); +``` +##keyword +CREATE,FUNCTION diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/drop-function_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/drop-function_EN.md new file mode 100644 index 00000000000000..f4b31dea75c1e9 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/drop-function_EN.md @@ -0,0 +1,28 @@ +# DROP FUNCTION +Description +'35;'35;' 35; Syntax + +``` +DROP FUNCTION function_name +(angry type [...]) +``` + +### Parameters + +>` function_name': To delete the name of the function +> +>` arg_type`: To delete the parameter list of the function +> + + +Delete a custom function. The name of the function and the type of the parameter are exactly the same before they can be deleted. + +'35;'35; example + +1. 
Delete a function + +``` +DROP FUNCTION my_add(INT, INT) +``` +##keyword +DROP,FUNCTION diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Definition/show-function_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Definition/show-function_EN.md new file mode 100644 index 00000000000000..0f09d88dc09b80 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Definition/show-function_EN.md @@ -0,0 +1,37 @@ +# SHOW FUNCTION +Description +'35;'35;' 35; Syntax + +``` +SHOW FUNCTION [FROM db] +``` + +### Parameters + +>` DB `: The name of the database to query + + +Look at all the custom functions under the database. If the user specifies the database, then look at the corresponding database, otherwise directly query the database where the current session is located. + +You need `SHOW'privileges for this database + +'35;'35; example + +``` +mysql> show function in testDb\G +*********************************1. row ************************ +Signature: my_count(BIGINT) +Return Type: BIGINT +Function Type: Aggregate +Intermediate Type: NULL +Properties: {"object_file":"http://host:port/libudasample.so","finalize_fn":"_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE","init_fn":"_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE","merge_fn":"_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_","md5":"37d185f80f95569e2676da3d5b5b9d2f","update_fn":"_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE"} +*********************************2. row ************************ +Signature: my_add(INT,INT) +Return Type: INT +Function Type: Scalar +Intermediate Type: NULL +Properties: {"symbol":"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_","object_file":"http://host:port/libudfsample.so","md5":"cfe7a362d10f3aaf6c49974ee0f1f878"} +2 rows in set (0.00 sec) +``` +##keyword +SHOW,FUNCTION diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE_EN.md new file mode 100644 index 00000000000000..cda3aead06136e --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE_EN.md @@ -0,0 +1,10 @@ +# CANCEL DELETE +Description + +This statement is used to undo a DELETE operation. (Administrator only!) (To be realized) + +'35;'35; example + +## keyword +CANCEL,DELETE + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL_EN.md new file mode 100644 index 00000000000000..dbf1e400fabd46 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL_EN.md @@ -0,0 +1,27 @@ +35; Cancel Label +Description +NAME: +cancel_label: cancel a transaction with label + +SYNOPSIS +curl -u user:passwd -XPOST http://host:port/api/{db}/{label}/_cancel + +DESCRIPTION +This command is used to cancel a transaction corresponding to a specified Label, which can be successfully cancelled during the Prepare phase. + +RETURN VALUES +When the execution is complete, the relevant content of this import will be returned in Json format. Currently includes the following fields +Status: Successful cancel +Success: 成功cancel事务 +20854; 2018282: 22833; 361333; +Message: Specific Failure Information + +ERRORS + +'35;'35; example + +1. 
Cancel the job on testDb with label testLabel
+curl -u root -XPOST http://host:port/api/testDb/testLabel/_cancel
+
+## keyword
+CANCEL, LABEL
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD_EN.md
new file mode 100644
index 00000000000000..d485d46fe8949d
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD_EN.md
@@ -0,0 +1,19 @@
+# CANCEL LOAD
+Description
+
+This statement is used to undo the import job with the specified load label.
+This is an asynchronous operation, which returns once the task is submitted successfully. After execution, you can use the SHOW LOAD command to view progress.
+Grammar:
+CANCEL LOAD
+[FROM db_name]
+WHERE LABEL = "load_label";
+
+## example
+
+1. Cancel the import job with label example_db_test_load_label on the database example_db
+CANCEL LOAD
+FROM example_db
+WHERE LABEL = "example_db_test_load_label";
+
+## keyword
+CANCEL,LOAD
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/DELETE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/DELETE_EN.md
new file mode 100644
index 00000000000000..af18e822f50f5c
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/DELETE_EN.md
@@ -0,0 +1,36 @@
+# DELETE
+Description
+
+This statement is used to conditionally delete data in the specified table (base index) partition.
+This operation also deletes the data of the rollup indexes associated with this base index.
+Grammar:
+DELETE FROM table_name [PARTITION partition_name]
+WHERE
+column_name1 op value[ AND column_name2 op value ...];
+
+Explain:
+1) The optional types of op include: =, >, <, >=, <=, !=
+2) Conditions can only be specified on key columns.
+3) When a selected key column does not exist in a rollup, the delete cannot be performed.
+4) The relationship between conditions can only be "and".
+If you want to achieve an "or" relationship, you need to divide the conditions into two DELETE statements.
+5) If the table is RANGE partitioned, PARTITION must be specified. If it is a single-partition table, it can be omitted.
+
+Be careful:
+This statement may reduce query efficiency for a period of time after execution.
+The degree of impact depends on the number of deletion conditions specified in the statement.
+The more conditions specified, the greater the impact.
+
+## example
+
+1. Delete rows whose k1 column value is 3 in my_table partition p1
+DELETE FROM my_table PARTITION p1
+WHERE k1 = 3;
+
+2. Delete rows whose k1 column value is greater than or equal to 3 and whose k2 column value is "abc" in my_table partition p1
+DELETE FROM my_table PARTITION p1
+WHERE k1 >= 3 AND k2 = "abc";
+
+## keyword
+DELETE
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/EXPORT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/EXPORT_EN.md
new file mode 100644
index 00000000000000..8b94ca1a21556a
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/EXPORT_EN.md
@@ -0,0 +1,57 @@
+# EXPORT
+Description
+
+This statement is used to export data from a specified table to a specified location.
+This function is implemented by the broker process. For different target storage systems, different brokers need to be deployed. Deployed brokers can be viewed through SHOW BROKER.
+This is an asynchronous operation, which returns if the task is submitted successfully. After execution, you can use the SHOW EXPORT command to view progress. + +Grammar: +EXPORT TABLE table_name +[PARTITION (p1 [,p2]] +TO export_path +[opt_properties] +broker; + +1. table_name +The table names to be exported currently support the export of tables with engine as OLAP and mysql. + +2. partition +You can export only certain specified partitions of the specified table + +3. export_path +The exported path needs to be a directory. At present, it can't be exported to local, so it needs to be exported to broker. + +4. opt_properties +Used to specify some special parameters. +Grammar: +[PROPERTIES ("key"="value", ...)] + +The following parameters can be specified: +Column_separator: Specifies the exported column separator, defaulting to t. +Line_delimiter: Specifies the exported line separator, defaulting to\n. +Exc_mem_limit: Exports the upper limit of memory usage for a single BE node, defaulting to 2GB in bytes. +Timeout: The time-out for importing jobs is 1 day by default, in seconds. +Tablet_num_per_task: The maximum number of tablets that each subtask can allocate. + +Five. debris +Broker used to specify export usage +Grammar: +WITH BROKER broker_name ("key"="value"[,...]) +Here you need to specify the specific broker name and the required broker attributes + +For brokers corresponding to different storage systems, the input parameters are different. Specific parameters can be referred to: `help broker load', broker required properties. + +'35;'35; example + +1. Export all data from the testTbl table to HDFS +EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); + +2. Export partitions P1 and P2 from the testTbl table to HDFS + +EXPORT TABLE testTbl PARTITION (p1,p2) TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); +3. Export all data in the testTbl table to hdfs, using "," as column separator + +EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"=",") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); + +## keyword +EXPORT diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE_EN.md new file mode 100644 index 00000000000000..964ab3e8834e90 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE_EN.md @@ -0,0 +1,32 @@ +# GET LABEL STATE +Description +NAME: +get_label_state: get label's state + +SYNOPSIS +curl -u user:passwd http://host:port /api /{db}/{label}// u state + +DESCRIPTION +This command is used to view the transaction status of a Label + +RETURN VALUES +After execution, the relevant content of this import will be returned in Json format. Currently includes the following fields +Label: The imported label, if not specified, is a uuid. 
+Status: Whether this command was successfully executed or not, Success indicates successful execution +Message: Specific execution information +State: It only makes sense if Status is Success +UNKNOWN: No corresponding Label was found +PREPARE: The corresponding transaction has been prepared, but not yet committed +COMMITTED: The transaction has been committed and cannot be canceled +VISIBLE: Transaction submission, and data visible, cannot be canceled +ABORTED: The transaction has been ROLLBACK and the import has failed. + +ERRORS + +'35;'35; example + +1. Obtain the state of testDb, testLabel +curl -u root http://host:port /api /testDb /testLabel / u state + +## keyword +GET, LABEL, STATE diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/LOAD_EN.md new file mode 100644 index 00000000000000..3ca26557710248 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/LOAD_EN.md @@ -0,0 +1,284 @@ +# LOAD +Description + +Palo currently supports the following four import methods: + +1. Hadoop Load: Importing ETL based on MR. +2. Broker Load: Use broker to import data. +3. Mini Load: Upload files through HTTP protocol for batch data import. +4. Stream Load: Stream data import through HTTP protocol. + +This help mainly describes the first import method, namely Hadoop Load related help information. The rest of the import methods can use the following commands to view help: + +This import method may not be supported in a subsequent version. It is recommended that other import methods be used for data import. !!! + +1. help broker load; +2. help mini load; +3. help stream load; + +Hadoop Load is only applicable to Baidu's internal environment. Public, private and open source environments cannot use this import approach. +The import method must set up a Hadoop computing queue for ETL, which can be viewed through the help set property command. + +Stream load only supports Baidu internal users for the time being. Open source communities and public cloud users will be supported in subsequent version updates. + +Grammar: + +LOAD LABEL load_label +( +Date of date of date of entry +) +[opt_properties]; + +1. load label + +The label of the current imported batch. Unique in a database. +Grammar: +[database_name.]your_label + +2. data_desc + +Used to describe a batch of imported data. +Grammar: +DATA INFILE +( +"file_path1"[, file_path2, ...] +) +[NEGATIVE] +INTO TABLE `table_name` +[PARTITION (p1, P2)] +[COLUMNS TERMINATED BY "column_separator"] +[FORMAT AS "file_type"] +[(column_list)] +[set (k1 = fun (k2)]] + +Explain: +file_path: + +File paths can be specified to a file, or * wildcards can be used to specify all files in a directory. Wildcards must match to files, not directories. + +PARTICIPATION: + +If this parameter is specified, only the specified partition will be imported, and data outside the imported partition will be filtered out. +If not specified, all partitions of the table are imported by default. + +NEGATIVE: +If this parameter is specified, it is equivalent to importing a batch of "negative" data. Used to offset the same batch of data imported before. +This parameter applies only to the case where there are value columns and the aggregation type of value columns is SUM only. + +Column U separator: + +Used to specify the column separator in the import file. 
Default tot +If the character is invisible, it needs to be prefixed with \x, using hexadecimal to represent the separator. +For example, the separator X01 of the hive file is specified as "\ x01" + +File type: + +Used to specify the type of imported file, such as parquet, csv. The default value is determined by the file suffix name. + +column_list: + +Used to specify the correspondence between columns in the import file and columns in the table. +When you need to skip a column in the import file, specify it as a column name that does not exist in the table. +Grammar: +(col_name1, col_name2, ...) + +SET: + +If this parameter is specified, a column of the source file can be transformed according to a function, and then the transformed result can be imported into the table. +The functions currently supported are: + +Strftime (fmt, column) date conversion function +Fmt: Date format, such as% Y% m% d% H% M% S (year, month, day, hour, second) +Column: Column in column_list, which is the column in the input file. Storage content should be a digital timestamp. +If there is no column_list, the columns of the input file are entered by default in the column order of the Palo table. + +time_format(output_fmt, input_fmt, column) 日期格式转化 +Output_fmt: Converted date format, such as% Y% m% d% H% M% S (year, month, day, hour, second) +Input_fmt: The date format of the column before transformation, such as% Y% m% d% H% M% S (days, hours, seconds, months, years) +Column: Column in column_list, which is the column in the input file. Storage content should be a date string in input_fmt format. +If there is no column_list, the columns of the input file are entered by default in the column order of the Palo table. + +alignment_timestamp(precision, column) 将时间戳对齐到指定精度 +Precision: year 124month;124day;124hour; +Column: Column in column_list, which is the column in the input file. Storage content should be a digital timestamp. +If there is no column_list, the columns of the input file are entered by default in the column order of the Palo table. +Note: When the alignment accuracy is year and month, only the time stamps in the range of 20050101-20191231 are supported. + +Default_value (value) sets the default value for a column import +Use default values of columns when creating tables without specifying + +Md5sum (column1, column2,...) evaluates the value of the specified imported column to md5sum, returning a 32-bit hexadecimal string + +Replace_value (old_value [, new_value]) replaces old_value specified in the import file with new_value +New_value, if not specified, uses the default value of the column when building the table + +Hll_hash (column) is used to transform a column in a table or data into a data structure of a HLL column + +3. opt_properties + +Used to specify some special parameters. +Grammar: +[PROPERTIES ("key"="value", ...)] + +The following parameters can be specified: +Cluster: Import the Hadoop computed queue used. +Timeout: Specifies the timeout time of the import operation. The default timeout is 3 days. Unit seconds. +Max_filter_ratio: The ratio of data that is most tolerant of being filterable (for reasons such as data irregularities). Default zero tolerance. +Load_delete_flag: Specifies whether the import deletes data by importing the key column, which applies only to UNIQUE KEY. +Value column is not specified when importing. The default is false. + +5. 
Import data format sample + +Integer classes (TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1,1000,1234 +Floating Point Class (FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, 356 +Date class (DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03. +(Note: If it's in other date formats, you can use strftime or time_format functions to convert in the import command) +字符串类(CHAR/VARCHAR):"I am a student", "a" +NULL value: N + +'35;'35; example + +1. Import a batch of data, specify timeout time and filtering ratio. Specify the import queue as my_cluster. + +LOAD LABEL example db.label1 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") +INTO TABLE `my_table` +) +PROPERTIES +( +"cluster" ="my" cluster, +Timeout ="3600", +"max_filter_ratio" = "0.1" +); + +Where hdfs_host is the host of the namenode and hdfs_port is the fs.defaultFS port (default 9000) + +2. Import a batch of data, including multiple files. Import different tables, specify separators, and specify column correspondences + +LOAD LABEL example db.label2 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file1") +INTO TABLE `my_table_1` +COLUMNS TERMINATED BY "," +(k1, k3, k2, v1, v2), +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file2") +INTO TABLE `my_table_2` +COLUMNS TERMINATED BY "\t" +(k1, k2, k3, v2, v1) +); + +3. Import a batch of data, specify hive's default delimiter x01, and use wildcard * to specify all files in the directory + +LOAD LABEL example db.label3 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") +NEGATIVE +INTO TABLE `my_table` +COLUMNS TERMINATED BY "\\x01" +); + +4. Import a batch of "negative" data + +LOAD LABEL example db.label4 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file) +NEGATIVE +INTO TABLE `my_table` +COLUMNS TERMINATED BY "\t" +); + +5. Import a batch of data and specify partitions + +LOAD LABEL example db.label5 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") +INTO TABLE `my_table` +PARTITION (p1, P2) +COLUMNS TERMINATED BY "," +(k1, k3, k2, v1, v2) +); + +6. Import a batch of data, specify partitions, and make some transformations to the columns of the imported files, as follows: +The table structure is as follows: +K1 date +date +k3 bigint +k4 varchar (20) +k5 varchar (64) +k6 int + +Assume that the data file has only one row of data, five columns, and comma-separated: + +1537002087,2018-08-09 11:12:13,1537002087,-,1 + +The columns in the data file correspond to the columns specified in the import statement: +tmp -u k1, tmp -u k2, tmp u k3, k6, v1 + +The conversion is as follows: + +1) k1: Transform tmp_k1 timestamp column into datetime type data +2) k2: Converting tmp_k2 datetime-type data into date data +3) k3: Transform tmp_k3 timestamp column into day-level timestamp +4) k4: Specify import default value of 1 +5) k5: Calculate MD5 values from tmp_k1, tmp_k2, tmp_k3 columns +6) k6: Replace the - value in the imported file with 10 + +LOAD LABEL example db.label6 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") +INTO TABLE `my_table` +PARTITION (p1, P2) +COLUMNS TERMINATED BY "," +(tmp /u k1, tmp /u k2, tmp /u k3, k6, v1) +SET ( +K1 = strftime (%Y -%m -%d%H:%M:%S ", TMP u K1), +K2 = Time = UFormat ("% Y-% M-% D% H:% M:% S", "% Y-% M-% D", "TMP = UK2), +k3 = alignment_timestamp("day", tmp_k3), +k4 = default_value("1"), +K5 = MD5Sum (TMP = UK1, TMP = UK2, TMP = UK3) +k6 = replace value ("-", "10") +) +); + +7. 
Import data into tables containing HLL columns, which can be columns in tables or columns in data + +LOAD LABEL example db.label7 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") +INTO TABLE `my_table` +PARTITION (p1, P2) +COLUMNS TERMINATED BY "," +SET ( +v1 = hll, u hash (k1), +v2 = hll, u hash (k2) +) +); + +LOAD LABEL example db.label8 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") +INTO TABLE `my_table` +PARTITION (p1, P2) +COLUMNS TERMINATED BY "," +(k1, k2, tmp u k3, tmp u k4, v1, v2) +SET ( +v1 = hll, u hash (tmp +v2 = hll, u hash (tmp +) +) +WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); + +8. Importing data into Parquet file specifies FORMAT as parquet, which is judged by file suffix by default. +LOAD LABEL example db.label9 +( +DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") +INTO TABLE `my_table` +FORMAT AS "parquet" +(k1, k2, k3) +) +WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); + +## keyword +LOAD + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD_EN.md new file mode 100644 index 00000000000000..e446a66a6a2e2d --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD_EN.md @@ -0,0 +1,103 @@ +# MINI LOAD +Description + +MINI LOAD and STEAM LOAD are implemented in exactly the same way. MINI LOAD is a subset of STREAM LOAD in import support. +Subsequent imports of new features will only be supported in STEAM LOAD, MINI LOAD will no longer add features. It is suggested that STREAM LOAD be used instead. Please use HELP STREAM LOAD. + +MINI LOAD is imported through HTTP protocol. Users can import without relying on Hadoop or Mysql client. +The user describes the import through HTTP protocol, and the data is streamed into Doris in the process of receiving http requests. After the ** import job is completed, the ** returns to the user the imported results. + +* Note: In order to be compatible with the old version of mini load usage habits, users can still view the import results through the'SHOW LOAD'command. + +Grammar: +Import: + +curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table}/_load?label=xxx + +View import information + +curl -u user:passwd http://host:port/api/{db}/_load_info?label=xxx + +HTTP Protocol Specification + +Privilege Authentication Currently Doris uses the Basic mode of HTTP for privilege authentication. So you need to specify a username and password when importing +This way is to pass the password in plaintext, and does not support encrypted transmission for the time being. + +Expect Doris needs to send an HTTP request with the'Expect'header information,'100-continue'. +Why? Because we need to redirect the request, we have to transfer the data content before. +This can avoid causing multiple data transmission, thereby improving efficiency. + +Content-Length Doris needs to send a request with the header'Content-Length'. If the content ratio is sent +'Content-Length'is less, so Doris believes that if there is a transmission problem, the submission task fails. +NOTE: If you send more data than'Content-Length', Doris reads only'Content-Length'. +Length content and import + + +Description of parameters: + +User: User is user_name if the user is in default_cluster. Otherwise, it is user_name@cluster_name. 
+ +Label: The label used to specify this batch of imports for later job queries, etc. +This parameter must be passed in. + +Columns: Used to describe the corresponding column name in the import file. +If it is not passed in, the column order in the file is considered to be the same as the order in which the table is built. +The specified method is comma-separated, such as columns = k1, k2, k3, K4 + +Column_separator: Used to specify the separator between columns, default is' t' +NOTE: Url encoding is required, for example +If you need to specify' t'as a separator, you should pass in'column_separator=% 09' +If you need to specify'x01'as a delimiter, you should pass in'column_separator=% 01' +If you need to specify','as a separator, you should pass in'column_separator=% 2c' + + +Max_filter_ratio: Used to specify the maximum percentage allowed to filter irregular data, default is 0, not allowed to filter +Custom specification should be as follows:'max_filter_ratio = 0.2', meaning that 20% error rate is allowed. + +Timeout: Specifies the timeout time of the load job in seconds. When the load execution time exceeds this threshold, it is automatically cancelled. The default timeout time is 86400 seconds. +It is recommended to specify a timeout time of less than 86400 seconds. + +Hll: Used to specify the corresponding relationship between the HLL columns in the data and the tables, the columns in the tables and the columns specified in the data. +(If columns are not specified, the columns of the data column surface can also be other non-HLL columns in the table.) By "partition" +Specify multiple HLL columns using ":" splitting, for example:'hll1, cuid: hll2, device' + +NOTE: +1. This method of importing is currently completed on a single machine, so it is not suitable to import a large amount of data. +It is recommended that the amount of data imported should not exceed 1 GB. + +2. Currently, it is not possible to submit multiple files in the form of `curl-T', `{file1, file2}', because curl splits them into multiple files. +Request sent, multiple requests can not share a label number, so it can not be used + +3. Miniload is imported in exactly the same way as streaming. It returns the results synchronously to users after the import of streaming is completed. +Although the information of mini load can be found in subsequent queries, it can not be operated on. The queries are only compatible with the old ways of use. + +4. When importing from the curl command line, you need to add escape before & or the parameter information will be lost. + +'35;'35; example + +1. Import the data from the local file'testData'into the table of'testTbl' in the database'testDb'(the user is in defalut_cluster) +curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123 + +2. Import the data from the local file'testData'into the table of'testTbl' in the database'testDb'(the user is in test_cluster). The timeout time is 3600 seconds. +curl --location-trusted -u root@test_cluster:root -T testData http://fe.host:port/api/testDb/testTbl/_load?label=123&timeout=3600 + +3. Import data from the local file'testData'into the'testTbl' table in the database'testDb', allowing a 20% error rate (the user is in defalut_cluster) +curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 + +4. 
Import the data from the local file'testData'into the table'testTbl' in the database'testDb', allowing a 20% error rate, and specify the column name of the file (the user is in defalut_cluster) +curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&columns=k1,k2,k3 + +5. Import in streaming mode (user is in defalut_cluster) +seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_load?label=123 + +6. Import tables containing HLL columns, which can be columns in tables or columns in data to generate HLL columns (users are in defalut_cluster) + +curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 +\&hll=hll_column1,tmp_k4:hll_column2,tmp_k5\&columns=k1,k2,k3,tmp_k4,tmp_k5 + +7. View imports after submission + +curl -u root http://host:port/api/testDb/_load_info?label=123 + +## keyword +MINI, LOAD diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD_EN.md new file mode 100644 index 00000000000000..b55f0b1d5ee870 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD_EN.md @@ -0,0 +1,81 @@ +# MULTI LOAD +Description + +Syntax: +curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_start?label=xxx +curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table1}/_load?label=xxx\&sub_label=yyy +curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table2}/_load?label=xxx\&sub_label=zzz +curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_commit?label=xxx +curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_desc?label=xxx + +'MULTI LOAD'can support users to import multiple tables at the same time on the basis of'MINI LOAD'. The specific commands are shown above. +'/api/{db}/_multi_start'starts a multi-table import task +'/api/{db}/{table}/_load'adds a table to be imported to an import task. The main difference from'MINI LOAD' is that the'sub_label'parameter needs to be passed in. +'/api/{db}/_multi_commit'submits the entire multi-table import task and the background begins processing +'/api/{db}/_multi_abort'Abandons a multi-table import task +'/api/{db}/_multi_desc'shows the number of jobs submitted by a multi-table import task + +HTTP Protocol Specification +Privilege Authentication Currently Doris uses the Basic mode of HTTP for privilege authentication. So you need to specify a username and password when importing +This way is to pass passwords in plaintext, since we are all in the Intranet environment at present... + +Expect Doris needs to send an HTTP request, and needs the'Expect'header information with the content of'100-continue'. +Why? Because we need to redirect the request, we have to transfer the data content before. +This can avoid causing multiple data transmission, thereby improving efficiency. + +Content-Length Doris needs to send a request with the header'Content-Length'. If the content ratio is sent +If'Content-Length'is less, Palo believes that if there is a transmission problem, the submission of the task fails. +NOTE: If you send more data than'Content-Length', Doris reads only'Content-Length'. 
+Length content and import + +Description of parameters: +User: User is user_name if the user is in default_cluster. Otherwise, it is user_name@cluster_name. + +Label: Used to specify the label number imported in this batch for later job status queries, etc. +This parameter must be passed in. + +Sub_label: Used to specify a subversion number within a multi-table import task. For multi-table imported loads, this parameter must be passed in. + +Columns: Used to describe the corresponding column name in the import file. +If it is not passed in, the column order in the file is considered to be the same as the order in which the table is built. +The specified method is comma-separated, such as columns = k1, k2, k3, K4 + +Column_separator: Used to specify the separator between columns, default is' t' +NOTE: Url encoding is required, such as specifying't'as a delimiter. +Then you should pass in'column_separator=% 09' + +Max_filter_ratio: Used to specify the maximum percentage allowed to filter irregular data, default is 0, not allowed to filter +Custom specification should be as follows:'max_filter_ratio = 0.2', meaning that 20% error rate is allowed. +Pass in effect at'_multi_start' + +NOTE: +1. This method of importing is currently completed on a single machine, so it is not suitable to import a large amount of data. +It is recommended that the amount of data imported should not exceed 1GB + +2. Currently, it is not possible to submit multiple files in the form of `curl-T', `{file1, file2}', because curl splits them into multiple files. +Request sent, multiple requests can not share a label number, so it can not be used + +3. Supports streaming-like ways to use curl to import data into Doris, but Doris will have to wait until the streaming is over +Real import behavior will occur, and the amount of data in this way can not be too large. + +'35;'35; example + +1. Import the data from the local file'testData1'into the table of'testTbl1' in the database'testDb', and +Import the data from'testData2'into the table'testTbl2' in'testDb'(the user is in defalut_cluster) +curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 +curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 +curl --location-trusted -u root -T testData2 http://host:port/api/testDb/testTbl2/_load?label=123\&sub_label=2 +curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_commit?label=123 + +2. Multi-table Import Midway Abandon (User in defalut_cluster) +curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 +curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 +curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_abort?label=123 + +3. 
Multi-table import to see how much content has been submitted (user is in defalut_cluster) +curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 +curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 +curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_desc?label=123 + +## keyword +MULTI, MINI, LOAD diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD_EN.md new file mode 100644 index 00000000000000..da2d9b86034161 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD_EN.md @@ -0,0 +1,9 @@ +# PAUSE ROUTINE LOAD +'35;'35; example + +1. Suspend the routine import operation named test 1. + +PAUSE ROUTINE LOAD FOR test1; + +## keyword +PAUSE,ROUTINE,LOAD diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET_EN.md new file mode 100644 index 00000000000000..f0c9f0b25832f4 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET_EN.md @@ -0,0 +1,15 @@ +# RESTORE TABLET +Description + +This function is used to recover the tablet data that was deleted by mistake in the trash directory. + +Note: For the time being, this function only provides an HTTP interface in be service. If it is to be used, +A restore tablet API request needs to be sent to the HTTP port of the be machine for data recovery. The API format is as follows: +Method: Postal +URI: http://be_host:be_http_port/api/restore_tablet?tablet_id=xxx&schema_hash=xxx + +'35;'35; example + +Curl -X POST "http://hostname:8088 /api /restore" tablet? Tablet id =123456 &schema hash =1111111 " +##keyword +RESTORE,TABLET,RESTORE,TABLET diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD_EN.md new file mode 100644 index 00000000000000..924297f1402cd4 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD_EN.md @@ -0,0 +1,9 @@ +# RESUME ROUTINE LOAD +'35;'35; example + +1. Restore the routine import job named test 1. + +RESUME ROUTINE LOAD FOR test1; + +## keyword +RESUME,ROUTINE,LOAD diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD_EN.md new file mode 100644 index 00000000000000..dd677d0e9664ab --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD_EN.md @@ -0,0 +1,248 @@ +# ROUTINE LOAD +Description + +Routine Load enables users to submit a permanent import task and import data into Doris by constantly reading data from a specified data source. +Currently only support importing text format (CSV) data from Kakfa through unauthenticated or SSL authentication. + +Grammar: + +CREATE ROUTINE LOAD [db.]job_name ON tbl_name +[load_properties] +[job_properties] +FROM data_source +[data source properties] + +1. [db.]job_name + +The name of the import job, in the same database, can only have one job running with the same name. + +2. tbl name + +Specifies the name of the table to be imported. + +3. 
load_properties + +Used to describe imported data. Grammar: + +[Swing separator], +[columns_mapping], +[where_predicates], +[partitions] + +One Column U separator: + +Specify column separators, such as: + +COLUMNS TERMINATED BY "," + +Default: t + +2. columns_mapping: + +Specifies the mapping relationship of columns in source data and defines how derivative columns are generated. + +1. Mapping column: + +Specify in sequence which columns in the source data correspond to those in the destination table. For columns you want to skip, you can specify a column name that does not exist. +Assume that the destination table has three columns k1, k2, v1. Source data has four columns, of which columns 1, 2 and 4 correspond to k2, K1 and v1, respectively. Written as follows: + +COLUMNS (k2, k1, xxx, v1) + +XXX is a non-existent column used to skip the third column in the source data. + +2. Derivative column: + +Columns in the form of col_name = expr are called derived columns. That is to say, it supports calculating the values of the corresponding columns in the destination table by expr. +Derivative columns are usually arranged after the mapping column. Although this is not mandatory, Doris always parses the mapping column first and then the derived column. +Following an example, suppose that the destination table also has the fourth column v2, which is generated by the sum of K1 and k2. It can be written as follows: + +COLUMNS (k2, k1, xxx, v1, v2 = k1 + k2); + +3. where_predicates + +Used to specify filtering conditions to filter out unnecessary columns. The filter column can be a mapping column or a derived column. +For example, if we only want to import columns with K1 greater than 100 and K2 equal to 1000, we write as follows: + +WHERE k1 > 100 and k2 = 1000 + +Four Division + +Specify which partitions to import into the destination table. If not specified, it is automatically imported into the corresponding partition. +Examples: + +Segmentation (P1, P2, P3) + +4. job_properties + +General parameters used to specify routine import jobs. +Grammar: + +PROPERTIES ( +"key1" = "val1", +"key2" = "val2" +) + +At present, we support the following parameters: + +1. desired_concurrent_number + +Expected concurrency. A routine import job is divided into multiple subtasks. This parameter specifies how many tasks a job can perform simultaneously. Must be greater than 0. The default is 3. +This concurrency degree is not the actual concurrency degree. The actual concurrency degree will be considered comprehensively by the node number, load and data source of the cluster. +Example: + +"desired_concurrent_number" = "3" + +2. max_batch_interval/max_batch_rows/max_batch_size + +These three parameters are respectively expressed as follows: +1) Maximum execution time per sub-task in seconds. The range is 5 to 60. The default is 10. +2) The maximum number of rows read by each subtask. Must be greater than or equal to 200000. The default is 2000. +3) The maximum number of bytes read by each subtask. Units are bytes, ranging from 100MB to 1GB. The default is 100MB. + +These three parameters are used to control the execution time and processing capacity of a subtask. When any one reaches the threshold, the task ends. +Example: + +"max_batch_interval" = "20", +"max_batch_rows" = "300000", +"max_batch_size" = "209715200" + +Three The biggest mistake + +The maximum number of error lines allowed in the sampling window. Must be greater than or equal to 0. The default is 0, that is, no error lines are allowed. 
+
+4. job_properties
+
+General parameters of the routine load job.
+Grammar:
+
+PROPERTIES (
+"key1" = "val1",
+"key2" = "val2"
+)
+
+The following parameters are currently supported:
+
+1. desired_concurrent_number
+
+The desired concurrency. A routine load job is split into multiple subtasks; this parameter specifies how many subtasks a job may run at the same time. It must be greater than 0. The default is 3.
+This is only the desired concurrency; the actual concurrency is determined by the number of cluster nodes, the cluster load, and the data source.
+Example:
+
+"desired_concurrent_number" = "3"
+
+2. max_batch_interval/max_batch_rows/max_batch_size
+
+These three parameters mean, respectively:
+1) The maximum execution time of each subtask, in seconds. The range is 5 to 60. The default is 10.
+2) The maximum number of rows read by each subtask. It must be greater than or equal to 200000. The default is 200000.
+3) The maximum number of bytes read by each subtask. The unit is bytes, and the range is 100MB to 1GB. The default is 100MB.
+
+These three parameters control the execution time and processing volume of a subtask. When any of them reaches its threshold, the subtask ends.
+Example:
+
+"max_batch_interval" = "20",
+"max_batch_rows" = "300000",
+"max_batch_size" = "209715200"
+
+3. max_error_number
+
+The maximum number of error rows allowed within the sampling window. It must be greater than or equal to 0. The default is 0, that is, no error rows are allowed.
+The sampling window is max_batch_rows * 10. That is, if the number of error rows within the sampling window exceeds max_error_number, the routine job is suspended, and manual intervention is needed to check data quality.
+Rows filtered out by where conditions are not counted as error rows.
+
+5. data_source
+
+The type of data source. Currently supported:
+
+KAFKA
+
+6. data_source_properties
+
+Specifies information about the data source.
+Grammar:
+
+(
+"key1" = "val1",
+"key2" = "val2"
+)
+
+1. KAFKA data source
+
+1. kafka_broker_list
+
+Kafka broker connection information, in the format ip:port. Multiple brokers are separated by commas.
+Example:
+
+"kafka_broker_list" = "broker1:9092,broker2:9092"
+
+2. kafka_topic
+
+Specifies the Kafka topic to subscribe to.
+Example:
+
+"kafka_topic" = "my_topic"
+
+3. kafka_partitions/kafka_offsets
+
+Specify the Kafka partitions to subscribe to and the corresponding starting offset of each partition.
+
+An offset can be a specific value greater than or equal to 0, or:
+1) OFFSET_BEGINNING: subscribe from the position where data becomes available.
+2) OFFSET_END: subscribe from the end.
+
+If not specified, all partitions under the topic are subscribed from OFFSET_END by default.
+Example:
+
+"kafka_partitions" = "0,1,2,3",
+"kafka_offsets" = "101,0,OFFSET_BEGINNING,OFFSET_END"
+
+4. property
+
+Specifies custom Kafka parameters.
+Its function is equivalent to the "--property" parameter of the Kafka shell.
+When the value of a parameter is a file, the keyword "FILE:" needs to be added before the value.
+For how to create a file, see "HELP CREATE FILE;"
+For the full list of supported custom parameters, see the client-side configuration items in the official CONFIGURATION document of librdkafka.
+
+Example:
+"property.client.id" = "12345",
+"property.ssl.ca.location" = "FILE:ca.pem"
+
+When connecting to Kafka with SSL, the following parameters need to be specified:
+
+"property.security.protocol" = "ssl",
+"property.ssl.ca.location" = "FILE:ca.pem",
+"property.ssl.certificate.location" = "FILE:client.pem",
+"property.ssl.key.location" = "FILE:client.key",
+"property.ssl.key.password" = "abcdefg"
+
+Among them:
+property.security.protocol and property.ssl.ca.location are required; they specify that the connection mode is SSL and the location of the CA certificate.
+
+If client authentication is enabled on the Kafka server side, the following also need to be set:
+
+"property.ssl.certificate.location"
+"property.ssl.key.location"
+"property.ssl.key.password"
+
+They are used to specify the client's public key, the client's private key, and the password of the private key, respectively.
+
+
+7. Import data format samples
+
+Integer classes (TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1, 1000, 1234
+Floating point classes (FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, 356
+Date classes (DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03.
+String classes (CHAR/VARCHAR) (without quotation marks): I am a student, a
+NULL value: \N
+
+## example
+
+1. Create a Kafka routine load job named test1 for example_tbl of example_db.
+
+CREATE ROUTINE LOAD example_db.test1 ON example_tbl
+COLUMNS (k1, k2, k3, v1, v2, v3 = k1 * 100),
+WHERE k1 > 100 and k2 like "%doris%"
+PROPERTIES
+(
+"desired_concurrent_number"="3",
+"max_batch_interval" = "20",
+"max_batch_rows" = "300000",
+"max_batch_size" = "209715200"
+)
+FROM KAFKA
+(
+"kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
+"kafka_topic" = "my_topic",
+"kafka_partitions" = "0,1,2,3",
+"kafka_offsets" = "101,0,0,200"
+);
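+
+The following sketch is editorial and not part of the original help text; it illustrates the kafka_partitions/kafka_offsets options described in section 6, mixing a numeric offset with OFFSET_BEGINNING and OFFSET_END. The job name, table and broker addresses are assumptions used only for illustration.
+
+```
+CREATE ROUTINE LOAD example_db.offset_demo ON example_tbl
+COLUMNS (k1, k2, k3, v1, v2)
+FROM KAFKA
+(
+    "kafka_broker_list" = "broker1:9092,broker2:9092",
+    "kafka_topic" = "my_topic",
+    "kafka_partitions" = "0,1,2,3",
+    "kafka_offsets" = "101,0,OFFSET_BEGINNING,OFFSET_END"
+);
+```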
+
+2. Import data from a Kafka cluster through SSL authentication, and set the client.id parameter at the same time.
+
+CREATE ROUTINE LOAD example_db.test1 ON example_tbl
+COLUMNS (k1, k2, k3, v1, v2, v3 = k1 * 100),
+WHERE k1 > 100 and k2 like "%doris%"
+PROPERTIES
+(
+"desired_concurrent_number"="3",
+"max_batch_interval" = "20",
+"max_batch_rows" = "300000",
+"max_batch_size" = "209715200"
+)
+FROM KAFKA
+(
+"kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
+"kafka_topic" = "my_topic",
+"property.security.protocol" = "ssl",
+"property.ssl.ca.location" = "FILE:ca.pem",
+"property.ssl.certificate.location" = "FILE:client.pem",
+"property.ssl.key.location" = "FILE:client.key",
+"property.ssl.key.password" = "abcdefg",
+"property.client.id" = "my_client_id"
+);
+
+## keyword
+CREATE,ROUTINE,LOAD
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER_EN.md
new file mode 100644
index 00000000000000..df1894fffd31f4
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER_EN.md
@@ -0,0 +1,25 @@
+# SHOW ALTER
+Description
+This statement is used to show the execution of the various modification tasks currently in progress.
+Grammar:
+SHOW ALTER [CLUSTER | TABLE [COLUMN | ROLLUP] [FROM db_name]];
+
+Explain:
+TABLE COLUMN: shows ALTER tasks that modify columns
+TABLE ROLLUP: shows tasks that create or delete a ROLLUP index
+If db_name is not specified, the current default DB is used
+CLUSTER: shows tasks related to cluster operations (administrators only! To be implemented...)
+
+## example
+1. Show the task execution of all column modifications of the default DB
+SHOW ALTER TABLE COLUMN;
+
+2. Show the execution of tasks that create or delete a ROLLUP index for the specified DB
+SHOW ALTER TABLE ROLLUP FROM example_db;
+
+3. Show tasks related to cluster operations (administrators only! To be implemented...)
+SHOW ALTER CLUSTER;
+
+## keyword
+SHOW,ALTER
+
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP_EN.md
new file mode 100644
index 00000000000000..7aadc834988e36
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP_EN.md
@@ -0,0 +1,36 @@
+# SHOW BACKUP
+Description
+This statement is used to view BACKUP tasks
+Grammar:
+SHOW BACKUP [FROM db_name]
+
+Explain:
+1. Only the most recent BACKUP task is kept in Palo.
+2. The columns have the following meanings:
+JobId: Unique job ID
+SnapshotName: The name of the backup
+DbName: The database it belongs to
+State: Current phase
+PENDING: The initial state after submitting a job
+SNAPSHOTING: Executing the snapshot
+UPLOAD_SNAPSHOT: Snapshot completed, ready for upload
+UPLOADING: Uploading the snapshot
+SAVE_META: Saving job meta-information as a local file
+UPLOAD_INFO: Uploading job meta-information
+FINISHED: Job succeeded
+CANCELLED: Job failed
+BackupObjs: The tables and partitions being backed up
+CreateTime: Job submission time
+SnapshotFinishedTime: Snapshot completion time
+UploadFinishedTime: Snapshot upload completion time
+FinishedTime: Job end time
+UnfinishedTasks: The unfinished sub-task IDs shown in the SNAPSHOTING and UPLOADING phases
+Status: Shows the failure message if the job fails
+Timeout: Job timeout, in seconds
+
+## example
+1. View the most recent BACKUP task under example_db.
+SHOW BACKUP FROM example_db; + +## keyword +SHOW, BACKUP diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES_EN.md new file mode 100644 index 00000000000000..db9b072109fe72 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES_EN.md @@ -0,0 +1,9 @@ +# SHOW DATABASES +Description +This statement is used to show the currently visible DB +Grammar: +SHOW DATABASES; + +## keyword +SHOW,DATABASES + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA_EN.md new file mode 100644 index 00000000000000..6857c50452d07b --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA_EN.md @@ -0,0 +1,20 @@ +# SHOW DATA +Description +This statement is used to show the amount of data +Grammar: +SHOW DATA [FROM db_name[.table_name]]; + +Explain: +1. If you do not specify the FROM clause, use the amount of data that shows the current DB subdivided into tables +2. If the FROM clause is specified, the amount of data subdivided into indices under the table is shown. +3. If you want to see the size of individual Partitions, see help show partitions + +'35;'35; example +1. Display the data volume and aggregate data volume of each table of default DB +SHOW DATA; + +2. Display the subdivision data volume of the specified table below the specified DB +SHOW DATA FROM example_db.table_name; + +## keyword +SHOW,DATA diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE_EN.md new file mode 100644 index 00000000000000..6a6bdacea64259 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE_EN.md @@ -0,0 +1,13 @@ +# SHOW DELETE +Description +This statement is used to show successful historical delete tasks performed +Grammar: +SHOW DELETE [FROM db_name] + +'35;'35; example +1. Show all historical delete tasks for database +SHOW DELETE FROM database; + +## keyword +SHOW,DELETE + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT_EN.md new file mode 100644 index 00000000000000..638429706b81f8 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT_EN.md @@ -0,0 +1,36 @@ +# SHOW EXPORT +Description +This statement is used to show the execution of the specified export task +Grammar: +SHOW EXPORT +[FROM both names] +[ +WHERE +[EXPORT_JOB_ID = your_job_id] +[STATE = ["PENDING"|"EXPORTING"|"FINISHED"|"CANCELLED"]] +] +[ORDER BY ...] +[LIMIT limit]; + +Explain: +1) If db_name is not specified, use the current default DB +2) If STATE is specified, the EXPORT state is matched +3) Any column combination can be sorted using ORDER BY +4) If LIMIT is specified, the limit bar matching record is displayed. Otherwise, all of them will be displayed. + +'35;'35; example +1. Show all export tasks of default DB +SHOW EXPORT; + +2. Show the export tasks of the specified db, sorted in descending order by StartTime +SHOW EXPORT FROM example_db ORDER BY StartTime DESC; + +3. 
Show the export task of the specified db, state is "exporting" and sorted in descending order by StartTime +SHOW EXPORT FROM example_db WHERE STATE = "exporting" ORDER BY StartTime DESC; + +4. Show the export task of specifying dB and job_id +SHOW EXPORT FROM example_db WHERE EXPORT_JOB_ID = job_id; + +## keyword +SHOW,EXPORT + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD_EN.md new file mode 100644 index 00000000000000..2cf1b024547430 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD_EN.md @@ -0,0 +1,48 @@ +# SHOW LOAD +Description +This statement is used to show the execution of the specified import task +Grammar: +SHOW LOAD +[FROM both names] +[ +WHERE +[LABEL [ = "your_label" | LIKE "label_matcher"]] +[STATE = ["PENDING"|"ETL"|"LOADING"|"FINISHED"|"CANCELLED"|]] +] +[ORDER BY ...] +[LIMIT limit][OFFSET offset]; + +Explain: +1) If db_name is not specified, use the current default DB +2) If you use LABEL LIKE, the label that matches the import task contains the import task of label_matcher +3) If LABEL = is used, the specified label is matched accurately. +4) If STATE is specified, the LOAD state is matched +5) Arbitrary column combinations can be sorted using ORDER BY +6) If LIMIT is specified, the limit bar matching record is displayed. Otherwise, all of them will be displayed. +7) If OFFSET is specified, the query results are displayed from offset. By default, the offset is 0. +8) If broker/mini load is used, the connection in the URL column can be viewed using the following command: + +SHOW LOAD WARNINGS ON 'url' + +'35;'35; example +1. Show all import tasks of default DB +SHOW LOAD; + +2. Show the import task of the specified db. The label contains the string "2014_01_02", showing the oldest 10 +SHOW LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10; + +3. Show the import task of the specified db, specify label as "load_example_db_20140102" and sort it in descending order by LoadStartTime +SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" ORDER BY LoadStartTime DESC; + +4. Show the import task of the specified db, specify label as "load_example_db_20140102" and state as "load", and sort it in descending order by LoadStartTime +SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" AND STATE = "loading" ORDER BY LoadStartTime DESC; + +5. Show the import task of the specified dB and sort it in descending order by LoadStartTime, and display 10 query results starting with offset 5 +SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 5,10; +SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 10 offset 5; + +6. 
Small batch import is a command to view the import status +curl --location-trusted -u {user}:{passwd} http://{hostname}:{port}/api/{database}/_load_info?label={labelname} + +## keyword +SHOW,LOAD diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS_EN.md new file mode 100644 index 00000000000000..4e0dbb933054db --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS_EN.md @@ -0,0 +1,16 @@ +# SHOW PARTITIONS +Description +This statement is used to display partition information +Grammar: +SHOW PARTITIONS FROM [db_name.]table_name [PARTITION partition_name]; + +'35;'35; example +1. Display partition information for the specified table below the specified DB +SHOW PARTITIONS FROM example_db.table_name; + +1. Display information about the specified partition of the specified table below the specified DB +SHOW PARTITIONS FROM example_db.table_name PARTITION p1; + +## keyword +SHOW,PARTITIONS + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY_EN.md new file mode 100644 index 00000000000000..44b94b79491d1b --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY_EN.md @@ -0,0 +1,16 @@ +# SHOW PROPERTY +Description +This statement is used to view user attributes +Grammar: +SHOW PROPERTY [FOR user] [LIKE key] + +'35;'35; example +1. View the attributes of the jack user +SHOW PROPERTY FOR 'jack' + +2. View Jack user import cluster related properties +SHOW PROPERTY FOR 'jack' LIKE '%load_cluster%' + +## keyword +SHOW, PROPERTY + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES_EN.md new file mode 100644 index 00000000000000..286670a84aa566 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES_EN.md @@ -0,0 +1,23 @@ +# SHOW REPOSITORIES +Description +This statement is used to view the currently created warehouse. +Grammar: +SHOW REPOSITORIES; + +Explain: +1. Each column has the following meanings: +RepoId: Unique Warehouse ID +RepoName: Warehouse name +CreateTime: The first time the warehouse was created +IsReadOnly: Is it a read-only warehouse? +Location: The root directory in the repository for backing up data +Broker: Dependent Broker +ErrMsg: Palo regularly checks the connectivity of the warehouse, and if problems occur, error messages are displayed here. + +'35;'35; example +1. View the warehouse that has been created: +SHOW REPOSITORIES; + +## keyword +SHOW, REPOSITORY, REPOSITORIES + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE_EN.md new file mode 100644 index 00000000000000..d7d1b5cbc12714 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE_EN.md @@ -0,0 +1,41 @@ +# SHOW RESTORE +Description +This statement is used to view RESTORE tasks +Grammar: +SHOW RESTORE [FROM db_name] + +Explain: +1. Palo -20165;- 20445;- 233844;-36817;- 27425RESTORE -21153s; +2. 
Each column has the following meanings: +JobId: Unique job ID +Label: The name of the backup to be restored +Timestamp: Time version of backup to be restored +DbName: Subordinate database +State: Current phase +PENDING: The initial state after submitting a job +SNAPSHOTING: In the execution snapshot +DOWNLOAD: The snapshot is complete, ready to download the snapshot in the warehouse +DOWNLOADING: Snapshot Download +COMMIT: Snapshot download completed, ready to take effect +COMMITING: In force +FINISHED: Operation Successful +CANCELLED: Job Failure +AllowLoad: Is import allowed on recovery (currently not supported) +ReplicationNum: Specifies the number of replicas recovered +Restore Jobs: Tables and partitions to be restored +CreateTime: Task submission time +MetaPreparedTime: Metadata Readiness Completion Time +Snapshot Finished Time: Snapshot completion time +Download Finished Time: Snapshot download completion time +FinishedTime: Job End Time +Unfinished Tasks: The unfinished sub-task ID is displayed in the SNAP HOTING, DOWNLOADING, and COMMITING phases +Status: Display failure information if the job fails +Timeout: Job timeout, per second + +'35;'35; example +1. Check the last RESTORE task under example_db. +SHOW RESTORE FROM example_db; + +## keyword +SHOW, RESTORE + diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK_EN.md new file mode 100644 index 00000000000000..e1c4f7adcecbe9 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK_EN.md @@ -0,0 +1,9 @@ +# SHOW ROUTINE LOAD TASK +'35;'35; example + +1. Show sub-task information for a routine import task called test 1. + +SHOW ROUTINE LOAD TASK WHERE JobName = "test1"; + +## keyword +SHOW,ROUTINE,LOAD,TASK diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD_EN.md new file mode 100644 index 00000000000000..b4723dcde6c321 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD_EN.md @@ -0,0 +1,31 @@ +# SHOW ROUTINE LOAD +'35;'35; example + +1. Show all routine import jobs named test 1 (including stopped or cancelled jobs). The result is one or more lines. + +SHOW ALL ROUTINE LOAD FOR test1; + +2. Show the current running routine import job named test1 + +SHOW ROUTINE LOAD FOR test1; + +3. Display all routine import jobs (including stopped or cancelled jobs) under example_db. The result is one or more lines. + +use example_db; +SHOW ALL ROUTINE LOAD; + +4. Display all running routine import jobs under example_db + +use example_db; +SHOW ROUTINE LOAD; + +5. Display the current running routine import job named test1 under example_db + +SHOW ROUTINE LOAD FOR example_db.test1; + +6. Display all routine import jobs named test1 (including stopped or cancelled jobs) under example_db. The result is one or more lines. 
+
+SHOW ALL ROUTINE LOAD FOR example_db.test1;
+
+## keyword
+SHOW,ROUTINE,LOAD
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT_EN.md
new file mode 100644
index 00000000000000..5eeef7b296d556
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT_EN.md
@@ -0,0 +1,30 @@
+# SHOW SNAPSHOT
+Description
+This statement is used to view the backups that already exist in a repository.
+Grammar:
+SHOW SNAPSHOT ON `repo_name`
+[WHERE SNAPSHOT = "snapshot" [AND TIMESTAMP = "backup_timestamp"]];
+
+Explain:
+1. The columns have the following meanings:
+Snapshot: The name of the backup
+Timestamp: The time version of the backup
+Status: OK if the backup is normal, otherwise an error message is shown
+
+2. If TIMESTAMP is specified, the following additional information is shown:
+Database: The name of the database the backup data belongs to
+Details: The entire directory and file structure of the backup data, in JSON form
+
+## example
+1. View the existing backups in the repository example_repo:
+SHOW SNAPSHOT ON example_repo;
+
+2. View only the backup named backup1 in the repository example_repo:
+SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "backup1";
+
+3. View the details of the backup named backup1 in the repository example_repo with the time version "2018-05-05-15-34-26":
+SHOW SNAPSHOT ON example_repo
+WHERE SNAPSHOT = "backup1" AND TIMESTAMP = "2018-05-05-15-34-26";
+
+## keyword
+SHOW, SNAPSHOT
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES_EN.md
new file mode 100644
index 00000000000000..1bc7aaf9de66ef
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES_EN.md
@@ -0,0 +1,8 @@
+# SHOW TABLES
+Description
+This statement is used to show all tables under the current DB
+Grammar:
+SHOW TABLES;
+
+## keyword
+SHOW,TABLES
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET_EN.md
new file mode 100644
index 00000000000000..0bd827cfbe0f7b
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET_EN.md
@@ -0,0 +1,16 @@
+# SHOW TABLET
+Description
+This statement is used to display tablet-related information (for administrators only)
+Grammar:
+SHOW TABLET
+[FROM [db_name.]table_name | tablet_id]
+
+## example
+1. Display the information of all tablets of the specified table in the specified DB
+SHOW TABLET FROM example_db.table_name;
+
+2. Display the parent-level ID information of the tablet with tablet ID 10000
+SHOW TABLET 10000;
+
+## keyword
+SHOW,TABLET
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD_EN.md
new file mode 100644
index 00000000000000..3860ea34ec453d
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD_EN.md
@@ -0,0 +1,9 @@
+# STOP ROUTINE LOAD
+## example
+
+1. Stop the routine load job named test1.
+ +STOP ROUTINE LOAD FOR test1; + +## keyword +STOP,ROUTINE,LOAD diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md new file mode 100644 index 00000000000000..7c4efcc03e96b1 --- /dev/null +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md @@ -0,0 +1,91 @@ +# STREAM LOAD +Description +NAME: +stream-load: load data to table in streaming + +SYNOPSIS +curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load + +DESCRIPTION +This statement is used to import data to a specified table, which differs from ordinary Load in that it is imported synchronously. +This import method can still guarantee the atomicity of a batch of import tasks, either all data imports succeed or all failures. +This operation updates the rollup table data associated with the base table at the same time. +This is a synchronous operation, the entire data import work is completed and returned to the user import results. +Currently, HTTP chunked and non-chunked uploads are supported. For non-chunked uploads, Content-Length must be used to indicate the length of uploaded content, so as to ensure the integrity of data. +In addition, it is better for users to set Expect Header field content 100-continue, which can avoid unnecessary data transmission in some error scenarios. + +OPTIONS +Users can pass in import parameters through the Header section of HTTP + +Label: A label that is imported at one time. Data from the same label cannot be imported many times. Users can avoid the problem of duplicate data import by specifying Label. +Currently Palo retains the recently successful label within 30 minutes. + +Column_separator: Specifies the column separator in the import file, defaulting to t. If the character is invisible, it needs to be prefixed with x, using hexadecimal to represent the separator. +For example, the separator X01 of the hit file needs to be specified as - H "column_separator: x01" + +Columns: Used to specify the correspondence between columns in the import file and columns in the table. If the column in the source file corresponds exactly to the content in the table, then you do not need to specify the content of this field. +If the source file does not correspond to the table schema, some data conversion is required for this field. There are two forms of column. One is to directly correspond to the field in the imported file, which is represented by the field name. +One is a derived column with the grammar `column_name'= expression. Give me a few examples to help understand. +Example 1: There are three columns "c1, c2, c3" in the table, and the three columns in the source file correspond to "c3, c2, c1" at one time; then - H "columns: c3, c2, c1" needs to be specified. +Example 2: There are three columns "c1, c2, c3" in the table, and the first three columns in the source file correspond in turn, but there is one more column; then - H"columns: c1, c2, c3, XXX"need to be specified; +The last column is free to specify a name placeholder. +Example 3: There are three columns "year, month, day" in the table. There is only one time column in the source file, in the format of "2018-06-01:02:03"; +那么可以指定-H "columns: col, year = year(col), month=month(col), day=day(col)"完成导入 + +Where: Used to extract some data. Users can set this option if they need to filter out unnecessary data. 
+Example 1: If you import only data whose column is larger than K1 equals 20180601, you can specify - H "where: K1 = 20180601" at import time. + +Max_filter_ratio: The ratio of data that is most tolerant of being filterable (for reasons such as data irregularities). Default zero tolerance. Data irregularities do not include rows filtered through where conditions. + +Partitions: Used to specify the partitions designed for this import. If the user can determine the partition corresponding to the data, it is recommended to specify the item. Data that does not satisfy these partitions will be filtered out. +For example, specify imports to p1, P2 partitions, - H "partitions: p1, p2" + +RETURN VALUES +When the import is complete, the relevant content of the import will be returned in Json format. Currently includes the following fields +Status: Import the final state. +Success: This means that the import is successful and the data is visible. +Publish Timeout: Represents that the import job has been successfully Commit, but for some reason it is not immediately visible. Users can view imports as successful without retrying +Label Already Exists: Indicates that the Label has been occupied by other jobs, either successfully imported or being imported. +Users need to use the get label state command to determine subsequent operations +Others: The import failed, and the user can specify Label to retry the job. +Message: Detailed description of import status. Failure returns the specific cause of failure. +NumberTotal Rows: The total number of rows read from the data stream +Number Loaded Rows: Number of rows imported for this time is valid only for Success +Number Filtered Rows: The number of rows filtered out by this import, that is, the number of rows whose data quality is not up to par +Number Unselected Rows: Number of rows filtered out by where condition in this import +LoadBytes: The amount of data in the source file imported +LoadTime Ms: The time taken for this import +ErrorURL: Specific content of filtered data, retaining only the first 1000 items + +ERRORS +The import error details can be viewed by the following statement: + +SHOW LOAD WARNINGS ON 'url' + +The URL is the URL given by Error URL. + +'35;'35; example + +1. Import the data from the local file'testData'into the table'testTbl' in the database'testDb', and use Label for de-duplication. +curl --location-trusted -u root -H "label:123" -T testData http://host:port/api/testDb/testTbl/_stream_load + +2. Import the data from the local file'testData'into the table'testTbl' in the database'testDb', use Label for de-duplication, and import only the data whose K1 equals 20180601. +curl --location-trusted -u root -H "label:123" -H "where: k1=20180601" -T testData http://host:port/api/testDb/testTbl/_stream_load + +3. Import data from the local file'testData'into the'testTbl' table in the database'testDb', allowing a 20% error rate (the user is in defalut_cluster) +curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -T testData http://host:port/api/testDb/testTbl/_stream_load + +4. Import the data from the local file'testData'into the table'testTbl' in the database'testDb', allowing a 20% error rate, and specify the column name of the file (the user is in defalut_cluster) +curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "columns: k2, k1, v1" -T testData http://host:port/api/testDb/testTbl/_stream_load + +5. 
Import the data from the local file 'testData' into the table 'testTbl' in the database 'testDb', allowing a 20% error rate, and specify the partitions (the user is in default_cluster)
+curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "partitions: p1, p2" -T testData http://host:port/api/testDb/testTbl/_stream_load
+
+6. Import in streaming mode (the user is in default_cluster)
+seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load
+
+7. Import into a table containing HLL columns; the HLL columns can be generated from columns in the table or from columns in the data
+curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load
+
+## keyword
+STREAM,LOAD
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/broker_load_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/broker_load_EN.md
new file mode 100644
index 00000000000000..8d5a19c3244ee1
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/broker_load_EN.md
@@ -0,0 +1,366 @@
+# BROKER LOAD
+Description
+
+Broker Load accesses data in the corresponding data sources and imports it through the broker processes deployed with the Palo cluster.
+The deployed brokers can be viewed with the SHOW BROKER command.
+The following four data sources are currently supported:
+
+1. Baidu HDFS: Baidu's internal HDFS, limited to use inside Baidu.
+2. Baidu AFS: Baidu's internal AFS, limited to use inside Baidu.
+3. Baidu Object Storage (BOS): Baidu Object Storage, available to Baidu internal users, public cloud users, and other users who can access BOS.
+4. Apache HDFS
+
+Grammar:
+
+LOAD LABEL load_label
+(
+data_desc1[, data_desc2, ...]
+)
+WITH BROKER broker_name
+[broker_properties]
+[opt_properties];
+
+1. load_label
+
+The label of the current import batch. Unique within a database.
+Grammar:
+[database_name.]your_label
+
+2. data_desc
+
+Used to describe a batch of imported data.
+Grammar:
+DATA INFILE
+(
+"file_path1"[, file_path2, ...]
+)
+[NEGATIVE]
+INTO TABLE `table_name`
+[PARTITION (p1, p2)]
+[COLUMNS TERMINATED BY "column_separator"]
+[FORMAT AS "file_type"]
+[(column_list)]
+[SET (k1 = func(k2))]
+
+Explain:
+file_path:
+
+A file path can point to a single file, or the * wildcard can be used to match all files in a directory. The wildcard must match files, not directories.
+
+PARTITION:
+
+If this parameter is specified, only the specified partitions are imported, and data outside the imported partitions is filtered out.
+If not specified, all partitions of the table are imported by default.
+
+NEGATIVE:
+If this parameter is specified, it is equivalent to importing a batch of "negative" data, used to offset the same batch of data imported before.
+This parameter applies only when value columns exist and the aggregation type of all value columns is SUM.
+
+column_separator:
+
+Specifies the column separator in the import file. The default is \t.
+If the character is invisible, it needs to be prefixed with \x, using hexadecimal to represent the separator.
+For example, the separator \x01 of hive files is specified as "\\x01"
+
+file_type:
+
+Specifies the type of the imported file, such as parquet or csv. By default it is determined by the file extension.
+
+column_list:
+
+Used to specify the correspondence between columns in the import file and columns in the table.
+When a column in the import file needs to be skipped, specify it as a column name that does not exist in the table.
+Grammar:
+(col_name1, col_name2, ...)
+
+SET:
+
+If this parameter is specified, a column of the source file can be transformed by a function, and the transformed result is then imported into the table.
+The functions currently supported are listed below (a combined sketch follows this list):
+
+strftime(fmt, column) date conversion function
+fmt: the date format, such as %Y%m%d%H%i%S (year month day hour minute second)
+column: a column in column_list, that is, a column in the input file. Its stored content should be a numeric timestamp.
+If there is no column_list, the columns of the input file are assumed, by default, to be in the same order as the columns of the Palo table.
+Note: The numeric timestamp is in seconds.
+
+time_format(output_fmt, input_fmt, column) date format conversion
+output_fmt: the date format after conversion, such as %Y%m%d%H%i%S (year month day hour minute second)
+input_fmt: the date format of the column before conversion, such as %Y%m%d%H%i%S (year month day hour minute second)
+column: a column in column_list, that is, a column in the input file. Its stored content should be a date string in input_fmt format.
+If there is no column_list, the columns of the input file are assumed, by default, to be in the same order as the columns of the Palo table.
+
+alignment_timestamp(precision, column) aligns the timestamp to the specified precision
+precision: year|month|day|hour
+column: a column in column_list, that is, a column in the input file. Its stored content should be a numeric timestamp.
+If there is no column_list, the columns of the input file are assumed, by default, to be in the same order as the columns of the Palo table.
+Note: When the alignment precision is year or month, only timestamps in the range 20050101-20191231 are supported.
+
+default_value(value) sets the default value for an imported column
+If not specified, the column's default value from table creation is used
+
+md5sum(column1, column2, ...) computes the MD5 of the values of the specified imported columns, returning a 32-character hexadecimal string
+
+replace_value(old_value[, new_value]) replaces old_value found in the import file with new_value
+If new_value is not specified, the column's default value from table creation is used
+
+hll_hash(column) transforms a column in the table or in the data into the data structure of an HLL column
+
+now() sets the imported value of a column to the time at which the import executes. The column must be of DATE/DATETIME type.
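+
+The sketch below is editorial and not part of the original help text; it shows a data_desc that combines two of the functions above inside a complete LOAD statement. The label set_func_demo, the source column tmp_time, the table my_table and the broker name my_hdfs_broker are assumptions used only for illustration.
+
+```
+LOAD LABEL example_db.set_func_demo
+(
+    DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
+    INTO TABLE `my_table`
+    COLUMNS TERMINATED BY ","
+    (tmp_time, k2, k3)
+    SET (
+        k1 = strftime("%Y-%m-%d %H:%i:%S", tmp_time),
+        k4 = default_value("1")
+    )
+)
+WITH BROKER my_hdfs_broker
+(
+    "username" = "hdfs_user",
+    "password" = "hdfs_passwd"
+);
+```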
+
+3. broker_name
+
+The name of the broker to use, which can be viewed with the SHOW BROKER command.
+
+4. broker_properties
+
+Used to provide the information needed to access the data source through the broker. Different brokers, and different access methods, require different information.
+
+1. Baidu HDFS/AFS
+
+Access to Baidu's internal HDFS/AFS currently only supports simple authentication, which requires:
+username: hdfs username
+password: hdfs password
+
+2. BOS
+
+Requires:
+bos_endpoint: the endpoint of BOS
+bos_accesskey: the access key of the public cloud user
+bos_secret_accesskey: the secret access key of the public cloud user
+
+3. Apache HDFS
+
+The community version of HDFS supports simple authentication and Kerberos authentication, as well as HA configuration.
+Simple authentication:
+hadoop.security.authentication = simple (default)
+username: hdfs username
+password: hdfs password
+
+Kerberos authentication:
+hadoop.security.authentication = kerberos
+kerberos_principal: specifies the Kerberos principal
+kerberos_keytab: specifies the path of the Kerberos keytab file. The file must be on the server where the broker process runs.
+kerberos_keytab_content: specifies the base64-encoded content of the Kerberos keytab file. Choose either this or the kerberos_keytab configuration.
+
+namenode HA:
+By configuring namenode HA, the new namenode can be identified automatically when a namenode switchover happens
+dfs.nameservices: the name of the HDFS service, user defined, e.g. "dfs.nameservices" = "my_ha"
+dfs.ha.namenodes.xxx: the custom names of the namenodes, separated by commas, where xxx is the custom name from dfs.nameservices, e.g. "dfs.ha.namenodes.my_ha" = "my_nn"
+dfs.namenode.rpc-address.xxx.nn: the RPC address of a namenode, where nn is a namenode name configured in dfs.ha.namenodes.xxx, e.g. "dfs.namenode.rpc-address.my_ha.my_nn" = "host:port"
+dfs.client.failover.proxy.provider: the provider the client uses to connect to the namenode, default: org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
+
+4. opt_properties
+
+Used to specify some special parameters.
+Grammar:
+[PROPERTIES ("key"="value", ...)]
+
+The following parameters can be specified:
+timeout: specifies the timeout of the import operation, in seconds. The default timeout is 4 hours.
+max_filter_ratio: the maximum ratio of data that may be filtered out (for reasons such as data irregularity). Defaults to zero tolerance.
+exec_mem_limit: sets the upper memory limit for the import, in bytes. The default is 2GB. This is the memory limit on a single BE node.
+An import may be distributed across multiple BEs. Assume that processing 1GB of data on a single node requires up to 5GB of memory, and that a 1GB file is distributed across two nodes; then in theory each node needs 2.5GB of memory, and the parameter can be set to 2684354560, i.e. 2.5GB.
+strict_mode: whether the data is strictly restricted. The default is true.
+
+5. Import data format samples
+
+Integer classes (TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1, 1000, 1234
+Floating point classes (FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, 356
+Date classes (DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03.
+(Note: For other date formats, the strftime or time_format functions can be used in the import command to convert them)
+String classes (CHAR/VARCHAR): "I am a student", "a"
+NULL value: \N
+
+## example
+
+1. Import a batch of data from HDFS, specifying the timeout and the filter ratio. Use the broker named my_hdfs_broker, with simple authentication.
+
+LOAD LABEL example_db.label1
+(
+DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
+INTO TABLE `my_table`
+)
+WITH BROKER my_hdfs_broker
+(
+"username" = "hdfs_user",
+"password" = "hdfs_passwd"
+)
+PROPERTIES
+(
+"timeout" = "3600",
+"max_filter_ratio" = "0.1"
+);
+
+Where hdfs_host is the host of the namenode and hdfs_port is the fs.defaultFS port (default 9000)
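+
+An editorial note, not part of the original help text: Broker Load is submitted asynchronously, so after a statement like the one above returns, the progress of the job can be checked through the SHOW LOAD statement using the submitted label. The sketch below assumes the label label1 from example 1.
+
+```
+SHOW LOAD FROM example_db WHERE LABEL = "label1";
+```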
+
+2. Import a batch of data from AFS, containing multiple files. Import into different tables, specify the separators, and specify the column correspondence.
+
+LOAD LABEL example_db.label2
+(
+DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file1")
+INTO TABLE `my_table_1`
+COLUMNS TERMINATED BY ","
+(k1, k3, k2, v1, v2),
+DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file2")
+INTO TABLE `my_table_2`
+COLUMNS TERMINATED BY "\t"
+(k1, k2, k3, v2, v1)
+)
+WITH BROKER my_afs_broker
+(
+"username" = "afs_user",
+"password" = "afs_passwd"
+)
+PROPERTIES
+(
+"timeout" = "3600",
+"max_filter_ratio" = "0.1"
+);
+
+
+3. Import a batch of data from HDFS, specify hive's default separator \x01, and use the wildcard * to match all files in the directory.
+Use simple authentication, and configure namenode HA at the same time
+
+LOAD LABEL example_db.label3
+(
+DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*")
+INTO TABLE `my_table`
+COLUMNS TERMINATED BY "\\x01"
+)
+WITH BROKER my_hdfs_broker
+(
+"username" = "hdfs_user",
+"password" = "hdfs_passwd",
+"dfs.nameservices" = "my_ha",
+"dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
+"dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
+"dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
+"dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+)
+
+4. Import a batch of "negative" data from HDFS, using Kerberos authentication and providing the keytab file path.
+
+LOAD LABEL example_db.label4
+(
+DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file")
+NEGATIVE
+INTO TABLE `my_table`
+COLUMNS TERMINATED BY "\t"
+)
+WITH BROKER my_hdfs_broker
+(
+"hadoop.security.authentication" = "kerberos",
+"kerberos_principal" = "doris@YOUR.COM",
+"kerberos_keytab" = "/home/palo/palo.keytab"
+)
+
+5. Import a batch of data from HDFS and specify the partitions, using Kerberos authentication and providing the base64-encoded keytab file content.
+
+LOAD LABEL example_db.label5
+(
+DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
+INTO TABLE `my_table`
+PARTITION (p1, p2)
+COLUMNS TERMINATED BY ","
+(k1, k3, k2, v1, v2)
+)
+WITH BROKER my_hdfs_broker
+(
+"hadoop.security.authentication"="kerberos",
+"kerberos_principal" = "doris@YOUR.COM",
+"kerberos_keytab_content" = "BQIAAABEAAEACUJBSURVLkNPTQAEcGFsbw"
+)
+
+6. Import a batch of data from BOS, specify the partitions, and apply some transformations to the columns of the imported file, as follows:
+The table structure is:
+k1 datetime
+k2 date
+k3 bigint
+k4 varchar(20)
+k5 varchar(64)
+k6 int
+
+Assume the data file has only one row of data:
+
+1537002087,2018-08-09 11:12:13,1537002087,-,1
+
+The columns in the data file correspond to the columns specified in the import statement:
+tmp_k1, tmp_k2, tmp_k3, k6, v1
+
+The transformations are as follows:
+
+1) k1: transform the tmp_k1 timestamp column into datetime-type data
+2) k2: transform the tmp_k2 datetime-type data into date data
+3) k3: align the tmp_k3 timestamp column to a day-level timestamp
+4) k4: specify an import default value of 1
+5) k5: compute an MD5 value from the tmp_k1, tmp_k2, tmp_k3 columns
+6) k6: replace the value "-" in the imported file with 10
+
+LOAD LABEL example_db.label6
+(
+DATA INFILE("bos://my_bucket/input/file")
+INTO TABLE `my_table`
+PARTITION (p1, p2)
+COLUMNS TERMINATED BY ","
+(tmp_k1, tmp_k2, tmp_k3, k6, v1)
+SET (
+k1 = strftime("%Y-%m-%d %H:%i:%S", tmp_k1),
+k2 = time_format("%Y-%m-%d %H:%i:%S", "%Y-%m-%d", tmp_k2),
+k3 = alignment_timestamp("day", tmp_k3),
+k4 = default_value("1"),
+k5 = md5sum(tmp_k1, tmp_k2, tmp_k3),
+k6 = replace_value("-", "10")
+)
+)
+WITH BROKER my_bos_broker
+(
+"bos_endpoint" = "http://bj.bcebos.com",
+"bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
+"bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy"
+)
+
+7. Import data into a table containing HLL columns; the HLL columns can be generated from columns in the table or from columns in the data
+
+Suppose the table has three columns (id, v1, v2), where v1 and v2 are HLL columns, and the imported source file has three columns. In column_list, the first column is declared as id, and the second and third columns are temporarily named k1 and k2.
+In SET, the HLL columns in the table must be declared explicitly with hll_hash. The v1 column in the table equals hll_hash(k1) of the original data.
+LOAD LABEL example_db.label7
+(
+DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
+INTO TABLE `my_table`
+PARTITION (p1, p2)
+COLUMNS TERMINATED BY ","
+(id, k1, k2)
+SET (
+v1 = hll_hash(k1),
+v2 = hll_hash(k2)
+)
+)
+WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
+
+LOAD LABEL example_db.label8
+(
+DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
+INTO TABLE `my_table`
+PARTITION (p1, p2)
+COLUMNS TERMINATED BY ","
+(k1, k2, tmp_k3, tmp_k4, v1, v2)
+SET (
+v1 = hll_hash(tmp_k3),
+v2 = hll_hash(tmp_k4)
+)
+)
+WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
+
+8. Import data in Parquet files by specifying FORMAT as parquet. By default the format is determined by the file extension.
+LOAD LABEL example_db.label9
+(
+DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
+INTO TABLE `my_table`
+FORMAT AS "parquet"
+(k1, k2, k3)
+)
+WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
+
+## keyword
+BROKER,LOAD
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/insert_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/insert_EN.md
new file mode 100644
index 00000000000000..fef0baf56ab737
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/insert_EN.md
@@ -0,0 +1,80 @@
+# INSERT
+Description
+### Syntax
+
+```
+INSERT INTO table_name
+[ PARTITION (p1, ...) ]
+[ (column [, ...]) ]
+[ \[ hint [, ...]
\] ] +{ VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query } +``` + +### Parameters + +> tablet_name: Target table for importing data. It can be in the form of `db_name. table_name'. +> +> partition_names: Specifies that the partition to be imported must be a partition that exists in `table_name', with multiple partition names separated by commas +> +> column_name: The specified destination column must be a column that exists in `table_name'. +> +> expression: The corresponding expression that needs to be assigned to a column +> +> DEFAULT: Let the corresponding columns use default values +> +> query: A common query whose results are written to the target +> +> hint: Indicators used to indicate `INSERT'execution. ` Both streaming `and default non `streaming'methods use synchronization to complete `INSERT' statement execution +> The non `streaming'mode returns a label after execution to facilitate users to query the imported status through `SHOW LOAD'. + +'35;'35;' 35; Note + +When the `INSERT'statement is currently executed, the default behavior for data that does not conform to the target table is filtering, such as string length. However, for business scenarios where data is not filtered, the session variable `enable_insert_strict'can be set to `true' to ensure that `INSERT'will not be successfully executed when data is filtered out. + +'35;'35; example + +` The test `table contains two columns `c1', `c2'. + +1. Import a row of data into the `test'table + +``` +INSERT INTO test VALUES (1, 2); +INSERT INTO test (c1, c2) VALUES (1, 2); +INSERT INTO test (c1, c2) VALUES (1, DEFAULT); +INSERT INTO test (c1) VALUES (1); +``` + +The first and second sentences have the same effect. When the target column is not specified, the column order in the table is used as the default target column. +The third and fourth statements express the same meaning, using the default value of `c2'column to complete data import. + +2. Import multiline data into the `test'table at one time + +``` +INSERT INTO test VALUES (1, 2), (3, 2 + 2) +INSERT INTO test (c1, c2) VALUES (1, 2), (3, 2 * 2) +INSERT INTO test (c1) VALUES (1), (3) +Insert in test (C1, C2) values (1, Default), (3, Default) +``` + +The effect of the first and second statements is the same, and two data are imported into the `test'table at one time. +The effect of the third and fourth statements is known, using the default value of the `c2'column to import two data into the `test' table. + +3. Return results of importing a query statement synchronously into the `test'table + +``` +INSERT INTO test [streaming] SELECT * FROM test2 +INSERT INTO test (c1, c2) [streaming] SELECT * from test2 +``` + +4. Import an asynchronous query result into the `test'table + +``` +INSERT INTO test SELECT * FROM test2 +INSERT INTO test (c1, c2) SELECT * from test2 +``` + +Asynchronous imports are, in fact, encapsulated asynchronously by a synchronous import. Filling in streaming is as efficient as not filling in * execution. + +Since Doris used to import asynchronously, in order to be compatible with the old usage habits, the `INSERT'statement without streaming will still return a label. Users need to view the status of the `label' import job through the `SHOW LOAD command. 
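+
+The block below is editorial and not part of the original document; it is a minimal sketch of that label check. The label value shown is made up, since the actual label is only known from the statement's return message.
+
+```
+INSERT INTO test SELECT * FROM test2;
+-- Suppose the returned label is 'insert_2019_08_05_01' (a made-up value);
+-- the job status can then be queried with:
+SHOW LOAD WHERE LABEL = "insert_2019_08_05_01";
+```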
+## keyword
+INSERT
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/BIGINT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/BIGINT_EN.md
new file mode 100644
index 00000000000000..423ab66b49fcc9
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/BIGINT_EN.md
@@ -0,0 +1,7 @@
+# BIGINT
+Description
+BIGINT
+8-byte signed integer, range [-9223372036854775808, 9223372036854775807]
+
+## keyword
+BIGINT
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/BOOLEAN_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/BOOLEAN_EN.md
new file mode 100644
index 00000000000000..80bf1892ea0d9a
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/BOOLEAN_EN.md
@@ -0,0 +1,7 @@
+# BOOLEAN
+Description
+BOOL, BOOLEAN
+Like TINYINT, 0 stands for false and 1 for true.
+
+## keyword
+BOOLEAN
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/CHAR_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/CHAR_EN.md
new file mode 100644
index 00000000000000..a7c00f14274dbd
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/CHAR_EN.md
@@ -0,0 +1,7 @@
+# CHAR
+Description
+CHAR(M)
+A fixed-length string, where M is the length of the string. The range of M is 1-255.
+
+## keyword
+CHAR
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/DATETIME_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/DATETIME_EN.md
new file mode 100644
index 00000000000000..052e0d664b65b4
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/DATETIME_EN.md
@@ -0,0 +1,8 @@
+# DATETIME
+Description
+DATETIME
+Date and time type. The value range is ['1000-01-01 00:00:00', '9999-12-31 23:59:59'].
+The printed form is 'YYYY-MM-DD HH:MM:SS'
+
+## keyword
+DATETIME
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/DATE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/DATE_EN.md
new file mode 100644
index 00000000000000..2edc00ec82d29d
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/DATE_EN.md
@@ -0,0 +1,15 @@
+# DATE
+Description
+DATE function
+Syntax:
+DATE(expr)
+Converts the input expression to the DATE type
+DATE type
+Date type. The current value range is ['1900-01-01', '9999-12-31'], and the default printed form is 'YYYY-MM-DD'.
+
+## example
+mysql> SELECT DATE('2003-12-31 01:02:03');
+-> '2003-12-31'
+
+## keyword
+DATE
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/DECIMAL_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/DECIMAL_EN.md
new file mode 100644
index 00000000000000..cf8ef08387a5cf
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/DECIMAL_EN.md
@@ -0,0 +1,8 @@
+# DECIMAL
+Description
+DECIMAL(M[, D])
+High-precision fixed-point number. M stands for the total number of significant digits (precision), and D stands for the maximum number of digits after the decimal point (scale).
+The range of M is [1, 27] and the range of D is [1, 9]; in addition, M must be greater than or equal to D. The default value of D is 0.
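+
+The following example is editorial and not part of the original help text; it is a minimal sketch showing how M and D are used when defining a column. The table name, the id column and the distribution clause are assumptions for illustration only.
+
+```
+CREATE TABLE example_db.decimal_demo
+(
+    id INT,
+    price DECIMAL(27, 9)
+)
+DISTRIBUTED BY HASH(id) BUCKETS 8;
+```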
+
+## keyword
+DECIMAL
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/DOUBLE_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/DOUBLE_EN.md
new file mode 100644
index 00000000000000..df853cf4f099b4
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/DOUBLE_EN.md
@@ -0,0 +1,7 @@
+# DOUBLE
+Description
+DOUBLE
+8-byte floating point number
+
+## keyword
+DOUBLE
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/FLOAT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/FLOAT_EN.md
new file mode 100644
index 00000000000000..eb76968e449869
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/FLOAT_EN.md
@@ -0,0 +1,7 @@
+# FLOAT
+Description
+FLOAT
+4-byte floating point number
+
+## keyword
+FLOAT
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/HLL(HyperLogLog)_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/HLL(HyperLogLog)_EN.md
new file mode 100644
index 00000000000000..9c011b2ff26ada
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/HLL(HyperLogLog)_EN.md
@@ -0,0 +1,9 @@
+# HLL(HyperLogLog)
+Description
+HLL(M)
+A variable-length string, where M is the length of the string. The range of M is 1-16385.
+Users do not need to specify the length or a default value; the length is controlled inside the system according to the degree of aggregation of the data.
+HLL columns can only be queried or used through the matching functions hll_union_agg, hll_raw_agg, hll_cardinality and hll_hash.
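+
+The queries below are editorial and not part of the original help text; they are a minimal sketch of using the matching functions on an HLL column. The table example_tbl and its HLL column uv are assumptions for illustration only.
+
+```
+-- uv is assumed to be an HLL column of example_tbl
+SELECT hll_union_agg(uv) FROM example_tbl;
+SELECT hll_cardinality(uv) FROM example_tbl;
+```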
+
+## keyword
+HLL,HYPERLOGLOG
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/INT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/INT_EN.md
new file mode 100644
index 00000000000000..82d88429fc50f9
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/INT_EN.md
@@ -0,0 +1,7 @@
+# INT
+Description
+INT
+4-byte signed integer, range [-2147483648, 2147483647]
+
+## keyword
+INT
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/SMALLINT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/SMALLINT_EN.md
new file mode 100644
index 00000000000000..dbbf8a12ed63d6
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/SMALLINT_EN.md
@@ -0,0 +1,7 @@
+# SMALLINT
+Description
+SMALLINT
+2-byte signed integer, range [-32768, 32767]
+
+## keyword
+SMALLINT
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/TINYINT_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/TINYINT_EN.md
new file mode 100644
index 00000000000000..b71c9c64a32314
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/TINYINT_EN.md
@@ -0,0 +1,7 @@
+# TINYINT
+Description
+TINYINT
+1-byte signed integer, range [-128, 127]
+
+## keyword
+TINYINT
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Types/VARCHAR_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Types/VARCHAR_EN.md
new file mode 100644
index 00000000000000..ef25a6b522b876
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Types/VARCHAR_EN.md
@@ -0,0 +1,7 @@
+# VARCHAR
+Description
+VARCHAR(M)
+A variable-length string, where M is the length of the string. The range of M is 1-65535.
+
+## keyword
+VARCHAR
diff --git a/docs/documentation/en/sql-reference/sql-statements/Utility/util_stmt_EN.md b/docs/documentation/en/sql-reference/sql-statements/Utility/util_stmt_EN.md
new file mode 100644
index 00000000000000..06511e76e41ebb
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/Utility/util_stmt_EN.md
@@ -0,0 +1,13 @@
+# DESCRIBE
+Description
+This statement is used to display the schema information of the specified table
+Grammar:
+DESC[RIBE] [db_name.]table_name [ALL];
+
+Explain:
+If ALL is specified, the schemas of all indexes of the table are displayed
+
+## example
+
+## keyword
+DESCRIBE,DESC
diff --git a/docs/documentation/en/sql-reference/sql-statements/index.rst b/docs/documentation/en/sql-reference/sql-statements/index.rst
new file mode 100644
index 00000000000000..4f3490142cc122
--- /dev/null
+++ b/docs/documentation/en/sql-reference/sql-statements/index.rst
@@ -0,0 +1,8 @@
+==============
+DDL Statements
+==============
+
+.. toctree::
+   :glob:
+
+   *