From bf1917aead3ac0386372587022252772683d4b93 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Fri, 29 May 2020 16:57:16 +0800 Subject: [PATCH 01/18] Support independently add and build custom udf in Doris *Source code: The new directory named `custom_udf` is used to add and build custom udf. The directory structure is as follows: Doris -- be -- custom_udf -- fe If user wants to add a new udf in Doris, the source code will be placed in custom_udf such as: Doris -- be -- custom_udf -- src -- udf_samples -- udf_sample.h -- fe *Build: It can be built independently, and the udf does not affect BE. The file 'build_custom_udf.sh' is used to build udf with 2 params: --udf, --clean. The default behavior is to build the udf only. *Output: The dynamic link library will be placed in Doris -- be -- output -- custom_udf -- udf_samples -- libUdfSample.so Change-Id: Ic8ae16d37b53a904c3ec04e25f99e63442850e5a --- be/src/udf/udf.h | 2 +- be/src/udf/udf_internal.h | 2 +- build_custom_udf.sh | 154 ++++++++++++++++++ custom_udf/CMakeLists.txt | 58 +++++++ .../src/udf_samples/CMakeLists.txt | 0 .../src/udf_samples/uda_sample.cpp | 2 +- .../src/udf_samples/uda_sample.h | 2 +- .../src/udf_samples/udf_sample.cpp | 2 +- .../src/udf_samples/udf_sample.h | 2 +- 9 files changed, 218 insertions(+), 6 deletions(-) create mode 100755 build_custom_udf.sh create mode 100644 custom_udf/CMakeLists.txt rename {be => custom_udf}/src/udf_samples/CMakeLists.txt (100%) rename {be => custom_udf}/src/udf_samples/uda_sample.cpp (97%) rename {be => custom_udf}/src/udf_samples/uda_sample.h (98%) rename {be => custom_udf}/src/udf_samples/udf_sample.cpp (97%) rename {be => custom_udf}/src/udf_samples/udf_sample.h (98%) diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 9013d20cfb411b..226b199fd9b34f 100755 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -18,7 +18,7 @@ #ifndef DORIS_BE_UDF_UDF_H #define DORIS_BE_UDF_UDF_H -#include +#include #include // This is the only Doris header 
required to develop UDFs and UDAs. This header diff --git a/be/src/udf/udf_internal.h b/be/src/udf/udf_internal.h index 2c7a78d71726e6..85398691a0ce99 100755 --- a/be/src/udf/udf_internal.h +++ b/be/src/udf/udf_internal.h @@ -18,7 +18,7 @@ #ifndef DORIS_BE_UDF_UDF_INTERNAL_H #define DORIS_BE_UDF_UDF_INTERNAL_H -#include +#include #include #include #include diff --git a/build_custom_udf.sh b/build_custom_udf.sh new file mode 100755 index 00000000000000..79a921f2c9e327 --- /dev/null +++ b/build_custom_udf.sh @@ -0,0 +1,154 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to compile Apache Doris(incubating) +# Usage: +# sh build.sh build both Backend and Frontend. +# sh build.sh -clean clean previous output and build. +# +# You need to make sure all thirdparty libraries have been +# compiled and installed correctly. +############################################################## + +set -eo pipefail + +ROOT=`dirname "$0"` +ROOT=`cd "$ROOT"; pwd` + +export DORIS_HOME=${ROOT} + +. ${DORIS_HOME}/env.sh + +# build thirdparty libraries if necessary +if [[ ! 
-f ${DORIS_THIRDPARTY}/installed/lib/libs2.a ]]; then + echo "Thirdparty libraries need to be build ..." + ${DORIS_THIRDPARTY}/build-thirdparty.sh +fi + +PARALLEL=$[$(nproc)/4+1] + +# Check args +usage() { + echo " +Usage: $0 + Optional options: + --udf build custom UDF + --clean clean and build target + + Eg. + $0 build UDF without clean + $0 --udf build UDF without clean + $0 --udf --clean clean and build UDF + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -o 'h' \ + -l 'udf' \ + -l 'clean' \ + -l 'help' \ + -- "$@") + +if [ $? != 0 ] ; then + usage +fi + +eval set -- "$OPTS" + +BUILD_UDF= +CLEAN= +HELP=0 +if [ $# == 1 ] ; then + # defuat + BUILD_UDF=1 + CLEAN=0 +else + BUILD_UDF=0 + CLEAN=0 + while true; do + case "$1" in + --udf) BUILD_UDF=1 ; shift ;; + --clean) CLEAN=1 ; shift ;; + -h) HELP=1; shift ;; + --help) HELP=1; shift ;; + --) shift ; break ;; + *) ehco "Internal error" ; exit 1 ;; + esac + done +fi + +if [[ ${HELP} -eq 1 ]]; then + usage + exit +fi + +if [ ${CLEAN} -eq 1 -a ${BUILD_UDF} -eq 0 ]; then + echo "--clean can not be specified without --udf" + exit 1 +fi + +echo "Get params: + BUILD_UDF -- $BUILD_UDF + CLEAN -- $CLEAN +" + +cd ${DORIS_HOME} +# Clean and build UDF +if [ ${BUILD_UDF} -eq 1 ] ; then + CMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} + echo "Build UDF: ${CMAKE_BUILD_TYPE}" + CMAKE_BUILD_DIR=${DORIS_HOME}/custom_udf/build_${CMAKE_BUILD_TYPE} + if [ ${CLEAN} -eq 1 ]; then + rm -rf $CMAKE_BUILD_DIR + rm -rf ${DORIS_HOME}/custom_udf/output/ + fi + mkdir -p ${CMAKE_BUILD_DIR} + cd ${CMAKE_BUILD_DIR} + ${CMAKE_CMD} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} ../ + make -j${PARALLEL} VERBOSE=1 + make install + cd ${DORIS_HOME} +fi + +# Clean and prepare output dir +DORIS_OUTPUT=${DORIS_HOME}/output/ +mkdir -p ${DORIS_OUTPUT} + +#Copy UDF +if [ ${BUILD_UDF} -eq 1 ]; then + install -d ${DORIS_OUTPUT}/custom_udf/lib + for dir in "$(ls ${CMAKE_BUILD_DIR}/src)" + do + mkdir -p ${DORIS_OUTPUT}/custom_udf/lib/$dir + cp -r -p 
${CMAKE_BUILD_DIR}/src/$dir/*.so ${DORIS_OUTPUT}/custom_udf/lib/$dir/ + done +fi + +echo "***************************************" +echo "Successfully build Doris UDF" +echo "***************************************" + +if [[ ! -z ${DORIS_POST_BUILD_HOOK} ]]; then + eval ${DORIS_POST_BUILD_HOOK} +fi + +exit 0 diff --git a/custom_udf/CMakeLists.txt b/custom_udf/CMakeLists.txt new file mode 100644 index 00000000000000..af2768d7bb23ed --- /dev/null +++ b/custom_udf/CMakeLists.txt @@ -0,0 +1,58 @@ +cmake_minimum_required(VERSION 2.8.10) + +# set CMAKE_C_COMPILER, this must set before project command +if (DEFINED ENV{DORIS_GCC_HOME}) + set(CMAKE_C_COMPILER "$ENV{DORIS_GCC_HOME}/bin/gcc") + set(CMAKE_CXX_COMPILER "$ENV{DORIS_GCC_HOME}/bin/g++") + set(GCC_HOME $ENV{DORIS_GCC_HOME}) +else() + message(FATAL_ERROR "DORIS_GCC_HOME environment variable is not set") +endif() + +project(doris) + +# set CMAKE_BUILD_TYPE +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE RELEASE) +endif() + +string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) +message(STATUS "Build type is ${CMAKE_BUILD_TYPE}") + +set(BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(ENV{DORIS_HOME} "${BASE_DIR}/../") +set(THIRDPARTY_DIR "$ENV{DORIS_THIRDPARTY}/installed/") +set(BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}") +set(SRC_DIR "${BASE_DIR}/src/") +set(OUTPUT_DIR "${BASE_DIR}/output") + +# Check gcc +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.8.2") + message(FATAL_ERROR "Need GCC version at least 4.8.2") + endif() + + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7.3.0") + message(STATUS "GCC version is greater than 7.3.0, disable -Werror. Be careful with compile warnings.") + else() + # -Werror: compile warnings should be errors when using the toolchain compiler. 
+ set(CXX_GCC_FLAGS "${CXX_GCC_FLAGS} -Werror") + endif() +elseif (NOT APPLE) + message(FATAL_ERROR "Compiler should be GNU") +endif() + +# Just for clang-tidy: -Wno-expansion-to-defined -Wno-deprecated-declaration +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -g -ggdb -std=c++11 -Wall -Werror -Wno-unused-variable -Wno-expansion-to-defined -Wno-deprecated-declarations -O3") +message(STATUS "Compiler Flags: ${CMAKE_CXX_FLAGS}") + +# Include udf +include_directories($ENV{DORIS_HOME}/output/udf/include) + +# Set all libraries +add_library(udf STATIC IMPORTED) +set_target_properties(udf PROPERTIES IMPORTED_LOCATION $ENV{DORIS_HOME}/output/udf/lib/libDorisUdf.a) + +add_subdirectory(${SRC_DIR}/udf_samples) + +install(DIRECTORY DESTINATION ${OUTPUT_DIR}) diff --git a/be/src/udf_samples/CMakeLists.txt b/custom_udf/src/udf_samples/CMakeLists.txt similarity index 100% rename from be/src/udf_samples/CMakeLists.txt rename to custom_udf/src/udf_samples/CMakeLists.txt diff --git a/be/src/udf_samples/uda_sample.cpp b/custom_udf/src/udf_samples/uda_sample.cpp similarity index 97% rename from be/src/udf_samples/uda_sample.cpp rename to custom_udf/src/udf_samples/uda_sample.cpp index c9bd223244c69d..054ecc1f952ae1 100644 --- a/be/src/udf_samples/uda_sample.cpp +++ b/custom_udf/src/udf_samples/uda_sample.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "udf_samples/udf_sample.h" +#include "udf_sample.h" namespace doris_udf { diff --git a/be/src/udf_samples/uda_sample.h b/custom_udf/src/udf_samples/uda_sample.h similarity index 98% rename from be/src/udf_samples/uda_sample.h rename to custom_udf/src/udf_samples/uda_sample.h index 08419c88e2f3f6..719195c20e2fba 100644 --- a/be/src/udf_samples/uda_sample.h +++ b/custom_udf/src/udf_samples/uda_sample.h @@ -17,7 +17,7 @@ #pragma once -#include "udf/udf.h" +#include "udf.h" namespace doris_udf { diff --git a/be/src/udf_samples/udf_sample.cpp b/custom_udf/src/udf_samples/udf_sample.cpp similarity index 97% rename from be/src/udf_samples/udf_sample.cpp rename to custom_udf/src/udf_samples/udf_sample.cpp index 6c26c0c419bf59..594d9843f3bcec 100644 --- a/be/src/udf_samples/udf_sample.cpp +++ b/custom_udf/src/udf_samples/udf_sample.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "udf_samples/udf_sample.h" +#include "udf_sample.h" namespace doris_udf { diff --git a/be/src/udf_samples/udf_sample.h b/custom_udf/src/udf_samples/udf_sample.h similarity index 98% rename from be/src/udf_samples/udf_sample.h rename to custom_udf/src/udf_samples/udf_sample.h index f0adfc51179004..6b4a2dc74fe33f 100644 --- a/be/src/udf_samples/udf_sample.h +++ b/custom_udf/src/udf_samples/udf_sample.h @@ -17,7 +17,7 @@ #pragma once -#include "udf/udf.h" +#include "udf.h" namespace doris_udf { From ea6b300c9a987ecd60470caaeff63cb58ed22754 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Wed, 3 Jun 2020 20:32:25 +0800 Subject: [PATCH 02/18] Change udf documents Change-Id: Iaac181a887a0e2855457e7a7dda3e3330cce6f12 --- .../build_custom_udf.sh | 14 +- .../extending-doris/user-defined-function.md | 187 +++++++++++++----- .../extending-doris/user-defined-function.md | 111 ++++++++++- .../Data Definition/create-function.md | 9 + 4 files changed, 265 insertions(+), 56 deletions(-) rename build_custom_udf.sh => 
custom_udf/build_custom_udf.sh (92%) mode change 100755 => 100644 diff --git a/build_custom_udf.sh b/custom_udf/build_custom_udf.sh old mode 100755 new mode 100644 similarity index 92% rename from build_custom_udf.sh rename to custom_udf/build_custom_udf.sh index 79a921f2c9e327..8a56352d6bf81c --- a/build_custom_udf.sh +++ b/custom_udf/build_custom_udf.sh @@ -31,7 +31,9 @@ set -eo pipefail ROOT=`dirname "$0"` ROOT=`cd "$ROOT"; pwd` -export DORIS_HOME=${ROOT} +export DORIS_HOME=$(dirname "$PWD") +echo ${DORIS_HOME} +export CUSTOM_UDF_HOME=${ROOT} . ${DORIS_HOME}/env.sh @@ -111,22 +113,22 @@ echo "Get params: CLEAN -- $CLEAN " -cd ${DORIS_HOME} +cd ${CUSTOM_UDF_HOME} # Clean and build UDF if [ ${BUILD_UDF} -eq 1 ] ; then CMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} echo "Build UDF: ${CMAKE_BUILD_TYPE}" - CMAKE_BUILD_DIR=${DORIS_HOME}/custom_udf/build_${CMAKE_BUILD_TYPE} + CMAKE_BUILD_DIR=${CUSTOM_UDF_HOME}/build_${CMAKE_BUILD_TYPE} if [ ${CLEAN} -eq 1 ]; then rm -rf $CMAKE_BUILD_DIR - rm -rf ${DORIS_HOME}/custom_udf/output/ + rm -rf ${CUSTOM_UDF_HOME}/output/ fi mkdir -p ${CMAKE_BUILD_DIR} cd ${CMAKE_BUILD_DIR} ${CMAKE_CMD} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} ../ make -j${PARALLEL} VERBOSE=1 make install - cd ${DORIS_HOME} + cd ${CUSTOM_UDF_HOME} fi # Clean and prepare output dir @@ -135,7 +137,7 @@ mkdir -p ${DORIS_OUTPUT} #Copy UDF if [ ${BUILD_UDF} -eq 1 ]; then - install -d ${DORIS_OUTPUT}/custom_udf/lib + install -d ${DORIS_OUTPUT}/custom_udf/lib for dir in "$(ls ${CMAKE_BUILD_DIR}/src)" do mkdir -p ${DORIS_OUTPUT}/custom_udf/lib/$dir diff --git a/docs/en/extending-doris/user-defined-function.md b/docs/en/extending-doris/user-defined-function.md index 68791104b3689f..aa8fc9efe8cbe8 100644 --- a/docs/en/extending-doris/user-defined-function.md +++ b/docs/en/extending-doris/user-defined-function.md @@ -1,61 +1,61 @@ --- { - "title": "User Define Function", - "language": "en" +    "title": "User Define Function", +    "language": "en" } --- - # User Define Function 
-Users can extend Doris's capabilities through UDF mechanisms. Through this document, users can create their own UDF. +Users can extend Doris' capabilities through the UDF mechanism. Through this document, users can create their own UDF. ## Writing UDF functions -Before using UDF, users need to write their own UDF functions in Doris's UDF framework. In the `be/src/udf_samples/udf_sample.h | cpp` file, it is a simple UDF Demo. +Before using UDF, users need to write their own UDF functions under Doris' UDF framework. In the `custom_udf/src/udf_samples/udf_sample.h|cpp` file is a simple UDF Demo. Writing a UDF function requires the following steps. ### Writing functions -Create the corresponding header file, CPP file, and implement the logic you need in the CPP file. The corresponding relationship between the format of implementation function in CPP file and UDF. +Create the corresponding header file and CPP file, and implement the logic you need in the CPP file. Correspondence between the implementation function format and UDF in the CPP file. #### Non-variable parameters -For UDF with non-variable parameters, the corresponding relationship between them is very direct. -For example, `INT MyADD'(INT, INT) ` UDF corresponds to `IntVal AddUdf(FunctionContext* context, const IntVal & arg1, const IntVal & arg2)`. +For UDFs with non-variable parameters, the correspondence between the two is straightforward. +For example, UTF of `INT MyADD(INT, INT)` will correspond to `IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)` -1. `AddUdf` can be any name, as long as it is specified when UDF is created. -2. The first parameter in the implementation function is always `FunctionContext*`. The implementer can obtain some query-related content and apply for some memory to be used through this structure. Specific interfaces can be defined in `udf/udf.h`. -3. 
Implementing functions from the second parameter requires one-to-one correspondence with UDF parameters, such as `IntVal` corresponding to `INT` type. All types in this section are referenced by `const`. -4. Return parameters should correspond to the type of UDF parameters. +1. `AddUdf` can be any name, as long as it is specified when creating UDF. +2. The first parameter in the implementation function is always `FunctionContext*`. The implementer can obtain some query-related content through this structure, and apply for some memory to be used. The specific interface used can refer to the definition in `udf/udf.h`. +3. In the implementation function, the second parameter needs to correspond to the UDF parameter one by one, for example, `IntVal` corresponds to `INT` type. All types in this part must be referenced with `const`. +4. The return parameter must correspond to the type of UDF parameter. -#### Variable parameters +#### variable parameter -For variable parameters, see the following example, UDF `String md5sum (String,...)` corresponds to -`StringVal md5sumUdf (FunctionContext * ctx, int num args, const StringVal * args)` +For variable parameters, you can refer to the following example, corresponding to UDF`String md5sum(String, ...)` +The implementation function is `StringVal md5sumUdf(FunctionContext* ctx, int num_args, const StringVal* args)` -1. The `md5sumUdf` can also be changed at will. It can be specified at the time of creation. -2. The first parameter, like a non-variable parameter function, is passed in a `FunctionContext*`. -3. The variable parameter part consists of two parts. First, an integer is passed in, which shows that there are several parameters. Later, an array of variable parameter parts is passed in. +1. `md5sumUdf` can also be changed arbitrarily, just specify it when creating. +2. The first parameter is the same as the non-variable parameter function, and the passed in is a `FunctionContext*`. +3. 
The variable parameter part consists of two parts. First, an integer is passed in, indicating that there are several parameters behind. An array of variable parameter parts is passed in later. #### Type correspondence @@ -74,45 +74,144 @@ For variable parameters, see the following example, UDF `String md5sum (String,. |Varchar|StringVal| |Decimal|DecimalVal| -## Compiling UDF functions +### Directory structure after writing + +Taking udf_sample as an example here, create a `udf_samples` directory under src dir to store source code. + +``` + +├── be +├── custom_udf +│ ├── CMakeLists.txt +│ ├── build_custom_udf.sh +│ └── src +│ └── udf_samples +│ ├── CMakeLists.txt +│ ├── uda_sample.cpp +│ ├── uda_sample.h +│ ├── udf_sample.cpp +│ └── udf_sample.h + +``` + +## Compile UDF function + +Since the function implemented by the user depends on the udf of Doris, the first step is to compile Doris when compiling the UDF function. Then compile the UDF implemented by the user. ### Compile Doris -Executing `sh build.sh` in the Doris root directory generates the corresponding `headers|libs` in `output/udf/` +Running `sh build.sh` in the root directory of Doris will generate the corresponding `headers|libs` in `output/udf/` + +``` +├── output +│   └── udf +│   ├── include +│   │   ├── uda_test_harness.h +│   │   └── udf.h +│   └── lib +│   └── libDorisUdf.a + +``` + +### Write CMakeLists.txt for custom UDF + +1. Add custom UDF compilation under `custom_udf/CMakeLists.txt`. Take udf_samples as an example + +    ``` +    ├── be +    ├── custom_udf +    │ ├── CMakeLists.txt +    │ └── src + + +    custom_udf/CMakeLists.txt +    ... +    add_subdirectory(${SRC_DIR}/udf_samples) +    ... + +    ``` -### Edit CMakeLists.txt +2. Add dependency in custom UDF. 
Take udf_samples as an example, -Based on the `headers | libs` generated in the previous step, users can introduce the dependency using tools such as `CMakeLists`; in `CMakeLists`, dynamic libraries can be added by adding `-I|L` to `CMAKE_CXX_FLAGS`, respectively. For example, in `be/src/udf_samples/CMakeLists.txt`, a `udf sample` dynamic library is added using `add_library` (udfsample SHARED udf_sample.cpp) `target_link_libraries`(udfsample -static-libstdc++ -static-libgcc). You need to write down all the source files involved later (no header files included). +    Since the code in udf_samples does not depend on any other libraries, there is no need to declare. + +    If the code depends on functions such as `StringVal` in Doris UDF, you need to declare that it depends on udf. Modify `udf_samples/CMakeFiles.txt`: + +    ``` + ├── be + ├── custom_udf + │   ├── CMakeLists.txt + │   └── src + │   └── udf_samples + │   ├── CMakeLists.txt + +    custom_udf/src/udf_samples/CMakeFiles.txt +    ... +    target_link_libraries(udfsample +        udf +        -static-libstdc++ +        -static-libgcc +    ) +    ... + +    ``` ### Execute compilation -Create a `build` directory under this directory and execute `cmake ../` generate `Makefile` under `build`, and execute `make` to generate corresponding dynamic libraries. +Run `build_custom_udf.sh` under custom_udf + +``` +├── be +├── custom_udf +│ ├── build_custom_udf.sh + +build_custom_udf.sh --udf --clean + +``` + +If no parameters are passed in by default, the compilation script is directly compiled and not clean. If you need to clean and then compile, you need to add the parameter `--udf --clean` + +### Compilation result + +After the compilation is completed, the dynamic link library is placed under `output/custom_udf/`. 
Taking udf_samples as an example, the directory structure is as follows: + +``` + +├── output +│   ├── be +│   ├── custom_udf +│   │   └── lib +│   │   └── udf_samples +│   │   ├── libudasample.so +│   │   └── libudfsample.so + +``` -## Create UDF functions +## Create UDF function -Through the above steps, you can get a dynamic library. You need to put this dynamic library in a location that can be accessed through the HTTP protocol. Then execute the create UDF function to create a UDF inside the Doris system. You need AMDIN privileges to do this. +After going through the above steps, you can get a dynamic library. You need to put this dynamic library in a location that can be accessed through the HTTP protocol. Then execute the create UDF function to create a UDF inside the Doris system. You need to have AMDIN permission to complete this operation. ``` -CREATE [AGGREGATE] FUNCTION - name ([argtype][,...]) - [RETURNS] rettype - PROPERTIES (["key"="value"][,...]) +CREATE [AGGREGATE] FUNCTION +name ([argtype][,...]) +[RETURNS] rettype +PROPERTIES (["key"="value"][,...]) ``` -Explain: +Description: -1. In PROPERTIES, `symbol` denotes the corresponding symbol for the execution of the entry function, which must be set. You can get the corresponding symbol by the `nm` command, such as `nm libudfsample.so`, `grep AddUdf`, `ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4`. -2. In PROPERTIES, `object_file` denotes where to download to the corresponding dynamic library. This parameter must be set. -3. name: A function belongs to a DB in the form of `dbName`. `funcName`. When `dbName` is not specified explicitly, the DB where the current session is located is used as `dbName`. +1. "Symbol" in PROPERTIES means that the symbol corresponding to the entry function is executed. This parameter must be set. 
You can get the corresponding symbol through the `nm` command, such as `_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_` obtained by `nm libudfsample.so | grep AddUdf` is the corresponding symbol. +2. The object_file in PROPERTIES indicates where it can be downloaded to the corresponding dynamic library. This parameter must be set. +3. name: A function belongs to a certain DB, and the name is in the form of `dbName`.`funcName`. When `dbName` is not explicitly specified, the db where the current session is located is used as `dbName`. -For more details, see `CREATE FUNCTION`. +For specific use, please refer to `CREATE FUNCTION` for more detailed information. -## Using UDF +## Use UDF -Users using UDF/UDAF must have `SELECT` privileges for the corresponding database. +Users must have the `SELECT` permission of the corresponding database to use UDF/UDAF. -UDF is used in the same way as normal functions. The only difference is that the scope of built-in functions is global, while the scope of UDF is internal to DB. When the link session is inside the data, using the UDF name directly will find the corresponding UDF within the current DB. Otherwise, the user needs to display the database name of the specified UDF, such as `dbName`. `funcName`. +The use of UDF is consistent with ordinary function methods. The only difference is that the scope of built-in functions is global, and the scope of UDF is internal to DB. When the link session is inside the data, directly using the UDF name will find the corresponding UDF inside the current DB. Otherwise, the user needs to display the specified UDF database name, such as `dbName`.`funcName`. -## Delete UDF functions +## Delete UDF function -When you no longer need UDF functions, you can delete a UDF function by using the following command, referring to `DROP FUNCTION`. +When you no longer need UDF functions, you can delete a UDF function by the following command, you can refer to `DROP FUNCTION`. 
diff --git a/docs/zh-CN/extending-doris/user-defined-function.md b/docs/zh-CN/extending-doris/user-defined-function.md index fdd2bcdfe61958..06ebc9be7ca306 100644 --- a/docs/zh-CN/extending-doris/user-defined-function.md +++ b/docs/zh-CN/extending-doris/user-defined-function.md @@ -30,7 +30,7 @@ under the License. ## 编写UDF函数 -在使用UDF之前,用户需要先在Doris的UDF框架下,编写自己的UDF函数。在`be/src/udf_samples/udf_sample.h|cpp`文件中是一个简单的UDF Demo。 +在使用UDF之前,用户需要先在Doris的UDF框架下,编写自己的UDF函数。在`custom_udf/src/udf_samples/udf_sample.h|cpp`文件中是一个简单的UDF Demo。 编写一个UDF函数需要以下几个步骤。 @@ -74,23 +74,122 @@ under the License. |Varchar|StringVal| |Decimal|DecimalVal| -## 编译UDF函数 +### 编写完成后的目录结构 + +这里以 udf_sample 为例, 在 src dir 下面创建一个 `udf_samples` 目录用于存放 source code. + +``` + +├── be +├── custom_udf +│   ├── CMakeLists.txt +│   ├── build_custom_udf.sh +│   └── src +│   └── udf_samples +│   ├── CMakeLists.txt +│   ├── uda_sample.cpp +│   ├── uda_sample.h +│   ├── udf_sample.cpp +│   └── udf_sample.h + +``` + +## 编译 UDF 函数 + +由于用户自己实现的 function 中依赖了 Doris 的 udf , 所以在编译 UDF 函数的时候首先对 Doris 进行编译。然后再编译用户自己实现的 UDF 即可。 ### 编译Doris 在Doris根目录下执行`sh build.sh`就会在`output/udf/`生成对应`headers|libs` -### 编写CMakeLists.txt +``` +├── output +│   └── udf +│   ├── include +│   │   ├── uda_test_harness.h +│   │   └── udf.h +│   └── lib +│   └── libDorisUdf.a + +``` -基于上一步生成的`headers|libs`,用户可以使用`CMakeLists`等工具引入该依赖;在`CMakeLists`中,可以通过向`CMAKE_CXX_FLAGS`添加`-I|L`分别指定`headers|libs`路径;然后使用`add_library`添加动态库。例如,在`be/src/udf_samples/CMakeLists.txt`中,使用`add_library(udfsample SHARED udf_sample.cpp)` `target_link_libraries`(udfsample -static-libstdc++ -static-libgcc)增加了一个`udfsample`动态库。后面需要写上涉及的所有源文件(不包含头文件)。 +### 编写自定义 UDF 的 CMakeLists.txt + +1. 在 `custom_udf/CMakeLists.txt` 下增加对自定义 UDF 的编译。以 udf_samples 为例 + + ``` + ├── be + ├── custom_udf + │   ├── CMakeLists.txt + │   └── src + + + custom_udf/CMakeLists.txt + ... + add_subdirectory(${SRC_DIR}/udf_samples) + ... + + ``` + +2. 
在自定义 UDF 中增加依赖。以 udf_samples 为例, + + 由于 udf_samples 中的代码都没有依赖任何其他库,则不需要声明。 + + 如果代码中依赖了比如 Doris UDF 中对 `StringVal` 的函数,则需要声明依赖了 udf。修改 `udf_samples/CMakeFiles.txt`: + + ``` + ├── be + ├── custom_udf + │   ├── CMakeLists.txt + │   └── src + │   └── udf_samples + │   ├── CMakeLists.txt + + custom_udf/src/udf_samples/CMakeFiles.txt + ... + target_link_libraries(udfsample + udf + -static-libstdc++ + -static-libgcc + ) + ... + + ``` ### 执行编译 -在该目录下创建一个`build`目录并在`build`下执行`cmake ../`生成`Makefile`,并执行`make`就会生成对应动态库。 +运行 custom_udf 下的 `build_custom_udf.sh` + +``` +├── be +├── custom_udf +│   ├── build_custom_udf.sh + +build_custom_udf.sh --udf --clean + +``` + +这个编译脚本如果默认不传入任何参数,则直接编译并且不 clean。如果需要 clean 后再编译则需要加上参数 `--udf --clean` + +### 编译结果 + +编译完成后的动态链接库被放在了 `output/custom_udf/` 下,以 udf_samples 为例,目录结构如下: + +``` + +├── output +│   ├── be +│   ├── custom_udf +│   │   └── lib +│   │   └── udf_samples +│   │   ├── libudasample.so +│   │   └── libudfsample.so + +``` ## 创建UDF函数 -通过上述的步骤后,你可以得到一个动态库。你需要将这个动态库放到一个能够通过HTTP协议访问到的位置。然后执行创建UDF函数在Doris系统内部创建一个UDF,你需要拥有AMDIN权限才能够完成这个操作。 +通过上述的步骤后,你可以得到一个动态库。你需要将这个动态库放到一个能够通过 HTTP 协议访问到的位置。然后执行创建 UDF 函数在 Doris 系统内部创建一个 UDF,你需要拥有AMDIN权限才能够完成这个操作。 ``` CREATE [AGGREGATE] FUNCTION diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md index 2bd2b7efae93c5..0c3f3b7e75d6d9 100644 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md +++ b/docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md @@ -109,6 +109,15 @@ CREATE [AGGREGATE] FUNCTION function_name ); ``` +3. 创建一个变长参数的标量函数 + + ``` + CREATE FUNCTION strconcat(varchar, ...) 
RETURNS varchar properties ( + "symbol" = "_ZN9doris_udf6StrConcatUdfEPNS_15FunctionContextERKNS_6IntValES4_", + "object_file" = "http://host:port/libmyStrConcat.so" + ); + ``` + ## keyword CREATE,FUNCTION From 2932f915e31f2fe6aed395785bfbe2dd47cdbc94 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Thu, 4 Jun 2020 12:15:35 +0800 Subject: [PATCH 03/18] Docs of contributing third-party udf to Doris Change-Id: Id5a36aca627c1d87a3c83920b2cc978e0131d9ca --- docs/.vuepress/sidebar/en.js | 6 + docs/.vuepress/sidebar/zh-CN.js | 6 + docs/en/extending-doris/contribute_udf.md | 118 +++++++++++++++++++ docs/zh-CN/extending-doris/contribute_udf.md | 117 ++++++++++++++++++ 4 files changed, 247 insertions(+) create mode 100644 docs/en/extending-doris/contribute_udf.md create mode 100644 docs/zh-CN/extending-doris/contribute_udf.md diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index 1975d59557842d..d8d513fa9c448e 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -127,6 +127,12 @@ module.exports = [ "user-defined-function", "spark-doris-connector", "logstash", + "contribute_udf", + { + title: "Third-party UDF", + directoryPath: "third-party-udf/", + children:[], + }, ], }, { diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index e1d15086fbb55c..72b0d3022e1ab6 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -138,6 +138,12 @@ module.exports = [ "user-defined-function", "spark-doris-connector", "logstash", + "contribute_udf", + { + title: "第三方 UDF", + directoryPath: "third-party-udf/", + children:[], + }, ], }, { diff --git a/docs/en/extending-doris/contribute_udf.md b/docs/en/extending-doris/contribute_udf.md new file mode 100644 index 00000000000000..bcbd68fd7c0dca --- /dev/null +++ b/docs/en/extending-doris/contribute_udf.md @@ -0,0 +1,118 @@ +--- +{ + "title": "Contribute UDF", + "language": "en" +} +--- + + + +# Contribute UDF + +This 
manual mainly introduces how external users can contribute their own UDF functions to the Doris community. + +# Prerequisites + +1. UDF function is universal + +The versatility here mainly refers to: UDF functions are widely used in certain business scenarios. Such UDF functions are valuable and can be used directly by other users in the community. + +If you are not sure whether the UDF function you wrote is universal, you can send an email to `dev@doris.apache.org` or directly create an ISSUE to initiate the discussion. + +2. UDF has completed testing and is running normally in the user's production environment + +# Ready to work + +1. UDF source code +2. User Manual of UDF + +## Source code + +The placement path should be under `custom_udf/src/my_udf`. Here with udf_samples, first create a new folder under the `custom_udf/src/` path and store the original code. + +``` + + ├── custom_udf + │ ├── CMakeLists.txt + │ └── src + │ └── udf_samples + │ ├── CMakeLists.txt + │ ├── uda_sample.cpp + │ ├── uda_sample.h + │ ├── udf_sample.cpp + │ └── udf_sample.h + +``` + +## manual + +The user manual needs to include: UDF function definition description, applicable scenarios, function syntax, how to compile UDF, how to use UDF in Doris, and use examples. + +1. The user manual must contain both Chinese and English versions, and be stored under `docs/zh-CN/extending-doris/third-party-udf/` and `docs/en/extending-doris/third-party-udf`, respectively. + + ``` + ├── docs + │   └── zh-CN + │   └──extending-doris + │ └──third-party-udf + │ ├── udf simple 使用手册 + + ``` + + ``` + ├── docs + │   └── en + │   └──extending-doris + │ └──third-party-udf + │ ├── udf simple manual + ``` + +2. Add the two manual files to the sidebar in Chinese and English. 
+ + ``` + vi docs/.vuepress/sidebar/zh-CN.js + { + title: "第三方 UDF", + directoryPath: "third-party-udf/", + children: + [ + "udf simple 使用手册", + ], + }, + ``` + + ``` + vi docs/.vuepress/sidebar/en.js + { + title: "Third-party UDF", + directoryPath: "third-party-udf/", + children: + [ + "udf simple manual", + ], + }, + + ``` + +# Contribute UDF to the community + + When you meet the conditions and prepare the code, you can contribute UDF to the Doris community after the document. Simply submit the request (PR) on [Github] (https://github.com/apache/incubator-doris). See the specific submission method: [Pull Request (PR)] (https://help.github.com/articles/about-pull-requests/). + + Finally, when the PR assessment is passed and merged. Congratulations, your UDF becomes a third-party UDF supported by Doris. You can check it out in the extended functions section of [Doris official website] (http://doris.apache.org/master/zh-CN/)~. diff --git a/docs/zh-CN/extending-doris/contribute_udf.md b/docs/zh-CN/extending-doris/contribute_udf.md new file mode 100644 index 00000000000000..9bd475cbf021bc --- /dev/null +++ b/docs/zh-CN/extending-doris/contribute_udf.md @@ -0,0 +1,117 @@ +--- +{ + "title": "Contribute UDF", + "language": "zh-CN" +} +--- + + + +# Contribute UDF + +该手册主要讲述了外部用户如何将自己编写的 UDF 函数贡献给 Doris 社区。 + +# 前提条件 + +1. UDF 函数具有通用性 + + 这里的通用性主要指的是:UDF 函数在某些业务场景下,被广泛使用。也就是说 UDF 函数具有复用价值,可被社区内其他用户直接使用。 + + 如果你不确定自己写的 UDF 函数是否具有通用性,可以发邮件到 `dev@doris.apache.org` 或直接创建 ISSUE 发起讨论。 + +2. UDF 已经完成测试,并正常运行在用户的生产环境中 + +# 准备工作 + +1. UDF 的 source code +2. 
UDF 的使用手册 + +## 源代码 + + 待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。存放路径应该在 `custom_udf/src/my_udf` 下。这里以 udf_samples 为例,首先在 `custom_udf/src/` 路径下创建一个新的文件夹,并存放源码。 + +``` + ├── custom_udf + │ ├── CMakeLists.txt + │ └── src + │ └── udf_samples + │ ├── CMakeLists.txt + │ ├── uda_sample.cpp + │ ├── uda_sample.h + │ ├── udf_sample.cpp + │ └── udf_sample.h + +``` + +## 使用手册 + + 使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 + +1. 使用手册需包含中英文两个版本,并分别存放在 `docs/zh-CN/extending-doris/third-party-udf/` 和 `docs/en/extending-doris/third-party-udf` 下。 + + ``` + ├── docs + │   └── zh-CN + │   └──extending-doris + │ └──third-party-udf + │ ├── udf simple 使用手册 + + ``` + + ``` + ├── docs + │   └── en + │   └──extending-doris + │ └──third-party-udf + │ ├── udf simple manual + ``` + +2. 将两个使用手册的文件,加入中文和英文的 sidebar 中。 + + ``` + vi docs/.vuepress/sidebar/zh-CN.js + { + title: "第三方 UDF", + directoryPath: "third-party-udf/", + children: + [ + "udf simple 使用手册", + ], + }, + ``` + + ``` + vi docs/.vuepress/sidebar/en.js + { + title: "Third-party UDF", + directoryPath: "third-party-udf/", + children: + [ + "udf simple manual", + ], + }, + + ``` + +# 贡献 UDF 到社区 + + 当你符合前提条件并准备好代码,文档后就可以将 UDF 贡献到 Doris 社区了。在 [Github](https://github.com/apache/incubator-doris) 上面提交 Pull Request (PR) 即可。具体提交方式见:[Pull Request (PR)](https://help.github.com/articles/about-pull-requests/)。 + + 最后,当 PR 评审通过并 Merge 后。恭喜你,你的 UDF 成为 Doris 支持的第三方 UDF。你可以在 [Doris 官网](http://doris.apache.org/master/zh-CN/) 的扩展功能部分查看到啦~。 From 05605f6c6dad8aec37ebd03206d69d9e86d13aa0 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Thu, 4 Jun 2020 12:23:46 +0800 Subject: [PATCH 04/18] Add license Change-Id: I9391a79f12614e78cce19b11df1e5d92eb4817c4 --- custom_udf/CMakeLists.txt | 17 +++++++++++++++++ custom_udf/build_custom_udf.sh | 8 +++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/custom_udf/CMakeLists.txt b/custom_udf/CMakeLists.txt index af2768d7bb23ed..ce8d86d28e5f07 100644 
--- a/custom_udf/CMakeLists.txt +++ b/custom_udf/CMakeLists.txt @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + cmake_minimum_required(VERSION 2.8.10) # set CMAKE_C_COMPILER, this must set before project command diff --git a/custom_udf/build_custom_udf.sh b/custom_udf/build_custom_udf.sh index 8a56352d6bf81c..f09307471721c7 100644 --- a/custom_udf/build_custom_udf.sh +++ b/custom_udf/build_custom_udf.sh @@ -17,13 +17,11 @@ # under the License. ############################################################## -# This script is used to compile Apache Doris(incubating) +# This script is used to compile UDF # Usage: -# sh build.sh build both Backend and Frontend. -# sh build.sh -clean clean previous output and build. +# sh build.sh build udf without clean. +# sh build.sh -clean --udf clean previous output and build. # -# You need to make sure all thirdparty libraries have been -# compiled and installed correctly. 
############################################################## set -eo pipefail From 7cbfa4692db360c9a6759653fdbf974461c2488f Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Thu, 4 Jun 2020 14:33:20 +0800 Subject: [PATCH 05/18] Fix compile error Change-Id: Iad5cd3be688731992709b080d1842530fe7a2a5b --- be/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 25d02974d77005..cce76ec5917c09 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -498,7 +498,6 @@ add_subdirectory(${SRC_DIR}/testutil) #add_subdirectory(${SRC_DIR}/tools) add_subdirectory(${SRC_DIR}/udf) add_subdirectory(${SRC_DIR}/tools) -add_subdirectory(${SRC_DIR}/udf_samples) add_subdirectory(${SRC_DIR}/util) add_subdirectory(${SRC_DIR}/plugin) From ee299bee263c3694dbdd5e8046d1a51aa1f2b764 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Thu, 4 Jun 2020 19:51:12 +0800 Subject: [PATCH 06/18] Fix error Change-Id: I2eb883be97ddab1548c817bc832cf7f205116d00 --- custom_udf/build_custom_udf.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom_udf/build_custom_udf.sh b/custom_udf/build_custom_udf.sh index f09307471721c7..6d1b0861fb52d2 100644 --- a/custom_udf/build_custom_udf.sh +++ b/custom_udf/build_custom_udf.sh @@ -136,7 +136,7 @@ mkdir -p ${DORIS_OUTPUT} #Copy UDF if [ ${BUILD_UDF} -eq 1 ]; then install -d ${DORIS_OUTPUT}/custom_udf/lib - for dir in "$(ls ${CMAKE_BUILD_DIR}/src)" + for dir in $(ls ${CMAKE_BUILD_DIR}/src) do mkdir -p ${DORIS_OUTPUT}/custom_udf/lib/$dir cp -r -p ${CMAKE_BUILD_DIR}/src/$dir/*.so ${DORIS_OUTPUT}/custom_udf/lib/$dir/ From 402ce1d496cf733557653cc26707e7c630602329 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 8 Jun 2020 20:04:05 +0800 Subject: [PATCH 07/18] Change udf doc Change-Id: I6f5ffe72463146046914c6c64d448d51901478fb --- docs/zh-CN/extending-doris/user-defined-function.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) 
diff --git a/docs/zh-CN/extending-doris/user-defined-function.md b/docs/zh-CN/extending-doris/user-defined-function.md index 06ebc9be7ca306..905f88845dafb3 100644 --- a/docs/zh-CN/extending-doris/user-defined-function.md +++ b/docs/zh-CN/extending-doris/user-defined-function.md @@ -26,7 +26,14 @@ under the License. # User Define Function -用户可以通过UDF机制来扩展Doris的能力。通过这篇文档,用户能够创建自己的UDF。 +UDF 主要适用于,用户需要的分析能力 Doris 并不具备的场景。用户可以自行根据自己的需求,实现自定义的函数,并且通过 UDF 的方式注册到 Doris 中,来扩展 Doris 的能力,并解决用户分析需求。 + +UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指的是二者的统称。 + +1. UDF: 用户自定义函数,这种函数会对单行进行操作,并且输出单行结果。当用户在查询时使用 UDF ,每行数据最终都会出现在结果集中。典型的 UDF 比如字符串操作 concat() 等。 +2. UDAF: 用户自定义的聚合函数,这种函数对多行进行操作,并且输出单行结果。当用户在查询时使用 UDAF,分组后的每组数据最后会计算出一个值并展结果集中。典型的 UDAF 比如集合操作 sum() 等。一般来说 UDAF 都会结合 group by 一起使用。 + +这篇文档主要讲述了,如何编写自定义的 UDF 函数,以及如何在 Doris 中使用它。 ## 编写UDF函数 From 9dcab5618b3bc4e567e3787c9cbee5da9c8c20cc Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Wed, 10 Jun 2020 20:32:43 +0800 Subject: [PATCH 08/18] Add doc Change-Id: Ic9e6567b2920bb465af8c7896d4ab22a0b53635d --- docs/en/extending-doris/user-defined-function.md | 11 ++++++++++- docs/zh-CN/extending-doris/contribute_udf.md | 4 ++-- docs/zh-CN/extending-doris/user-defined-function.md | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/en/extending-doris/user-defined-function.md b/docs/en/extending-doris/user-defined-function.md index aa8fc9efe8cbe8..833612e58568dd 100644 --- a/docs/en/extending-doris/user-defined-function.md +++ b/docs/en/extending-doris/user-defined-function.md @@ -26,7 +26,16 @@ under the License. # User Define Function -Users can extend Doris' capabilities through the UDF mechanism. Through this document, users can create their own UDF. +UDF is mainly suitable for scenarios where the analytical capabilities that users need do not possess. 
Users can implement custom functions according to their own needs, and register with Doris through UDF to expand Doris' capabilities and solve user analysis needs. + +There are two types of analysis requirements that UDF can meet: UDF and UDAF. UDF in this article refers to both. + +1. UDF: User-defined function, this function will operate on a single line and output a single line result. When users use UDFs for queries, each row of data will eventually appear in the result set. Typical UDFs are string operations such as concat(). +2. UDAF: User-defined aggregation function. This function operates on multiple lines and outputs a single line of results. When the user uses UDAF in the query, each group of data after grouping will finally calculate a value and expand the result set. A typical UDAF is the set operation sum(). Generally speaking, UDAF will be used together with group by. + +This document mainly describes how to write a custom UDF function and how to use it in Doris. + +If users use the UDF function and extend Doris' function analysis, and want to contribute their own UDF functions back to the Doris community for other users, please see the document [Contribute UDF to Doris](http://doris.apache.org/master/en/extending-doris/contribute_udf.html). ## Writing UDF functions diff --git a/docs/zh-CN/extending-doris/contribute_udf.md b/docs/zh-CN/extending-doris/contribute_udf.md index 9bd475cbf021bc..b4959d84877a1a 100644 --- a/docs/zh-CN/extending-doris/contribute_udf.md +++ b/docs/zh-CN/extending-doris/contribute_udf.md @@ -1,6 +1,6 @@ --- { - "title": "Contribute UDF", + "title": "贡献用户 UDF 函数到社区", "language": "zh-CN" } --- @@ -24,7 +24,7 @@ specific language governing permissions and limitations under the License. 
--> -# Contribute UDF +# 贡献用户的 UDF 函数到社区 该手册主要讲述了外部用户如何将自己编写的 UDF 函数贡献给 Doris 社区。 diff --git a/docs/zh-CN/extending-doris/user-defined-function.md b/docs/zh-CN/extending-doris/user-defined-function.md index 905f88845dafb3..238898c5a4507e 100644 --- a/docs/zh-CN/extending-doris/user-defined-function.md +++ b/docs/zh-CN/extending-doris/user-defined-function.md @@ -35,6 +35,8 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 这篇文档主要讲述了,如何编写自定义的 UDF 函数,以及如何在 Doris 中使用它。 +如果用户使用 UDF 功能并扩展了 Doris 的函数分析,并且希望将自己实现的 UDF 函数贡献回 Doris 社区给其他用户使用,这时候请看文档 [Contribute UDF to Doris](http://doris.apache.org/master/zh-CN/extending-doris/contribute_udf.html)。 + ## 编写UDF函数 在使用UDF之前,用户需要先在Doris的UDF框架下,编写自己的UDF函数。在`custom_udf/src/udf_samples/udf_sample.h|cpp`文件中是一个简单的UDF Demo。 From 0f119153d40624df1f33030c31ab9c038ae21850 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Thu, 11 Jun 2020 11:18:17 +0800 Subject: [PATCH 09/18] Change doc Change-Id: Ibfa7260193953611bf0cbdd3d3a38086230d0de6 --- custom_udf/CMakeLists.txt | 5 +++-- docs/en/extending-doris/contribute_udf.md | 10 ++++++---- docs/zh-CN/extending-doris/contribute_udf.md | 8 ++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/custom_udf/CMakeLists.txt b/custom_udf/CMakeLists.txt index ce8d86d28e5f07..1cf6e51797f376 100644 --- a/custom_udf/CMakeLists.txt +++ b/custom_udf/CMakeLists.txt @@ -45,8 +45,8 @@ set(OUTPUT_DIR "${BASE_DIR}/output") # Check gcc if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.8.2") - message(FATAL_ERROR "Need GCC version at least 4.8.2") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "7.3.0") + message(FATAL_ERROR "Need GCC version at least 7.3.0") endif() if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7.3.0") @@ -70,6 +70,7 @@ include_directories($ENV{DORIS_HOME}/output/udf/include) add_library(udf STATIC IMPORTED) set_target_properties(udf PROPERTIES IMPORTED_LOCATION $ENV{DORIS_HOME}/output/udf/lib/libDorisUdf.a) +# Add the subdirector 
of new UDF in here add_subdirectory(${SRC_DIR}/udf_samples) install(DIRECTORY DESTINATION ${OUTPUT_DIR}) diff --git a/docs/en/extending-doris/contribute_udf.md b/docs/en/extending-doris/contribute_udf.md index bcbd68fd7c0dca..b3d42960cfcc58 100644 --- a/docs/en/extending-doris/contribute_udf.md +++ b/docs/en/extending-doris/contribute_udf.md @@ -72,7 +72,8 @@ The user manual needs to include: UDF function definition description, applicabl │   └── zh-CN │   └──extending-doris │ └──third-party-udf - │ ├── udf simple 使用手册 + │ ├── udf-simple-manual.md + ``` @@ -81,7 +82,8 @@ The user manual needs to include: UDF function definition description, applicabl │   └── en │   └──extending-doris │ └──third-party-udf - │ ├── udf simple manual + │ ├── udf-simple-manual.md + ``` 2. Add the two manual files to the sidebar in Chinese and English. @@ -93,7 +95,7 @@ The user manual needs to include: UDF function definition description, applicabl directoryPath: "third-party-udf/", children: [ - "udf simple 使用手册", + "udf-simple-manual", ], }, ``` @@ -105,7 +107,7 @@ The user manual needs to include: UDF function definition description, applicabl directoryPath: "third-party-udf/", children: [ - "udf simple manual", + "udf-simple-manual", ], }, diff --git a/docs/zh-CN/extending-doris/contribute_udf.md b/docs/zh-CN/extending-doris/contribute_udf.md index b4959d84877a1a..957d2d0e5e4db8 100644 --- a/docs/zh-CN/extending-doris/contribute_udf.md +++ b/docs/zh-CN/extending-doris/contribute_udf.md @@ -71,7 +71,7 @@ under the License. │   └── zh-CN │   └──extending-doris │ └──third-party-udf - │ ├── udf simple 使用手册 + │ ├── udf-simple-manual.md ``` @@ -80,7 +80,7 @@ under the License. │   └── en │   └──extending-doris │ └──third-party-udf - │ ├── udf simple manual + │ ├── udf-simple-manual.md ``` 2. 将两个使用手册的文件,加入中文和英文的 sidebar 中。 @@ -92,7 +92,7 @@ under the License. 
directoryPath: "third-party-udf/", children: [ - "udf simple 使用手册", + "udf-simple-manual", ], }, ``` @@ -104,7 +104,7 @@ under the License. directoryPath: "third-party-udf/", children: [ - "udf simple manual", + "udf-simple-manual", ], }, From 81a7d46ba2628ead1da8fc70a7624f379dda4685 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Thu, 11 Jun 2020 11:41:07 +0800 Subject: [PATCH 10/18] Remove --udf Change-Id: Idfcda8bede3d980618f5b249129c54f9b15567f5 --- custom_udf/build_custom_udf.sh | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/custom_udf/build_custom_udf.sh b/custom_udf/build_custom_udf.sh index 6d1b0861fb52d2..098035d685070c 100644 --- a/custom_udf/build_custom_udf.sh +++ b/custom_udf/build_custom_udf.sh @@ -19,8 +19,8 @@ ############################################################## # This script is used to compile UDF # Usage: -# sh build.sh build udf without clean. -# sh build.sh -clean --udf clean previous output and build. +# sh build-custom-udf.sh build udf without clean. +# sh build-custom-udf.sh -clean clean previous output and build. # ############################################################## @@ -48,13 +48,11 @@ usage() { echo " Usage: $0 Optional options: - --udf build custom UDF --clean clean and build target Eg. 
- $0 build UDF without clean - $0 --udf build UDF without clean - $0 --udf --clean clean and build UDF + $0 build UDF without clean + $0 --clean clean and build UDF " exit 1 } @@ -63,7 +61,6 @@ OPTS=$(getopt \ -n $0 \ -o '' \ -o 'h' \ - -l 'udf' \ -l 'clean' \ -l 'help' \ -- "$@") @@ -74,19 +71,16 @@ fi eval set -- "$OPTS" -BUILD_UDF= -CLEAN= +BUILD_UDF=1 +CLEAN=0 HELP=0 if [ $# == 1 ] ; then - # defuat - BUILD_UDF=1 + # default CLEAN=0 else - BUILD_UDF=0 CLEAN=0 while true; do case "$1" in - --udf) BUILD_UDF=1 ; shift ;; --clean) CLEAN=1 ; shift ;; -h) HELP=1; shift ;; --help) HELP=1; shift ;; @@ -101,13 +95,7 @@ if [[ ${HELP} -eq 1 ]]; then exit fi -if [ ${CLEAN} -eq 1 -a ${BUILD_UDF} -eq 0 ]; then - echo "--clean can not be specified without --udf" - exit 1 -fi - echo "Get params: - BUILD_UDF -- $BUILD_UDF CLEAN -- $CLEAN " From 22c684ea1e4f88de9ae598b38d582ecfe82728b0 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Thu, 11 Jun 2020 15:04:44 +0800 Subject: [PATCH 11/18] Change docs Change-Id: I167b7812a2d780a69c7f24183c9671a8053c6793 --- docs/en/extending-doris/contribute_udf.md | 6 +- .../extending-doris/user-defined-function.md | 193 +++++++++++------- docs/zh-CN/extending-doris/contribute_udf.md | 6 +- .../extending-doris/user-defined-function.md | 166 +++++++++------ 4 files changed, 227 insertions(+), 144 deletions(-) diff --git a/docs/en/extending-doris/contribute_udf.md b/docs/en/extending-doris/contribute_udf.md index b3d42960cfcc58..eae29f3df957aa 100644 --- a/docs/en/extending-doris/contribute_udf.md +++ b/docs/en/extending-doris/contribute_udf.md @@ -45,7 +45,7 @@ If you are not sure whether the UDF function you wrote is universal, you can sen ## Source code -The placement path should be under `custom_udf/src/my_udf`. Here with udf_samples, first create a new folder under the `custom_udf/src/` path and store the original code. 
+Create a folder to store UDF functions under `custom_udf/src/`, and store the source code and CMAKE files here. The source code to be contributed should include: `.h`, `.cpp`, `CMakeFile.txt`. Taking udf_samples as an example here, first create a new folder under the `custom_udf/src/` path and store the source code. ``` @@ -61,6 +61,10 @@ The placement path should be under `custom_udf/src/my_udf`. Here with udf_sample ``` +1. CMakeLists.txt + + After the user's CMakeLists are placed here, a few changes are required. Just remove `include udf` and `udf lib`. The reason for the removal is that it has been declared in the custom_udf level CMake file. + ## manual The user manual needs to include: UDF function definition description, applicable scenarios, function syntax, how to compile UDF, how to use UDF in Doris, and use examples. diff --git a/docs/en/extending-doris/user-defined-function.md b/docs/en/extending-doris/user-defined-function.md index 833612e58568dd..160f24bd210404 100644 --- a/docs/en/extending-doris/user-defined-function.md +++ b/docs/en/extending-doris/user-defined-function.md @@ -25,8 +25,7 @@ under the License. --> # User Define Function - -UDF is mainly suitable for scenarios where the analytical capabilities that users need do not possess. Users can implement custom functions according to their own needs, and register with Doris through UDF to expand Doris' capabilities and solve user analysis needs. +UDF is mainly suitable for scenarios where the analytical capabilities that users need do not possess. Users can implement customized functions according to their own needs, and register with Doris through UDF to expand Doris' capabilities and solve user analysis needs. There are two types of analysis requirements that UDF can meet: UDF and UDAF. UDF in this article refers to both. @@ -35,7 +34,7 @@ There are two types of analysis requirements that UDF can meet: UDF and UDAF. 
UD This document mainly describes how to write a custom UDF function and how to use it in Doris. -If users use the UDF function and extend Doris' function analysis, and want to contribute their own UDF functions back to the Doris community for other users, please see the document [Contribute UDF to Doris](http://doris.apache.org/master/en/extending-doris/contribute_udf.html). +If users use the UDF function and extend Doris' function analysis, and want to contribute their own UDF functions back to the Doris community for other users, please see the document [Contribute UDF to Doris](http://doris.apache.org/master/en/extending-doris/contribute_udf.html). ## Writing UDF functions @@ -47,10 +46,20 @@ Writing a UDF function requires the following steps. Create the corresponding header file and CPP file, and implement the logic you need in the CPP file. Correspondence between the implementation function format and UDF in the CPP file. +Users can put their own source code in a folder. Taking udf_sample as an example, the directory structure is as follows: + +``` +└── udf_samples + ├── uda_sample.cpp + ├── uda_sample.h + ├── udf_sample.cpp + └── udf_sample.h +``` + #### Non-variable parameters For UDFs with non-variable parameters, the correspondence between the two is straightforward. -For example, UTF of `INT MyADD(INT, INT)` will correspond to `IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)` +For example, the UDF of `INT MyADD(INT, INT)` will correspond to `IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)`. 1. `AddUdf` can be any name, as long as it is specified when creating UDF. 2. The first parameter in the implementation function is always `FunctionContext*`. The implementer can obtain some query-related content through this structure, and apply for some memory to be used. The specific interface used can refer to the definition in `udf/udf.h`. 
@@ -83,29 +92,10 @@ The implementation function is `StringVal md5sumUdf(FunctionContext* ctx, int nu |Varchar|StringVal| |Decimal|DecimalVal| -### Directory structure after writing - -Taking udf_sample as an example here, create a `udf_samples` directory under src dir to store source code. - -``` - -├── be -├── custom_udf -│ ├── CMakeLists.txt -│ ├── build_custom_udf.sh -│ └── src -│ └── udf_samples -│ ├── CMakeLists.txt -│ ├── uda_sample.cpp -│ ├── uda_sample.h -│ ├── udf_sample.cpp -│ └── udf_sample.h - -``` ## Compile UDF function -Since the function implemented by the user depends on the udf of Doris, the first step is to compile Doris when compiling the UDF function. Then compile the UDF implemented by the user. + Since the function implemented by the user depends on the udf of Doris, the first step is to compile Doris when compiling the UDF function. Then compile the UDF implemented by the user. ### Compile Doris @@ -113,86 +103,133 @@ Running `sh build.sh` in the root directory of Doris will generate the correspon ``` ├── output -│   └── udf -│   ├── include -│   │   ├── uda_test_harness.h -│   │   └── udf.h -│   └── lib -│   └── libDorisUdf.a +│ └── udf +│ ├── include +│ │ ├── uda_test_harness.h +│ │ └── udf.h +│ └── lib +│ └── libDorisUdf.a ``` -### Write CMakeLists.txt for custom UDF +### Writing UDF compilation files -1. Add custom UDF compilation under `custom_udf/CMakeLists.txt`. Take udf_samples as an example +1. Prepare third_party -    ``` -    ├── be -    ├── custom_udf -    │ ├── CMakeLists.txt -    │ └── src + The third_party folder is mainly used to store third-party libraries that users' UDF functions depend on, including header files and static libraries. The two files that must be included are `udf.h` and `libDorisUdf.a`. + Taking udf_sample as an example here, the source code is stored in the user's own `udf_samples` directory. 
Create a third_party folder in the same directory to store the dependent static library generated in the previous step. The directory structure is as follows: -    custom_udf/CMakeLists.txt -    ... -    add_subdirectory(${SRC_DIR}/udf_samples) -    ... + ``` + ├── third_party + │ │── include + │ │ └── udf.h + │ └── lib + │ └── libDorisUdf.a + └── udf_samples -    ``` + ``` -2. Add dependency in custom UDF. Take udf_samples as an example, + `udf.h` is a header file that UDF functions must depend on. The original storage path is `doris/be/src/udf/udf.h`. Users need to copy this header file in the Doris project to their include folder of `third_party`. -    Since the code in udf_samples does not depend on any other libraries, there is no need to declare. + `libDorisUdf.a` is a static library that UDF functions must depend on. The output of the BE step in the previous compilation. After the compilation is complete, the file is stored in `doris/output/udf/lib/libDorisUdf.a`. The user needs to copy this file to the lib folder of his `third_party`. -    If the code depends on functions such as `StringVal` in Doris UDF, you need to declare that it depends on udf. Modify `udf_samples/CMakeFiles.txt`: + *Note: Static libraries will only be generated after BE compilation is completed. -    ``` - ├── be - ├── custom_udf - │   ├── CMakeLists.txt - │   └── src - │   └── udf_samples - │   ├── CMakeLists.txt +2. Prepare to compile UDF's CMakeFiles.txt -    custom_udf/src/udf_samples/CMakeFiles.txt -    ... -    target_link_libraries(udfsample -        udf -        -static-libstdc++ -        -static-libgcc -    ) -    ... + CMakeFiles.txt is used to declare how UDF functions are compiled. Stored in the source code folder, level with user code. 
Here, taking udf_samples as an example, the directory structure is as follows: -    ``` + ``` + ├── third_party + └── udf_samples + ├── CMakeLists.txt + ├── uda_sample.cpp + ├── uda_sample.h + ├── udf_sample.cpp + └── udf_sample.h + ``` -### Execute compilation + + Need to show declaration reference `libDorisUdf.a` + + Declare `udf.h` header file location -Run `build_custom_udf.sh` under custom_udf -``` -├── be -├── custom_udf -│ ├── build_custom_udf.sh + Take udf_sample as an example + + ``` + # Include udf + include_directories(third_party/include) + + # Set all libraries + add_library(udf STATIC IMPORTED) + set_target_properties(udf PROPERTIES IMPORTED_LOCATION third_party/lib/libDorisUdf.a) -build_custom_udf.sh --udf --clean + # where to put generated libraries + set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") + # where to put generated binaries + set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") + + add_library(udfsample SHARED udf_sample.cpp) + target_link_libraries(udfsample + udf + -static-libstdc++ + -static-libgcc + ) + + add_library(udasample SHARED uda_sample.cpp) + target_link_libraries(udasample + udf + -static-libstdc++ + -static-libgcc + ) + ``` + + If the user's UDF function also depends on other third-party libraries, you need to declare include, lib, and add dependencies in `add_library`. + +The complete directory structure after all files are prepared is as follows: + +``` + ├── third_party + │ │── include + │ │ └── udf.h + │ └── lib + │ └── libDorisUdf.a + └── udf_samples + ├── CMakeLists.txt + ├── uda_sample.cpp + ├── uda_sample.h + ├── udf_sample.cpp + └── udf_sample.h ``` -If no parameters are passed in by default, the compilation script is directly compiled and not clean. If you need to clean and then compile, you need to add the parameter `--udf --clean` +Prepare the above files and you can compile UDF directly + +### Execute compilation + +Create a build folder under the udf_samples folder to store the compilation output. 
+ +Run the command `cmake ../` in the build folder to generate a Makefile, and execute make to generate the corresponding dynamic library. + +``` +├── third_party +├── build +└── udf_samples +``` ### Compilation result -After the compilation is completed, the dynamic link library is placed under `output/custom_udf/`. Taking udf_samples as an example, the directory structure is as follows: +After the compilation is completed, the dynamic link library is placed under `build/src/`. Taking udf_samples as an example, the directory structure is as follows: ``` -├── output -│   ├── be -│   ├── custom_udf -│   │   └── lib -│   │   └── udf_samples -│   │   ├── libudasample.so -│   │   └── libudfsample.so +├── third_party +├── build +│ └── src +│ └── udf_samples +│ ├── libudasample.so +│ └── libudfsample.so +└── udf_samples ``` @@ -208,7 +245,7 @@ PROPERTIES (["key"="value"][,...]) ``` Description: -1. "Symbol" in PROPERTIES means that the symbol corresponding to the entry function is executed. This parameter must be set. You can get the corresponding symbol through the `nm` command, such as `_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_` obtained by `nm libudfsample.so | grep AddUdf` is the corresponding symbol. +1. "Symbol" in PROPERTIES means that the symbol corresponding to the entry function is executed. This parameter must be set. You can get the corresponding symbol through the `nm` command, for example, `_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_` obtained by `nm libudfsample.so | grep AddUdf` is the corresponding symbol. 2. The object_file in PROPERTIES indicates where it can be downloaded to the corresponding dynamic library. This parameter must be set. 3. name: A function belongs to a certain DB, and the name is in the form of `dbName`.`funcName`. When `dbName` is not explicitly specified, the db where the current session is located is used as `dbName`. 
diff --git a/docs/zh-CN/extending-doris/contribute_udf.md b/docs/zh-CN/extending-doris/contribute_udf.md index 957d2d0e5e4db8..8398e7208bf82e 100644 --- a/docs/zh-CN/extending-doris/contribute_udf.md +++ b/docs/zh-CN/extending-doris/contribute_udf.md @@ -45,7 +45,7 @@ under the License. ## 源代码 - 待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。存放路径应该在 `custom_udf/src/my_udf` 下。这里以 udf_samples 为例,首先在 `custom_udf/src/` 路径下创建一个新的文件夹,并存放源码。 + 在 `custom_udf/src/` 下创建一个存放 UDF 函数的文件夹,并将源码和 CMAKE 文件存放在此处。待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。这里以 udf_samples 为例,首先在 `custom_udf/src/` 路径下创建一个新的文件夹,并存放源码。 ``` ├── custom_udf @@ -60,6 +60,10 @@ under the License. ``` +1. CMakeLists.txt + + 用户的 CMakeLists 放在此处后,需要进行少量更改。去掉 `include udf` 和 `udf lib` 即可。去掉的原因是,在 custom_udf 层级的 CMake 文件中,已经声明了。 + ## 使用手册 使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 diff --git a/docs/zh-CN/extending-doris/user-defined-function.md b/docs/zh-CN/extending-doris/user-defined-function.md index 238898c5a4507e..886cac3f64f9b1 100644 --- a/docs/zh-CN/extending-doris/user-defined-function.md +++ b/docs/zh-CN/extending-doris/user-defined-function.md @@ -47,6 +47,16 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 创建对应的头文件、CPP文件,在CPP文件中实现你需要的逻辑。CPP文件中的实现函数格式与UDF的对应关系。 +用户可以把自己的 source code 统一放在一个文件夹下。这里以 udf_sample 为例,目录结构如下: + +``` +└── udf_samples + ├── uda_sample.cpp + ├── uda_sample.h + ├── udf_sample.cpp + └── udf_sample.h +``` + #### 非可变参数 对于非可变参数的UDF,那么两者之间的对应关系很直接。 @@ -83,33 +93,14 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 |Varchar|StringVal| |Decimal|DecimalVal| -### 编写完成后的目录结构 - -这里以 udf_sample 为例, 在 src dir 下面创建一个 `udf_samples` 目录用于存放 source code. 
- -``` - -├── be -├── custom_udf -│   ├── CMakeLists.txt -│   ├── build_custom_udf.sh -│   └── src -│   └── udf_samples -│   ├── CMakeLists.txt -│   ├── uda_sample.cpp -│   ├── uda_sample.h -│   ├── udf_sample.cpp -│   └── udf_sample.h - -``` ## 编译 UDF 函数 -由于用户自己实现的 function 中依赖了 Doris 的 udf , 所以在编译 UDF 函数的时候首先对 Doris 进行编译。然后再编译用户自己实现的 UDF 即可。 + 由于用户自己实现的 function 中依赖了 Doris 的 udf , 所以在编译 UDF 函数的时候首先对 Doris 进行编译。然后再编译用户自己实现的 UDF 即可。 ### 编译Doris -在Doris根目录下执行`sh build.sh`就会在`output/udf/`生成对应`headers|libs` +在Doris根目录下执行 `sh build.sh` 就会在 `output/udf/` 生成对应 `headers|libs` ``` ├── output @@ -122,77 +113,124 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ``` -### 编写自定义 UDF 的 CMakeLists.txt +### 编写 UDF 编译文件 -1. 在 `custom_udf/CMakeLists.txt` 下增加对自定义 UDF 的编译。以 udf_samples 为例 - - ``` - ├── be - ├── custom_udf - │   ├── CMakeLists.txt - │   └── src +1. 准备 third_party + third_party 文件夹主要用于存放用户 UDF 函数依赖的第三方库,包括头文件及静态库。其中必须包含的是 `udf.h` 和 `libDorisUdf.a` 这两个文件。 - custom_udf/CMakeLists.txt - ... - add_subdirectory(${SRC_DIR}/udf_samples) - ... + 这里以 udf_sample 为例, 在 用户自己 `udf_samples` 目录用于存放 source code。在同级目录下再创建一个 `third_party` 文件夹用于存放上一步生成的依赖静态库。目录结构如下: ``` + ├── third_party + │ │── include + │ │ └── udf.h + │ └── lib + │ └── libDorisUdf.a + └── udf_samples -2. 在自定义 UDF 中增加依赖。以 udf_samples 为例, + ``` - 由于 udf_samples 中的代码都没有依赖任何其他库,则不需要声明。 - - 如果代码中依赖了比如 Doris UDF 中对 `StringVal` 的函数,则需要声明依赖了 udf。修改 `udf_samples/CMakeFiles.txt`: + `udf.h` 是 UDF 函数必须依赖的头文件。原始存放路径为 `doris/be/src/udf/udf.h`。 用户需要将 Doris 工程中的这个头文件拷贝到自己的 `third_party` 的 include 文件夹下。 + + `libDorisUdf.a` 是 UDF 函数必须依赖的静态库。在前面编译 BE 步骤的产出,编译完成后该文件存放在 `doris/output/udf/lib/libDorisUdf.a`。用户需要将该文件拷贝到自己的 `third_party` 的 lib 文件夹下。 + + *注意:静态库只有完成 BE 编译后才会生成。 + +2. 
准备编译 UDF 的 CMakeFiles.txt + + CMakeFiles.txt 用于声明 UDF 函数如何进行编译。存放在源码文件夹下,与用户代码平级。这里以 `udf_samples` 为例目录结构如下: ``` - ├── be - ├── custom_udf - │   ├── CMakeLists.txt - │   └── src - │   └── udf_samples - │   ├── CMakeLists.txt + ├── third_party + └── udf_samples + ├── CMakeLists.txt + ├── uda_sample.cpp + ├── uda_sample.h + ├── udf_sample.cpp + └── udf_sample.h + ``` + + + 需要显示声明引用 `libDorisUdf.a` + + 声明 `udf.h` 头文件位置 + + + 以 udf_sample 为例 - custom_udf/src/udf_samples/CMakeFiles.txt - ... - target_link_libraries(udfsample + ``` + # Include udf + include_directories(third_party/include) + + # Set all libraries + add_library(udf STATIC IMPORTED) + set_target_properties(udf PROPERTIES IMPORTED_LOCATION third_party/lib/libDorisUdf.a) + + # where to put generated libraries + set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") + + # where to put generated binaries + set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") + + add_library(udfsample SHARED udf_sample.cpp) + target_link_libraries(udfsample udf -static-libstdc++ -static-libgcc - ) - ... 
+ ) + add_library(udasample SHARED uda_sample.cpp) + target_link_libraries(udasample + udf + -static-libstdc++ + -static-libgcc + ) ``` -### 执行编译 + 如果用户的 UDF 函数还依赖了其他的三方库,则需要声明include,lib,并在 `add_library` 中增加依赖。 -运行 custom_udf 下的 `build_custom_udf.sh` +所有文件准备齐后完整的目录结构如下: ``` -├── be -├── custom_udf -│   ├── build_custom_udf.sh + ├── third_party + │ │── include + │ │ └── udf.h + │ └── lib + │ └── libDorisUdf.a + └── udf_samples + ├── CMakeLists.txt + ├── uda_sample.cpp + ├── uda_sample.h + ├── udf_sample.cpp + └── udf_sample.h +``` -build_custom_udf.sh --udf --clean +准备好上述文件就可以直接编译 UDF 了 -``` +### 执行编译 + +在 udf_samples 文件夹下创建一个 build 文件夹,用于存放编译产出。 -这个编译脚本如果默认不传入任何参数,则直接编译并且不 clean。如果需要 clean 后再编译则需要加上参数 `--udf --clean` +在 build 文件夹下运行命令 `cmake ../` 生成Makefile,并执行 make 就会生成对应动态库。 + +``` +├── third_party +├── build +└── udf_samples +``` ### 编译结果 -编译完成后的动态链接库被放在了 `output/custom_udf/` 下,以 udf_samples 为例,目录结构如下: +编译完成后的动态链接库被放在了 `build/src/` 下,以 udf_samples 为例,目录结构如下: ``` -├── output -│   ├── be -│   ├── custom_udf -│   │   └── lib -│   │   └── udf_samples -│   │   ├── libudasample.so -│   │   └── libudfsample.so +├── third_party +├── build +│ └── src +│ └── udf_samples +│ ├── libudasample.so +│   └── libudfsample.so +└── udf_samples ``` From 285389b60a6db33c6b212d79e6218c777813bbc1 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Fri, 12 Jun 2020 18:39:30 +0800 Subject: [PATCH 12/18] Change name contrib/udf Change-Id: I932a1759fbca676206a228193e6e7697aca68e8a --- {custom_udf => contrib/udf}/CMakeLists.txt | 2 +- .../udf/build_udf.sh | 26 +++++++------------ .../udf}/src/udf_samples/CMakeLists.txt | 0 .../udf}/src/udf_samples/uda_sample.cpp | 0 .../udf}/src/udf_samples/uda_sample.h | 0 .../udf}/src/udf_samples/udf_sample.cpp | 0 .../udf}/src/udf_samples/udf_sample.h | 0 7 files changed, 11 insertions(+), 17 deletions(-) rename {custom_udf => contrib/udf}/CMakeLists.txt (99%) rename custom_udf/build_custom_udf.sh => contrib/udf/build_udf.sh (82%) rename 
{custom_udf => contrib/udf}/src/udf_samples/CMakeLists.txt (100%) rename {custom_udf => contrib/udf}/src/udf_samples/uda_sample.cpp (100%) rename {custom_udf => contrib/udf}/src/udf_samples/uda_sample.h (100%) rename {custom_udf => contrib/udf}/src/udf_samples/udf_sample.cpp (100%) rename {custom_udf => contrib/udf}/src/udf_samples/udf_sample.h (100%) diff --git a/custom_udf/CMakeLists.txt b/contrib/udf/CMakeLists.txt similarity index 99% rename from custom_udf/CMakeLists.txt rename to contrib/udf/CMakeLists.txt index 1cf6e51797f376..ae9d5fa1ef8343 100644 --- a/custom_udf/CMakeLists.txt +++ b/contrib/udf/CMakeLists.txt @@ -26,7 +26,7 @@ else() message(FATAL_ERROR "DORIS_GCC_HOME environment variable is not set") endif() -project(doris) +project(doris_udf) # set CMAKE_BUILD_TYPE if (NOT CMAKE_BUILD_TYPE) diff --git a/custom_udf/build_custom_udf.sh b/contrib/udf/build_udf.sh similarity index 82% rename from custom_udf/build_custom_udf.sh rename to contrib/udf/build_udf.sh index 098035d685070c..02d95e421fc842 100644 --- a/custom_udf/build_custom_udf.sh +++ b/contrib/udf/build_udf.sh @@ -19,8 +19,8 @@ ############################################################## # This script is used to compile UDF # Usage: -# sh build-custom-udf.sh build udf without clean. -# sh build-custom-udf.sh -clean clean previous output and build. +# sh build-udf.sh build udf without clean. +# sh build-udf.sh --clean clean previous output and build. # ############################################################## @@ -29,18 +29,12 @@ set -eo pipefail ROOT=`dirname "$0"` ROOT=`cd "$ROOT"; pwd` -export DORIS_HOME=$(dirname "$PWD") +export UDF_HOME=${ROOT} +export DORIS_HOME=$(cd ../..; printf %s "$PWD") echo ${DORIS_HOME} -export CUSTOM_UDF_HOME=${ROOT} . ${DORIS_HOME}/env.sh -# build thirdparty libraries if necessary -if [[ ! -f ${DORIS_THIRDPARTY}/installed/lib/libs2.a ]]; then - echo "Thirdparty libraries need to be build ..." 
- ${DORIS_THIRDPARTY}/build-thirdparty.sh -fi - PARALLEL=$[$(nproc)/4+1] # Check args @@ -99,7 +93,7 @@ echo "Get params: CLEAN -- $CLEAN " -cd ${CUSTOM_UDF_HOME} +cd ${UDF_HOME} # Clean and build UDF if [ ${BUILD_UDF} -eq 1 ] ; then CMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} @@ -107,14 +101,14 @@ if [ ${BUILD_UDF} -eq 1 ] ; then CMAKE_BUILD_DIR=${CUSTOM_UDF_HOME}/build_${CMAKE_BUILD_TYPE} if [ ${CLEAN} -eq 1 ]; then rm -rf $CMAKE_BUILD_DIR - rm -rf ${CUSTOM_UDF_HOME}/output/ + rm -rf ${UDF_HOME}/output/ fi mkdir -p ${CMAKE_BUILD_DIR} cd ${CMAKE_BUILD_DIR} ${CMAKE_CMD} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} ../ make -j${PARALLEL} VERBOSE=1 make install - cd ${CUSTOM_UDF_HOME} + cd ${UDF_HOME} fi # Clean and prepare output dir @@ -123,11 +117,11 @@ mkdir -p ${DORIS_OUTPUT} #Copy UDF if [ ${BUILD_UDF} -eq 1 ]; then - install -d ${DORIS_OUTPUT}/custom_udf/lib + install -d ${DORIS_OUTPUT}/contrib/udf/lib for dir in $(ls ${CMAKE_BUILD_DIR}/src) do - mkdir -p ${DORIS_OUTPUT}/custom_udf/lib/$dir - cp -r -p ${CMAKE_BUILD_DIR}/src/$dir/*.so ${DORIS_OUTPUT}/custom_udf/lib/$dir/ + mkdir -p ${DORIS_OUTPUT}/contrib/udf/lib/$dir + cp -r -p ${CMAKE_BUILD_DIR}/src/$dir/*.so ${DORIS_OUTPUT}/contrib/udf/lib/$dir/ done fi diff --git a/custom_udf/src/udf_samples/CMakeLists.txt b/contrib/udf/src/udf_samples/CMakeLists.txt similarity index 100% rename from custom_udf/src/udf_samples/CMakeLists.txt rename to contrib/udf/src/udf_samples/CMakeLists.txt diff --git a/custom_udf/src/udf_samples/uda_sample.cpp b/contrib/udf/src/udf_samples/uda_sample.cpp similarity index 100% rename from custom_udf/src/udf_samples/uda_sample.cpp rename to contrib/udf/src/udf_samples/uda_sample.cpp diff --git a/custom_udf/src/udf_samples/uda_sample.h b/contrib/udf/src/udf_samples/uda_sample.h similarity index 100% rename from custom_udf/src/udf_samples/uda_sample.h rename to contrib/udf/src/udf_samples/uda_sample.h diff --git a/custom_udf/src/udf_samples/udf_sample.cpp b/contrib/udf/src/udf_samples/udf_sample.cpp 
similarity index 100% rename from custom_udf/src/udf_samples/udf_sample.cpp rename to contrib/udf/src/udf_samples/udf_sample.cpp diff --git a/custom_udf/src/udf_samples/udf_sample.h b/contrib/udf/src/udf_samples/udf_sample.h similarity index 100% rename from custom_udf/src/udf_samples/udf_sample.h rename to contrib/udf/src/udf_samples/udf_sample.h From b462cd526568794158547af3577add4366c8a7cf Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 15 Jun 2020 10:30:48 +0800 Subject: [PATCH 13/18] Change dir1 Change-Id: I52bf4d19ae915444867f783f2e0534eaca9e8c36 --- contrib/udf/CMakeLists.txt | 2 +- contrib/udf/build_udf.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/udf/CMakeLists.txt b/contrib/udf/CMakeLists.txt index ae9d5fa1ef8343..e0feef1fdeaf83 100644 --- a/contrib/udf/CMakeLists.txt +++ b/contrib/udf/CMakeLists.txt @@ -37,7 +37,7 @@ string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) message(STATUS "Build type is ${CMAKE_BUILD_TYPE}") set(BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -set(ENV{DORIS_HOME} "${BASE_DIR}/../") +set(ENV{DORIS_HOME} "${BASE_DIR}/../../") set(THIRDPARTY_DIR "$ENV{DORIS_THIRDPARTY}/installed/") set(BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(SRC_DIR "${BASE_DIR}/src/") diff --git a/contrib/udf/build_udf.sh b/contrib/udf/build_udf.sh index 02d95e421fc842..fd260de44356a5 100644 --- a/contrib/udf/build_udf.sh +++ b/contrib/udf/build_udf.sh @@ -98,7 +98,7 @@ cd ${UDF_HOME} if [ ${BUILD_UDF} -eq 1 ] ; then CMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} echo "Build UDF: ${CMAKE_BUILD_TYPE}" - CMAKE_BUILD_DIR=${CUSTOM_UDF_HOME}/build_${CMAKE_BUILD_TYPE} + CMAKE_BUILD_DIR=${UDF_HOME}/build_${CMAKE_BUILD_TYPE} if [ ${CLEAN} -eq 1 ]; then rm -rf $CMAKE_BUILD_DIR rm -rf ${UDF_HOME}/output/ From cade6a422fb8bb2139043155c1e3cfba321db41b Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Sun, 21 Jun 2020 16:50:50 +0800 Subject: [PATCH 14/18] Change docs Change-Id: 
If60dde5acb6cb7500f9e704636afabdf3907432d --- docs/.vuepress/sidebar/en.js | 4 +- docs/.vuepress/sidebar/zh-CN.js | 4 +- docs/en/extending-doris/contribute_udf.md | 50 +++++++++--------- .../extending-doris/user-defined-function.md | 51 ++++++++++--------- docs/zh-CN/extending-doris/contribute_udf.md | 45 ++++++++-------- .../extending-doris/user-defined-function.md | 50 +++++++++--------- 6 files changed, 107 insertions(+), 97 deletions(-) diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index d8d513fa9c448e..909f18e33b4897 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -129,8 +129,8 @@ module.exports = [ "logstash", "contribute_udf", { - title: "Third-party UDF", - directoryPath: "third-party-udf/", + title: "Users contribute UDF", + directoryPath: "contrib/udf", children:[], }, ], diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index 72b0d3022e1ab6..4c9f00bbf93c84 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -140,8 +140,8 @@ module.exports = [ "logstash", "contribute_udf", { - title: "第三方 UDF", - directoryPath: "third-party-udf/", + title: "用户贡献的 UDF", + directoryPath: "contrib/udf/", children:[], }, ], diff --git a/docs/en/extending-doris/contribute_udf.md b/docs/en/extending-doris/contribute_udf.md index eae29f3df957aa..c1efab8849d469 100644 --- a/docs/en/extending-doris/contribute_udf.md +++ b/docs/en/extending-doris/contribute_udf.md @@ -45,49 +45,49 @@ If you are not sure whether the UDF function you wrote is universal, you can sen ## Source code -Create a folder to store UDF functions under `custom_udf/src/`, and store the source code and CMAKE files here. The source code to be contributed should include: `.h`, `.cpp`, `CMakeFile.txt`. Taking udf_samples as an example here, first create a new folder under the `custom_udf/src/` path and store the source code. 
+Create a folder for UDF functions under `contrib/udf/src/`, and store the source code and CMAKE files here. The source code to be contributed should include: `.h`, `.cpp`, `CMakeLists.txt`. Taking udf_samples as an example here, first create a new folder under the `contrib/udf/src/` path and store the source code. ``` - - ├── custom_udf - │ ├── CMakeLists.txt - │ └── src - │ └── udf_samples - │ ├── CMakeLists.txt - │ ├── uda_sample.cpp - │ ├── uda_sample.h - │ ├── udf_sample.cpp - │ └── udf_sample.h + ├──contrib + │ └── udf + │ ├── CMakeLists.txt + │ └── src + │ └── udf_samples + │ ├── CMakeLists.txt + │ ├── uda_sample.cpp + │ ├── uda_sample.h + │ ├── udf_sample.cpp + │ └── udf_sample.h ``` 1. CMakeLists.txt - After the user's CMakeLists are placed here, a few changes are required. Just remove `include udf` and `udf lib`. The reason for the removal is that it has been declared in the custom_udf level CMake file. +After the user's `CMakeLists.txt` is placed here, a few changes are required. Just remove `include udf` and `udf lib`. The reason for the removal is that it has been declared in the CMake file at the `contrib/udf` level. ## manual The user manual needs to include: UDF function definition description, applicable scenarios, function syntax, how to compile UDF, how to use UDF in Doris, and use examples. -1. The user manual must contain both Chinese and English versions, and be stored under `docs/zh-CN/extending-doris/third-party-udf/` and `docs/en/extending-doris/third-party-udf`, respectively. +1. The user manual must contain both Chinese and English versions and be stored under `docs/zh-CN/extending-doris/contrib/udf` and `docs/en/extending-doris/contrib/udf` respectively. 
``` ├── docs │   └── zh-CN │   └──extending-doris - │ └──third-party-udf - │ ├── udf-simple-manual.md - - - ``` + │ └──contrib + │ └──udf + │ ├── udf-simple-manual.md + + ``` ``` ├── docs │   └── en │   └──extending-doris - │ └──third-party-udf - │ ├── udf-simple-manual.md - + │ └──contrib + │ └──udf + │ ├── udf-simple-manual.md ``` 2. Add the two manual files to the sidebar in Chinese and English. @@ -95,8 +95,8 @@ The user manual needs to include: UDF function definition description, applicabl ``` vi docs/.vuepress/sidebar/zh-CN.js { - title: "第三方 UDF", - directoryPath: "third-party-udf/", + title: "用户贡献的 UDF", + directoryPath: "contrib/udf/", children: [ "udf-simple-manual", @@ -107,8 +107,8 @@ The user manual needs to include: UDF function definition description, applicabl ``` vi docs/.vuepress/sidebar/en.js { - title: "Third-party UDF", - directoryPath: "third-party-udf/", + title: "Users contribute UDF", + directoryPath: "contrib/udf/", children: [ "udf-simple-manual", diff --git a/docs/en/extending-doris/user-defined-function.md b/docs/en/extending-doris/user-defined-function.md index 160f24bd210404..33ff9aee5686c9 100644 --- a/docs/en/extending-doris/user-defined-function.md +++ b/docs/en/extending-doris/user-defined-function.md @@ -25,7 +25,7 @@ under the License. --> # User Define Function -UDF is mainly suitable for scenarios where the analytical capabilities that users need do not possess. Users can implement customized functions according to their own needs, and register with Doris through UDF to expand Doris' capabilities and solve user analysis needs. +UDF is mainly suitable for scenarios where the analytical capabilities that users need do not possess. Users can implement custom functions according to their own needs, and register with Doris through the UDF framework to expand Doris' capabilities and solve user analysis needs. There are two types of analysis requirements that UDF can meet: UDF and UDAF. UDF in this article refers to both. 
@@ -38,7 +38,7 @@ If users use the UDF function and extend Doris' function analysis, and want to c ## Writing UDF functions -Before using UDF, users need to write their own UDF functions under Doris' UDF framework. In the `custom_udf/src/udf_samples/udf_sample.h|cpp` file is a simple UDF Demo. +Before using UDF, users need to write their own UDF functions under Doris' UDF framework. In the `contrib/udf/src/udf_samples/udf_sample.h|cpp` file is a simple UDF Demo. Writing a UDF function requires the following steps. @@ -95,11 +95,13 @@ The implementation function is `StringVal md5sumUdf(FunctionContext* ctx, int nu ## Compile UDF function - Since the function implemented by the user depends on the udf of Doris, the first step is to compile Doris when compiling the UDF function. Then compile the UDF implemented by the user. +Since the UDF implementation relies on Doris' UDF framework, the first step in compiling UDF functions is to compile Doris, that is, the UDF framework. + +After the compilation is completed, the static library file of the UDF framework will be generated. Then introduce the UDF framework dependency and compile the UDF. ### Compile Doris -Running `sh build.sh` in the root directory of Doris will generate the corresponding `headers|libs` in `output/udf/` +Running `sh build.sh` in the root directory of Doris will generate a static library file of the UDF framework `headers|libs` in `output/udf/` ``` ├── output @@ -116,25 +118,25 @@ Running `sh build.sh` in the root directory of Doris will generate the correspon 1. Prepare third_party - The third_party folder is mainly used to store third-party libraries that users' UDF functions depend on, including header files and static libraries. The two files that must be included are `udf.h` and `libDorisUdf.a`. + The third_party folder is mainly used to store third-party libraries that users' UDF functions depend on, including header files and static libraries. 
It must contain the two files `udf.h` and `libDorisUdf.a` in the dependent Doris UDF framework. - Taking udf_sample as an example here, the source code is stored in the user's own `udf_samples` directory. Create a third_party folder in the same directory to store the dependent static library generated in the previous step. The directory structure is as follows: + Taking udf_sample as an example here, the source code is stored in the user's own `udf_samples` directory. Create a third_party folder in the same directory to store the static library. The directory structure is as follows: ``` ├── third_party - │ │── include - │ │ └── udf.h - │ └── lib - │ └── libDorisUdf.a + │ │── include + │ │ └── udf.h + │ └── lib + │ └── libDorisUdf.a └── udf_samples ``` - `udf.h` is a header file that UDF functions must depend on. The original storage path is `doris/be/src/udf/udf.h`. Users need to copy this header file in the Doris project to their include folder of `third_party`. + `udf.h` is the UDF frame header file. The storage path is `doris/output/udf/include/udf.h`. Users need to copy the header file in the Doris compilation output to their include folder of `third_party`. - `libDorisUdf.a` is a static library that UDF functions must depend on. The output of the BE step in the previous compilation. After the compilation is complete, the file is stored in `doris/output/udf/lib/libDorisUdf.a`. The user needs to copy this file to the lib folder of his `third_party`. + `libDorisUdf.a` is a static library of UDF framework. After Doris is compiled, the file is stored in `doris/output/udf/lib/libDorisUdf.a`. The user needs to copy the file to the lib folder of his `third_party`. - *Note: Static libraries will only be generated after BE compilation is completed. + *Note: The static library of UDF framework will not be generated until Doris is compiled. 2. 
Prepare to compile UDF's CMakeFiles.txt @@ -213,29 +215,30 @@ Run the command `cmake ../` in the build folder to generate a Makefile, and exec ``` ├── third_party -├── build -└── udf_samples +├── udf_samples + └── build ``` ### Compilation result -After the compilation is completed, the dynamic link library is placed under `build/src/`. Taking udf_samples as an example, the directory structure is as follows: +After the compilation is completed, the UDF dynamic link library is successfully generated. Under `build/src/`, taking udf_samples as an example, the directory structure is as follows: ``` - ├── third_party -├── build -│ └── src -│ └── udf_samples -│ ├── libudasample.so -│ └── libudfsample.so -└── udf_samples +├── udf_samples + └── build + └── src + └── udf_samples + ├── libudasample.so +   └── libudfsample.so ``` ## Create UDF function -After going through the above steps, you can get a dynamic library. You need to put this dynamic library in a location that can be accessed through the HTTP protocol. Then execute the create UDF function to create a UDF inside the Doris system. You need to have AMDIN permission to complete this operation. +After following the above steps, you can get the UDF dynamic library (that is, the `.so` file in the compilation result). You need to put this dynamic library in a location that can be accessed through the HTTP protocol. + +Then log in to the Doris system and create a UDF function in the mysql-client through the `CREATE FUNCTION` syntax. You need to have AMDIN authority to complete this operation. At this time, there will be a UDF created in the Doris system. ``` CREATE [AGGREGATE] FUNCTION diff --git a/docs/zh-CN/extending-doris/contribute_udf.md b/docs/zh-CN/extending-doris/contribute_udf.md index 8398e7208bf82e..aee1a51a0277ce 100644 --- a/docs/zh-CN/extending-doris/contribute_udf.md +++ b/docs/zh-CN/extending-doris/contribute_udf.md @@ -45,37 +45,39 @@ under the License. 
## 源代码 - 在 `custom_udf/src/` 下创建一个存放 UDF 函数的文件夹,并将源码和 CMAKE 文件存放在此处。待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。这里以 udf_samples 为例,首先在 `custom_udf/src/` 路径下创建一个新的文件夹,并存放源码。 + 在 `contrib/udf/src/` 下创建一个存放 UDF 函数的文件夹,并将源码和 CMAKE 文件存放在此处。待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。这里以 udf_samples 为例,首先在 `contrib/udf/src/` 路径下创建一个新的文件夹,并存放源码。 ``` - ├── custom_udf - │ ├── CMakeLists.txt - │ └── src - │ └── udf_samples - │ ├── CMakeLists.txt - │ ├── uda_sample.cpp - │ ├── uda_sample.h - │ ├── udf_sample.cpp - │ └── udf_sample.h + ├──contrib + │ └── udf + │ ├── CMakeLists.txt + │ └── src + │ └── udf_samples + │ ├── CMakeLists.txt + │ ├── uda_sample.cpp + │ ├── uda_sample.h + │ ├── udf_sample.cpp + │ └── udf_sample.h ``` 1. CMakeLists.txt - 用户的 CMakeLists 放在此处后,需要进行少量更改。去掉 `include udf` 和 `udf lib` 即可。去掉的原因是,在 custom_udf 层级的 CMake 文件中,已经声明了。 + 用户的 `CMakeLists.txt` 放在此处后,需要进行少量更改。去掉 `include udf` 和 `udf lib` 即可。去掉的原因是,在 `contrib/udf` 层级的 CMake 文件中,已经声明了。 ## 使用手册 使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 -1. 使用手册需包含中英文两个版本,并分别存放在 `docs/zh-CN/extending-doris/third-party-udf/` 和 `docs/en/extending-doris/third-party-udf` 下。 +1. 使用手册需包含中英文两个版本,并分别存放在 `docs/zh-CN/extending-doris/contrib/udf` 和 `docs/en/extending-doris/contrib/udf` 下。 ``` ├── docs │   └── zh-CN │   └──extending-doris - │ └──third-party-udf - │ ├── udf-simple-manual.md + │ └──contrib + │ └──udf + │ ├── udf-simple-manual.md ``` @@ -83,8 +85,9 @@ under the License. ├── docs │   └── en │   └──extending-doris - │ └──third-party-udf - │ ├── udf-simple-manual.md + │ └──contrib + │ └──udf + │ ├── udf-simple-manual.md ``` 2. 将两个使用手册的文件,加入中文和英文的 sidebar 中。 @@ -92,8 +95,8 @@ under the License. ``` vi docs/.vuepress/sidebar/zh-CN.js { - title: "第三方 UDF", - directoryPath: "third-party-udf/", + title: "用户贡献的 UDF", + directoryPath: "contrib/udf/", children: [ "udf-simple-manual", @@ -104,8 +107,8 @@ under the License. 
``` vi docs/.vuepress/sidebar/en.js { - title: "Third-party UDF", - directoryPath: "third-party-udf/", + title: "Users contribute UDF", + directoryPath: "contrib/udf/", children: [ "udf-simple-manual", @@ -118,4 +121,4 @@ under the License. 当你符合前提条件并准备好代码,文档后就可以将 UDF 贡献到 Doris 社区了。在 [Github](https://github.com/apache/incubator-doris) 上面提交 Pull Request (PR) 即可。具体提交方式见:[Pull Request (PR)](https://help.github.com/articles/about-pull-requests/)。 - 最后,当 PR 评审通过并 Merge 后。恭喜你,你的 UDF 成为 Doris 支持的第三方 UDF。你可以在 [Doris 官网](http://doris.apache.org/master/zh-CN/) 的扩展功能部分查看到啦~。 + 最后,当 PR 评审通过并 Merge 后。恭喜你,你的 UDF 已经贡献给 Doris 社区,你可以在 [Doris 官网](http://doris.apache.org/master/zh-CN/) 的扩展功能部分查看到啦~。 diff --git a/docs/zh-CN/extending-doris/user-defined-function.md b/docs/zh-CN/extending-doris/user-defined-function.md index 886cac3f64f9b1..5d6ef965d61f11 100644 --- a/docs/zh-CN/extending-doris/user-defined-function.md +++ b/docs/zh-CN/extending-doris/user-defined-function.md @@ -26,7 +26,7 @@ under the License. 
# User Define Function -UDF 主要适用于,用户需要的分析能力 Doris 并不具备的场景。用户可以自行根据自己的需求,实现自定义的函数,并且通过 UDF 的方式注册到 Doris 中,来扩展 Doris 的能力,并解决用户分析需求。 +UDF 主要适用于,用户需要的分析能力 Doris 并不具备的场景。用户可以自行根据自己的需求,实现自定义的函数,并且通过 UDF 框架注册到 Doris 中,来扩展 Doris 的能力,并解决用户分析需求。 UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指的是二者的统称。 @@ -39,9 +39,9 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ## 编写UDF函数 -在使用UDF之前,用户需要先在Doris的UDF框架下,编写自己的UDF函数。在`custom_udf/src/udf_samples/udf_sample.h|cpp`文件中是一个简单的UDF Demo。 +在使用UDF之前,用户需要先在 Doris 的 UDF 框架下,编写自己的UDF函数。在`contrib/udf/src/udf_samples/udf_sample.h|cpp`文件中是一个简单的 UDF Demo。 -编写一个UDF函数需要以下几个步骤。 +编写一个 UDF 函数需要以下几个步骤。 ### 编写函数 @@ -96,11 +96,13 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ## 编译 UDF 函数 - 由于用户自己实现的 function 中依赖了 Doris 的 udf , 所以在编译 UDF 函数的时候首先对 Doris 进行编译。然后再编译用户自己实现的 UDF 即可。 + 由于 UDF 实现中依赖了 Doris 的 UDF 框架 , 所以在编译 UDF 函数的时候首先要对 Doris 进行编译,也就是对 UDF 框架进行编译。 + + 编译完成后会生成,UDF 框架的静态库文件。之后引入 UDF 框架依赖,并编译 UDF 即可。 ### 编译Doris -在Doris根目录下执行 `sh build.sh` 就会在 `output/udf/` 生成对应 `headers|libs` +在 Doris 根目录下执行 `sh build.sh` 就会在 `output/udf/` 生成 UDF 框架的静态库文件 `headers|libs` ``` ├── output @@ -117,9 +119,9 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 1. 
准备 third_party - third_party 文件夹主要用于存放用户 UDF 函数依赖的第三方库,包括头文件及静态库。其中必须包含的是 `udf.h` 和 `libDorisUdf.a` 这两个文件。 + third_party 文件夹主要用于存放用户 UDF 函数依赖的第三方库,包括头文件及静态库。其中必须包含依赖的 Doris UDF 框架中 `udf.h` 和 `libDorisUdf.a` 这两个文件。 - 这里以 udf_sample 为例, 在 用户自己 `udf_samples` 目录用于存放 source code。在同级目录下再创建一个 `third_party` 文件夹用于存放上一步生成的依赖静态库。目录结构如下: + 这里以 udf_sample 为例, 在 用户自己 `udf_samples` 目录用于存放 source code。在同级目录下再创建一个 `third_party` 文件夹用于存放静态库。目录结构如下: ``` ├── third_party @@ -131,11 +133,11 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ``` - `udf.h` 是 UDF 函数必须依赖的头文件。原始存放路径为 `doris/be/src/udf/udf.h`。 用户需要将 Doris 工程中的这个头文件拷贝到自己的 `third_party` 的 include 文件夹下。 + `udf.h` 是 UDF 框架头文件。存放路径为 `doris/output/udf/include/udf.h`。 用户需要将 Doris 编译产出中的这个头文件拷贝到自己的 `third_party` 的 include 文件夹下。 - `libDorisUdf.a` 是 UDF 函数必须依赖的静态库。在前面编译 BE 步骤的产出,编译完成后该文件存放在 `doris/output/udf/lib/libDorisUdf.a`。用户需要将该文件拷贝到自己的 `third_party` 的 lib 文件夹下。 + `libDorisUdf.a` 是 UDF 框架的静态库。Doris 编译完成后该文件存放在 `doris/output/udf/lib/libDorisUdf.a`。用户需要将该文件拷贝到自己的 `third_party` 的 lib 文件夹下。 - *注意:静态库只有完成 BE 编译后才会生成。 + *注意:UDF 框架的静态库只有完成 Doris 编译后才会生成。 2. 
准备编译 UDF 的 CMakeFiles.txt @@ -186,7 +188,7 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ) ``` - 如果用户的 UDF 函数还依赖了其他的三方库,则需要声明include,lib,并在 `add_library` 中增加依赖。 + 如果用户的 UDF 函数还依赖了其他的三方库,则需要声明 include,lib,并在 `add_library` 中增加依赖。 所有文件准备齐后完整的目录结构如下: @@ -214,29 +216,31 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ``` ├── third_party -├── build -└── udf_samples +├── udf_samples + └── build ``` ### 编译结果 -编译完成后的动态链接库被放在了 `build/src/` 下,以 udf_samples 为例,目录结构如下: +编译完成后的 UDF 动态链接库就生成成功了。在 `build/src/` 下,以 udf_samples 为例,目录结构如下: ``` ├── third_party -├── build -│ └── src -│ └── udf_samples -│ ├── libudasample.so -│   └── libudfsample.so -└── udf_samples +├── udf_samples + └── build + └── src + └── udf_samples + ├── libudasample.so +   └── libudfsample.so ``` ## 创建UDF函数 -通过上述的步骤后,你可以得到一个动态库。你需要将这个动态库放到一个能够通过 HTTP 协议访问到的位置。然后执行创建 UDF 函数在 Doris 系统内部创建一个 UDF,你需要拥有AMDIN权限才能够完成这个操作。 +通过上述的步骤后,你可以得到 UDF 的动态库(也就是编译结果中的 `.so` 文件)。你需要将这个动态库放到一个能够通过 HTTP 协议访问到的位置。 + +然后登录 Doris 系统,在 mysql-client 中通过 `CREATE FUNCTION` 语法创建 UDF 函数。你需要拥有AMDIN权限才能够完成这个操作。这时 Doris 系统内部就会存在刚才创建好的 UDF。 ``` CREATE [AGGREGATE] FUNCTION @@ -254,9 +258,9 @@ CREATE [AGGREGATE] FUNCTION ## 使用UDF -用户使用UDF/UDAF必须拥有对应数据库的 `SELECT` 权限。 +用户使用 UDF 必须拥有对应数据库的 `SELECT` 权限。 -UDF的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而UDF的作用域是DB内部。当链接session位于数据内部时,直接使用UDF名字会在当前DB内部查找对应的UDF。否则用户需要显示的指定UDF的数据库名字,例如`dbName`.`funcName`。 +UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而UDF的作用域是DB内部。当链接session位于数据内部时,直接使用UDF名字会在当前DB内部查找对应的UDF。否则用户需要显示的指定UDF的数据库名字,例如`dbName`.`funcName`。 ## 删除UDF函数 From 0590f37817684768b19e89159ca1f1298da95054 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 22 Jun 2020 11:11:12 +0800 Subject: [PATCH 15/18] Change docs Change-Id: Ief33d948febd28c9c9866d52e192ab84f561f397 --- docs/.vuepress/sidebar/en.js | 16 +++++++---- docs/.vuepress/sidebar/zh-CN.js | 16 +++++++---- .../contribute-udf.md} | 12 ++++---- .../{ => udf}/user-defined-function.md | 28 +++++++++---------- .../contribute-udf.md} | 16 +++++------ .../{ 
=> udf}/user-defined-function.md | 28 +++++++++---------- 6 files changed, 64 insertions(+), 52 deletions(-) rename docs/en/extending-doris/{contribute_udf.md => udf/contribute-udf.md} (95%) rename docs/en/extending-doris/{ => udf}/user-defined-function.md (92%) rename docs/zh-CN/extending-doris/{contribute_udf.md => udf/contribute-udf.md} (91%) rename docs/zh-CN/extending-doris/{ => udf}/user-defined-function.md (89%) diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index 909f18e33b4897..75c7a703dc7f86 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -124,14 +124,20 @@ module.exports = [ "audit-plugin", "doris-on-es", "plugin-development-manual", - "user-defined-function", "spark-doris-connector", "logstash", - "contribute_udf", { - title: "Users contribute UDF", - directoryPath: "contrib/udf", - children:[], + title: "UDF", + directoryPath: "udf/", + children: [ + "user-defined-function", + "contribute-udf", + { + title: "Users contribute UDF", + directoryPath: "contrib/", + children:[], + }, + ], }, ], }, diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index 4c9f00bbf93c84..75f18a9d4deb62 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -135,14 +135,20 @@ module.exports = [ "audit-plugin", "doris-on-es", "plugin-development-manual", - "user-defined-function", "spark-doris-connector", "logstash", - "contribute_udf", { - title: "用户贡献的 UDF", - directoryPath: "contrib/udf/", - children:[], + title: "UDF", + directoryPath: "udf/", + children: [ + "user-defined-function", + "contribute-udf", + { + title: "用户贡献的 UDF", + directoryPath: "contrib/", + children:[], + }, + ], }, ], }, diff --git a/docs/en/extending-doris/contribute_udf.md b/docs/en/extending-doris/udf/contribute-udf.md similarity index 95% rename from docs/en/extending-doris/contribute_udf.md rename to docs/en/extending-doris/udf/contribute-udf.md index c1efab8849d469..50fed56f6c5eb3 
100644 --- a/docs/en/extending-doris/contribute_udf.md +++ b/docs/en/extending-doris/udf/contribute-udf.md @@ -75,8 +75,8 @@ The user manual needs to include: UDF function definition description, applicabl ├── docs │   └── zh-CN │   └──extending-doris - │ └──contrib - │ └──udf + │ └──udf + │ └──contrib │ ├── udf-simple-manual.md ``` @@ -85,8 +85,8 @@ The user manual needs to include: UDF function definition description, applicabl ├── docs │   └── en │   └──extending-doris - │ └──contrib - │ └──udf + │ └──udf + │ └──contrib │ ├── udf-simple-manual.md ``` @@ -96,7 +96,7 @@ The user manual needs to include: UDF function definition description, applicabl vi docs/.vuepress/sidebar/zh-CN.js { title: "用户贡献的 UDF", - directoryPath: "contrib/udf/", + directoryPath: "contrib/", children: [ "udf-simple-manual", @@ -108,7 +108,7 @@ The user manual needs to include: UDF function definition description, applicabl vi docs/.vuepress/sidebar/en.js { title: "Users contribute UDF", - directoryPath: "contrib/udf/", + directoryPath: "contrib/", children: [ "udf-simple-manual", diff --git a/docs/en/extending-doris/user-defined-function.md b/docs/en/extending-doris/udf/user-defined-function.md similarity index 92% rename from docs/en/extending-doris/user-defined-function.md rename to docs/en/extending-doris/udf/user-defined-function.md index 33ff9aee5686c9..c392bc03667f2a 100644 --- a/docs/en/extending-doris/user-defined-function.md +++ b/docs/en/extending-doris/udf/user-defined-function.md @@ -34,7 +34,7 @@ There are two types of analysis requirements that UDF can meet: UDF and UDAF. UD This document mainly describes how to write a custom UDF function and how to use it in Doris. -If users use the UDF function and extend Doris' function analysis, and want to contribute their own UDF functions back to the Doris community for other users, please see the document [Contribute UDF to Doris](http://doris.apache. org/master/zh-CN/extending-doris/contribute_udf.html). 
+If users use the UDF function and extend Doris' function analysis, and want to contribute their own UDF functions back to the Doris community for other users, please see the document [Contribute UDF](./contribute-udf.md). ## Writing UDF functions @@ -116,14 +116,14 @@ Running `sh build.sh` in the root directory of Doris will generate a static libr ### Writing UDF compilation files -1. Prepare third_party +1. Prepare thirdparty - The third_party folder is mainly used to store third-party libraries that users' UDF functions depend on, including header files and static libraries. It must contain the two files `udf.h` and `libDorisUdf.a` in the dependent Doris UDF framework. + The thirdparty folder is mainly used to store thirdparty libraries that users' UDF functions depend on, including header files and static libraries. It must contain the two files `udf.h` and `libDorisUdf.a` in the dependent Doris UDF framework. - Taking udf_sample as an example here, the source code is stored in the user's own `udf_samples` directory. Create a third_party folder in the same directory to store the static library. The directory structure is as follows: + Taking udf_sample as an example here, the source code is stored in the user's own `udf_samples` directory. Create a thirdparty folder in the same directory to store the static library. The directory structure is as follows: ``` - ├── third_party + ├── thirdparty │ │── include │ │ └── udf.h │ └── lib @@ -132,9 +132,9 @@ Running `sh build.sh` in the root directory of Doris will generate a static libr ``` - `udf.h` is the UDF frame header file. The storage path is `doris/output/udf/include/udf.h`. Users need to copy the header file in the Doris compilation output to their include folder of `third_party`. + `udf.h` is the UDF frame header file. The storage path is `doris/output/udf/include/udf.h`. Users need to copy the header file in the Doris compilation output to their include folder of `thirdparty`. 
- `libDorisUdf.a` is a static library of UDF framework. After Doris is compiled, the file is stored in `doris/output/udf/lib/libDorisUdf.a`. The user needs to copy the file to the lib folder of his `third_party`. + `libDorisUdf.a` is a static library of UDF framework. After Doris is compiled, the file is stored in `doris/output/udf/lib/libDorisUdf.a`. The user needs to copy the file to the lib folder of his `thirdparty`. *Note: The static library of UDF framework will not be generated until Doris is compiled. @@ -143,7 +143,7 @@ Running `sh build.sh` in the root directory of Doris will generate a static libr CMakeFiles.txt is used to declare how UDF functions are compiled. Stored in the source code folder, level with user code. Here, taking udf_samples as an example, the directory structure is as follows: ``` - ├── third_party + ├── thirdparty └── udf_samples ├── CMakeLists.txt ├── uda_sample.cpp @@ -160,11 +160,11 @@ Running `sh build.sh` in the root directory of Doris will generate a static libr ``` # Include udf - include_directories(third_party/include) + include_directories(thirdparty/include) # Set all libraries add_library(udf STATIC IMPORTED) - set_target_properties(udf PROPERTIES IMPORTED_LOCATION third_party/lib/libDorisUdf.a) + set_target_properties(udf PROPERTIES IMPORTED_LOCATION thirdparty/lib/libDorisUdf.a) # where to put generated libraries set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") @@ -187,12 +187,12 @@ Running `sh build.sh` in the root directory of Doris will generate a static libr ) ``` - If the user's UDF function also depends on other third-party libraries, you need to declare include, lib, and add dependencies in `add_library`. + If the user's UDF function also depends on other thirdparty libraries, you need to declare include, lib, and add dependencies in `add_library`. 
The complete directory structure after all files are prepared is as follows: ``` - ├── third_party + ├── thirdparty │ │── include │ │ └── udf.h │ └── lib @@ -214,7 +214,7 @@ Create a build folder under the udf_samples folder to store the compilation outp Run the command `cmake ../` in the build folder to generate a Makefile, and execute make to generate the corresponding dynamic library. ``` -├── third_party +├── thirdparty ├── udf_samples └── build ``` @@ -224,7 +224,7 @@ Run the command `cmake ../` in the build folder to generate a Makefile, and exec After the compilation is completed, the UDF dynamic link library is successfully generated. Under `build/src/`, taking udf_samples as an example, the directory structure is as follows: ``` -├── third_party +├── thirdparty ├── udf_samples └── build └── src diff --git a/docs/zh-CN/extending-doris/contribute_udf.md b/docs/zh-CN/extending-doris/udf/contribute-udf.md similarity index 91% rename from docs/zh-CN/extending-doris/contribute_udf.md rename to docs/zh-CN/extending-doris/udf/contribute-udf.md index aee1a51a0277ce..cc54aee531c8e8 100644 --- a/docs/zh-CN/extending-doris/contribute_udf.md +++ b/docs/zh-CN/extending-doris/udf/contribute-udf.md @@ -1,6 +1,6 @@ --- { - "title": "贡献用户 UDF 函数到社区", + "title": "贡献 UDF ", "language": "zh-CN" } --- @@ -69,14 +69,14 @@ under the License. 使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 -1. 使用手册需包含中英文两个版本,并分别存放在 `docs/zh-CN/extending-doris/contrib/udf` 和 `docs/en/extending-doris/contrib/udf` 下。 +1. 使用手册需包含中英文两个版本,并分别存放在 `docs/zh-CN/extending-doris/udf/contrib` 和 `docs/en/extending-doris/udf/contrib` 下。 ``` ├── docs │   └── zh-CN │   └──extending-doris - │ └──contrib - │ └──udf + │ └──udf + │ └──contrib │ ├── udf-simple-manual.md ``` @@ -85,8 +85,8 @@ under the License. ├── docs │   └── en │   └──extending-doris - │ └──contrib - │ └──udf + │ └──udf + │ └──contrib │ ├── udf-simple-manual.md ``` @@ -96,7 +96,7 @@ under the License. 
vi docs/.vuepress/sidebar/zh-CN.js { title: "用户贡献的 UDF", - directoryPath: "contrib/udf/", + directoryPath: "contrib/", children: [ "udf-simple-manual", @@ -108,7 +108,7 @@ under the License. vi docs/.vuepress/sidebar/en.js { title: "Users contribute UDF", - directoryPath: "contrib/udf/", + directoryPath: "contrib/", children: [ "udf-simple-manual", diff --git a/docs/zh-CN/extending-doris/user-defined-function.md b/docs/zh-CN/extending-doris/udf/user-defined-function.md similarity index 89% rename from docs/zh-CN/extending-doris/user-defined-function.md rename to docs/zh-CN/extending-doris/udf/user-defined-function.md index 5d6ef965d61f11..b7f557dc31be12 100644 --- a/docs/zh-CN/extending-doris/user-defined-function.md +++ b/docs/zh-CN/extending-doris/udf/user-defined-function.md @@ -35,7 +35,7 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 这篇文档主要讲述了,如何编写自定义的 UDF 函数,以及如何在 Doris 中使用它。 -如果用户使用 UDF 功能并扩展了 Doris 的函数分析,并且希望将自己实现的 UDF 函数贡献回 Doris 社区给其他用户使用,这时候请看文档 [Contribute UDF to Doris](http://doris.apache.org/master/zh-CN/extending-doris/contribute_udf.html)。 +如果用户使用 UDF 功能并扩展了 Doris 的函数分析,并且希望将自己实现的 UDF 函数贡献回 Doris 社区给其他用户使用,这时候请看文档 [Contribute UDF](./contribute_udf.md)。 ## 编写UDF函数 @@ -117,14 +117,14 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ### 编写 UDF 编译文件 -1. 准备 third_party +1. 
准备 thirdparty - third_party 文件夹主要用于存放用户 UDF 函数依赖的第三方库,包括头文件及静态库。其中必须包含依赖的 Doris UDF 框架中 `udf.h` 和 `libDorisUdf.a` 这两个文件。 + `thirdparty` 文件夹主要用于存放用户 UDF 函数依赖的第三方库,包括头文件及静态库。其中必须包含依赖的 Doris UDF 框架中 `udf.h` 和 `libDorisUdf.a` 这两个文件。 - 这里以 udf_sample 为例, 在 用户自己 `udf_samples` 目录用于存放 source code。在同级目录下再创建一个 `third_party` 文件夹用于存放静态库。目录结构如下: + 这里以 `udf_sample` 为例, 在 用户自己 `udf_samples` 目录用于存放 source code。在同级目录下再创建一个 `thirdparty` 文件夹用于存放静态库。目录结构如下: ``` - ├── third_party + ├── thirdparty │ │── include │ │ └── udf.h │ └── lib @@ -133,9 +133,9 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ``` - `udf.h` 是 UDF 框架头文件。存放路径为 `doris/output/udf/include/udf.h`。 用户需要将 Doris 编译产出中的这个头文件拷贝到自己的 `third_party` 的 include 文件夹下。 + `udf.h` 是 UDF 框架头文件。存放路径为 `doris/output/udf/include/udf.h`。 用户需要将 Doris 编译产出中的这个头文件拷贝到自己的 `thirdparty` 的 include 文件夹下。 - `libDorisUdf.a` 是 UDF 框架的静态库。Doris 编译完成后该文件存放在 `doris/output/udf/lib/libDorisUdf.a`。用户需要将该文件拷贝到自己的 `third_party` 的 lib 文件夹下。 + `libDorisUdf.a` 是 UDF 框架的静态库。Doris 编译完成后该文件存放在 `doris/output/udf/lib/libDorisUdf.a`。用户需要将该文件拷贝到自己的 `thirdparty` 的 lib 文件夹下。 *注意:UDF 框架的静态库只有完成 Doris 编译后才会生成。 @@ -144,7 +144,7 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 CMakeFiles.txt 用于声明 UDF 函数如何进行编译。存放在源码文件夹下,与用户代码平级。这里以 `udf_samples` 为例目录结构如下: ``` - ├── third_party + ├── thirdparty └── udf_samples ├── CMakeLists.txt ├── uda_sample.cpp @@ -161,11 +161,11 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ``` # Include udf - include_directories(third_party/include) + include_directories(thirdparty/include) # Set all libraries add_library(udf STATIC IMPORTED) - set_target_properties(udf PROPERTIES IMPORTED_LOCATION third_party/lib/libDorisUdf.a) + set_target_properties(udf PROPERTIES IMPORTED_LOCATION thirdparty/lib/libDorisUdf.a) # where to put generated libraries set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") @@ -193,7 +193,7 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 所有文件准备齐后完整的目录结构如下: ``` - ├── third_party + ├── thirdparty │ │── include │ │ └── udf.h │ └── lib @@ -215,7 +215,7 
@@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 在 build 文件夹下运行命令 `cmake ../` 生成Makefile,并执行 make 就会生成对应动态库。 ``` -├── third_party +├── thirdparty ├── udf_samples └── build ``` @@ -226,7 +226,7 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ``` -├── third_party +├── thirdparty ├── udf_samples └── build └── src @@ -260,7 +260,7 @@ CREATE [AGGREGATE] FUNCTION 用户使用 UDF 必须拥有对应数据库的 `SELECT` 权限。 -UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而UDF的作用域是DB内部。当链接session位于数据内部时,直接使用UDF名字会在当前DB内部查找对应的UDF。否则用户需要显示的指定UDF的数据库名字,例如`dbName`.`funcName`。 +UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而UDF的作用域是DB内部。当链接 session 位于数据内部时,直接使用 UDF 名字会在当前DB内部查找对应的UDF。否则用户需要显示的指定 UDF 的数据库名字,例如 `dbName`.`funcName`。 ## 删除UDF函数 From 7c2eaa30df8e6273b6c06234acb48e597b1293ac Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 22 Jun 2020 11:38:16 +0800 Subject: [PATCH 16/18] Change docs Change-Id: I3bf09825e06d629dfeee8f8cc4b7e3ccd9e8d3a1 --- docs/en/extending-doris/udf/contribute-udf.md | 10 +++++----- docs/zh-CN/extending-doris/udf/contribute-udf.md | 10 +++++----- .../zh-CN/extending-doris/udf/user-defined-function.md | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/en/extending-doris/udf/contribute-udf.md b/docs/en/extending-doris/udf/contribute-udf.md index 50fed56f6c5eb3..61f5dcecc957f8 100644 --- a/docs/en/extending-doris/udf/contribute-udf.md +++ b/docs/en/extending-doris/udf/contribute-udf.md @@ -32,9 +32,9 @@ This manual mainly introduces how external users can contribute their own UDF fu 1. UDF function is universal -The versatility here mainly refers to: UDF functions are widely used in certain business scenarios. Such UDF functions are valuable and can be used directly by other users in the community. + The versatility here mainly refers to: UDF functions are widely used in certain business scenarios. Such UDF functions are valuable and can be used directly by other users in the community. 
-If you are not sure whether the UDF function you wrote is universal, you can send an email to `dev@doris.apache.org` or directly create an ISSUE to initiate the discussion. + If you are not sure whether the UDF function you wrote is universal, you can send an email to `dev@doris.apache.org` or directly create an ISSUE to initiate the discussion. 2. UDF has completed testing and is running normally in the user's production environment @@ -63,7 +63,7 @@ Create a folder for UDF functions under `contrib/udf/src/`, and store the source 1. CMakeLists.txt -After the user's `CMakeLists.txt` is placed here, a small amount of changes are required. Just remove `include udf` and `udf lib`. The reason for the removal is that it has been declared in the CMake file at the `contrib/udf` level. + After the user's `CMakeLists.txt` is placed here, a small amount of changes are required. Just remove `include udf` and `udf lib`. The reason for the removal is that it has been declared in the CMake file at the `contrib/udf` level. ## manual @@ -119,6 +119,6 @@ The user manual needs to include: UDF function definition description, applicabl # Contribute UDF to the community - When you meet the conditions and prepare the code, you can contribute UDF to the Doris community after the document. Simply submit the request (PR) on [Github] (https://github.com/apache/incubator-doris). See the specific submission method: [Pull Request (PR)] (https://help.github.com/articles/about-pull-requests/). +When you meet the conditions and prepare the code, you can contribute UDF to the Doris community after the document. Simply submit the request (PR) on [Github] (https://github.com/apache/incubator-doris). See the specific submission method: [Pull Request (PR)] (https://help.github.com/articles/about-pull-requests/). - Finally, when the PR assessment is passed and merged. Congratulations, your UDF becomes a third-party UDF supported by Doris. 
You can check it out in the extended functions section of [Doris official website] (http://doris.apache.org/master/zh-CN/)~. +Finally, when the PR assessment is passed and merged. Congratulations, your UDF becomes a third-party UDF supported by Doris. You can check it out in the extended functions section of [Doris official website] (http://doris.apache.org/master/zh-CN/)~. diff --git a/docs/zh-CN/extending-doris/udf/contribute-udf.md b/docs/zh-CN/extending-doris/udf/contribute-udf.md index cc54aee531c8e8..105168a8251ceb 100644 --- a/docs/zh-CN/extending-doris/udf/contribute-udf.md +++ b/docs/zh-CN/extending-doris/udf/contribute-udf.md @@ -24,7 +24,7 @@ specific language governing permissions and limitations under the License. --> -# 贡献用户的 UDF 函数到社区 +# 贡献 UDF 该手册主要讲述了外部用户如何将自己编写的 UDF 函数贡献给 Doris 社区。 @@ -45,7 +45,7 @@ under the License. ## 源代码 - 在 `contrib/udf/src/` 下创建一个存放 UDF 函数的文件夹,并将源码和 CMAKE 文件存放在此处。待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。这里以 udf_samples 为例,首先在 `contrib/udf/src/` 路径下创建一个新的文件夹,并存放源码。 +在 `contrib/udf/src/` 下创建一个存放 UDF 函数的文件夹,并将源码和 CMAKE 文件存放在此处。待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。这里以 udf_samples 为例,首先在 `contrib/udf/src/` 路径下创建一个新的文件夹,并存放源码。 ``` ├──contrib @@ -67,7 +67,7 @@ under the License. ## 使用手册 - 使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 +使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 1. 使用手册需包含中英文两个版本,并分别存放在 `docs/zh-CN/extending-doris/udf/contrib` 和 `docs/en/extending-doris/udf/contrib` 下。 @@ -119,6 +119,6 @@ under the License. 
# 贡献 UDF 到社区 - 当你符合前提条件并准备好代码,文档后就可以将 UDF 贡献到 Doris 社区了。在 [Github](https://github.com/apache/incubator-doris) 上面提交 Pull Request (PR) 即可。具体提交方式见:[Pull Request (PR)](https://help.github.com/articles/about-pull-requests/)。 +当你符合前提条件并准备好代码,文档后就可以将 UDF 贡献到 Doris 社区了。在 [Github](https://github.com/apache/incubator-doris) 上面提交 Pull Request (PR) 即可。具体提交方式见:[Pull Request (PR)](https://help.github.com/articles/about-pull-requests/)。 - 最后,当 PR 评审通过并 Merge 后。恭喜你,你的 UDF 已经贡献给 Doris 社区,你可以在 [Doris 官网](http://doris.apache.org/master/zh-CN/) 的扩展功能部分查看到啦~。 +最后,当 PR 评审通过并 Merge 后。恭喜你,你的 UDF 已经贡献给 Doris 社区,你可以在 [Doris 官网](http://doris.apache.org/master/zh-CN/) 的扩展功能部分查看到啦~。 diff --git a/docs/zh-CN/extending-doris/udf/user-defined-function.md b/docs/zh-CN/extending-doris/udf/user-defined-function.md index b7f557dc31be12..43d5339743695c 100644 --- a/docs/zh-CN/extending-doris/udf/user-defined-function.md +++ b/docs/zh-CN/extending-doris/udf/user-defined-function.md @@ -96,9 +96,9 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ## 编译 UDF 函数 - 由于 UDF 实现中依赖了 Doris 的 UDF 框架 , 所以在编译 UDF 函数的时候首先要对 Doris 进行编译,也就是对 UDF 框架进行编译。 +由于 UDF 实现中依赖了 Doris 的 UDF 框架 , 所以在编译 UDF 函数的时候首先要对 Doris 进行编译,也就是对 UDF 框架进行编译。 - 编译完成后会生成,UDF 框架的静态库文件。之后引入 UDF 框架依赖,并编译 UDF 即可。 +编译完成后会生成,UDF 框架的静态库文件。之后引入 UDF 框架依赖,并编译 UDF 即可。 ### 编译Doris From a2d3eecfea64c46e75f01c035ca334f03affd2c0 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 22 Jun 2020 11:43:29 +0800 Subject: [PATCH 17/18] Change docs Change-Id: I930e3007e52a78678c420bbd4288565c3e06edf7 --- docs/en/extending-doris/udf/contribute-udf.md | 10 +++++----- docs/en/extending-doris/udf/user-defined-function.md | 2 +- docs/zh-CN/extending-doris/udf/contribute-udf.md | 10 +++++----- .../extending-doris/udf/user-defined-function.md | 12 ++++++------ 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/en/extending-doris/udf/contribute-udf.md b/docs/en/extending-doris/udf/contribute-udf.md index 61f5dcecc957f8..4c39caf0ecd263 100644 --- 
a/docs/en/extending-doris/udf/contribute-udf.md +++ b/docs/en/extending-doris/udf/contribute-udf.md @@ -28,7 +28,7 @@ According to permission. This manual mainly introduces how external users can contribute their own UDF functions to the Doris community. -# Prerequisites +## Prerequisites 1. UDF function is universal @@ -38,12 +38,12 @@ This manual mainly introduces how external users can contribute their own UDF fu 2. UDF has completed testing and is running normally in the user's production environment -# Ready to work +## Ready to work 1. UDF source code 2. User Manual of UDF -## Source code +### Source code Create a folder for UDF functions under `contrib/udf/src/`, and store the source code and CMAKE files here. The source code to be contributed should include: `.h`, `.cpp`, `CMakeFile.txt`. Taking udf_samples as an example here, first create a new folder under the `contrib/udf/src/` path and store the source code. @@ -65,7 +65,7 @@ Create a folder for UDF functions under `contrib/udf/src/`, and store the source After the user's `CMakeLists.txt` is placed here, a small amount of changes are required. Just remove `include udf` and `udf lib`. The reason for the removal is that it has been declared in the CMake file at the `contrib/udf` level. -## manual +### manual The user manual needs to include: UDF function definition description, applicable scenarios, function syntax, how to compile UDF, how to use UDF in Doris, and use examples. @@ -117,7 +117,7 @@ The user manual needs to include: UDF function definition description, applicabl ``` -# Contribute UDF to the community +## Contribute UDF to the community When you meet the conditions and prepare the code, you can contribute UDF to the Doris community after the document. Simply submit the request (PR) on [Github] (https://github.com/apache/incubator-doris). See the specific submission method: [Pull Request (PR)] (https://help.github.com/articles/about-pull-requests/). 
diff --git a/docs/en/extending-doris/udf/user-defined-function.md b/docs/en/extending-doris/udf/user-defined-function.md index c392bc03667f2a..99c5fbfe6d3753 100644 --- a/docs/en/extending-doris/udf/user-defined-function.md +++ b/docs/en/extending-doris/udf/user-defined-function.md @@ -261,6 +261,6 @@ Users must have the `SELECT` permission of the corresponding database to use UDF The use of UDF is consistent with ordinary function methods. The only difference is that the scope of built-in functions is global, and the scope of UDF is internal to DB. When the link session is inside the data, directly using the UDF name will find the corresponding UDF inside the current DB. Otherwise, the user needs to display the specified UDF database name, such as `dbName`.`funcName`. -## Delete UDF function +## Delete UDF When you no longer need UDF functions, you can delete a UDF function by the following command, you can refer to `DROP FUNCTION`. diff --git a/docs/zh-CN/extending-doris/udf/contribute-udf.md b/docs/zh-CN/extending-doris/udf/contribute-udf.md index 105168a8251ceb..c7b06fe2fe975d 100644 --- a/docs/zh-CN/extending-doris/udf/contribute-udf.md +++ b/docs/zh-CN/extending-doris/udf/contribute-udf.md @@ -28,7 +28,7 @@ under the License. 该手册主要讲述了外部用户如何将自己编写的 UDF 函数贡献给 Doris 社区。 -# 前提条件 +## 前提条件 1. UDF 函数具有通用性 @@ -38,12 +38,12 @@ under the License. 2. UDF 已经完成测试,并正常运行在用户的生产环境中 -# 准备工作 +## 准备工作 1. UDF 的 source code 2. UDF 的使用手册 -## 源代码 +### 源代码 在 `contrib/udf/src/` 下创建一个存放 UDF 函数的文件夹,并将源码和 CMAKE 文件存放在此处。待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。这里以 udf_samples 为例,首先在 `contrib/udf/src/` 路径下创建一个新的文件夹,并存放源码。 @@ -65,7 +65,7 @@ under the License. 用户的 `CMakeLists.txt` 放在此处后,需要进行少量更改。去掉 `include udf` 和 `udf lib` 即可。去掉的原因是,在 `contrib/udf` 层级的 CMake 文件中,已经声明了。 -## 使用手册 +### 使用手册 使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 @@ -117,7 +117,7 @@ under the License. 
``` -# 贡献 UDF 到社区 +## 贡献 UDF 到社区 当你符合前提条件并准备好代码,文档后就可以将 UDF 贡献到 Doris 社区了。在 [Github](https://github.com/apache/incubator-doris) 上面提交 Pull Request (PR) 即可。具体提交方式见:[Pull Request (PR)](https://help.github.com/articles/about-pull-requests/)。 diff --git a/docs/zh-CN/extending-doris/udf/user-defined-function.md b/docs/zh-CN/extending-doris/udf/user-defined-function.md index 43d5339743695c..0c57b4194773c0 100644 --- a/docs/zh-CN/extending-doris/udf/user-defined-function.md +++ b/docs/zh-CN/extending-doris/udf/user-defined-function.md @@ -37,7 +37,7 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 如果用户使用 UDF 功能并扩展了 Doris 的函数分析,并且希望将自己实现的 UDF 函数贡献回 Doris 社区给其他用户使用,这时候请看文档 [Contribute UDF](./contribute_udf.md)。 -## 编写UDF函数 +## 编写 UDF 函数 在使用UDF之前,用户需要先在 Doris 的 UDF 框架下,编写自己的UDF函数。在`contrib/udf/src/udf_samples/udf_sample.h|cpp`文件中是一个简单的 UDF Demo。 @@ -236,7 +236,7 @@ UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指 ``` -## 创建UDF函数 +## 创建 UDF 函数 通过上述的步骤后,你可以得到 UDF 的动态库(也就是编译结果中的 `.so` 文件)。你需要将这个动态库放到一个能够通过 HTTP 协议访问到的位置。 @@ -256,14 +256,14 @@ CREATE [AGGREGATE] FUNCTION 具体使用可以参见 `CREATE FUNCTION` 获取更详细信息。 -## 使用UDF +## 使用 UDF 用户使用 UDF 必须拥有对应数据库的 `SELECT` 权限。 -UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而UDF的作用域是DB内部。当链接 session 位于数据内部时,直接使用 UDF 名字会在当前DB内部查找对应的UDF。否则用户需要显示的指定 UDF 的数据库名字,例如 `dbName`.`funcName`。 +UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而 UDF 的作用域是 DB内部。当链接 session 位于数据内部时,直接使用 UDF 名字会在当前DB内部查找对应的 UDF。否则用户需要显示的指定 UDF 的数据库名字,例如 `dbName`.`funcName`。 -## 删除UDF函数 +## 删除 UDF函数 -当你不再需要UDF函数时,你可以通过下述命令来删除一个UDF函数, 可以参考 `DROP FUNCTION`。 +当你不再需要 UDF 函数时,你可以通过下述命令来删除一个 UDF 函数, 可以参考 `DROP FUNCTION`。 From 44d4caa12d0871cd101d9ecd4acb471018e1e3d5 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 22 Jun 2020 12:25:16 +0800 Subject: [PATCH 18/18] Change docs Change-Id: I53d93075e33e1a29293aa47d84d4995fef585c39 --- docs/en/extending-doris/udf/contribute-udf.md | 36 +++++++++---------- .../udf/user-defined-function.md | 4 +-- .../udf/user-defined-function.md | 4 +-- 3 files changed, 
22 insertions(+), 22 deletions(-) diff --git a/docs/en/extending-doris/udf/contribute-udf.md b/docs/en/extending-doris/udf/contribute-udf.md index 4c39caf0ecd263..11482016177f76 100644 --- a/docs/en/extending-doris/udf/contribute-udf.md +++ b/docs/en/extending-doris/udf/contribute-udf.md @@ -5,24 +5,24 @@ } --- - + # Contribute UDF diff --git a/docs/en/extending-doris/udf/user-defined-function.md b/docs/en/extending-doris/udf/user-defined-function.md index 99c5fbfe6d3753..a96db0ff45be20 100644 --- a/docs/en/extending-doris/udf/user-defined-function.md +++ b/docs/en/extending-doris/udf/user-defined-function.md @@ -1,7 +1,7 @@ --- { -    "title": "User Define Function", -    "language": "en" + "title": "User Defined Function", + "language": "en" } --- diff --git a/docs/zh-CN/extending-doris/udf/user-defined-function.md b/docs/zh-CN/extending-doris/udf/user-defined-function.md index 0c57b4194773c0..8c896b50b9545f 100644 --- a/docs/zh-CN/extending-doris/udf/user-defined-function.md +++ b/docs/zh-CN/extending-doris/udf/user-defined-function.md @@ -1,6 +1,6 @@ --- { - "title": "User Define Function", + "title": "User Defined Function", "language": "zh-CN" } --- @@ -24,7 +24,7 @@ specific language governing permissions and limitations under the License. --> -# User Define Function +# User Defined Function UDF 主要适用于,用户需要的分析能力 Doris 并不具备的场景。用户可以自行根据自己的需求,实现自定义的函数,并且通过 UDF 框架注册到 Doris 中,来扩展 Doris 的能力,并解决用户分析需求。