diff --git a/README.md b/README.md index db0540f6a0d21b..909dc74afdb5d8 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ $ docker images REPOSITORY TAG IMAGE ID CREATED SIZE apachedoris/doris-dev build-env f8bc5d4024e0 21 hours ago 3.28GB ``` +**NOTE**: If you want to compile source code that is later than 0.10, such as master, you should use the apachedoris/doris-dev:build-env-0.11 image + #### Step2: Run the Docker image diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 52f9179321b3f8..fe5b9ed38d4af2 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -184,6 +184,9 @@ set_target_properties(librdkafka_cpp PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_D add_library(librdkafka STATIC IMPORTED) set_target_properties(librdkafka PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/librdkafka.a) +add_library(libs2 STATIC IMPORTED) +set_target_properties(libs2 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libs2.a) + find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin) # llvm-config @@ -269,6 +272,7 @@ set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFI set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DBOOST_SYSTEM_NO_DEPRECATED") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -msse4.2") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DLLVM_ON_UNIX") +set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-attributes -DS2_USE_GFLAGS -DS2_USE_GLOG") if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -faligned-new") @@ -435,6 +439,7 @@ set(DORIS_LINK_LIBS DorisGen Webserver TestUtil + Geo ${WL_END_GROUP} ) @@ -444,6 +449,7 @@ set(DORIS_DEPENDENCIES rocksdb librdkafka_cpp librdkafka + libs2 lzo snappy ${Boost_LIBRARIES} @@ -535,6 +541,7 @@ add_subdirectory(${SRC_DIR}/runtime) add_subdirectory(${SRC_DIR}/testutil) add_subdirectory(${SRC_DIR}/tools) add_subdirectory(${SRC_DIR}/udf_samples) +add_subdirectory(${SRC_DIR}/geo) # Utility CMake function to make specifying tests and benchmarks less verbose 
FUNCTION(ADD_BE_TEST TEST_NAME) @@ -563,6 +570,7 @@ if (${MAKE_TEST} STREQUAL "ON") add_subdirectory(${TEST_DIR}/exprs) add_subdirectory(${TEST_DIR}/runtime) add_subdirectory(${TEST_DIR}/http) + add_subdirectory(${TEST_DIR}/geo) endif () # Install be diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 0b78993f3d1946..96a5a33f66afd7 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -49,6 +49,7 @@ #include "exprs/utility_functions.h" #include "exprs/json_functions.h" #include "exprs/hll_hash_function.h" +#include "geo/geo_functions.h" #include "olap/options.h" #include "util/time.h" #include "util/system_metrics.h" @@ -264,6 +265,7 @@ void init_daemon(int argc, char** argv, const std::vector& paths) { JsonFunctions::init(); HllHashFunctions::init(); ESFunctions::init(); + GeoFunctions::init(); pthread_t tc_malloc_pid; pthread_create(&tc_malloc_pid, NULL, tcmalloc_gc_thread, NULL); diff --git a/be/src/geo/CMakeLists.txt b/be/src/geo/CMakeLists.txt new file mode 100644 index 00000000000000..0a5bb81ebd26fc --- /dev/null +++ b/be/src/geo/CMakeLists.txt @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# where to put generated libraries +set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/geo") + +# where to put generated binaries +set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/geo") + +add_library(Geo STATIC + geo_common.cpp + geo_functions.cpp + geo_types.cpp + wkt_parse.cpp + ${GENSRC_DIR}/geo/wkt_lex.l.cpp + ${GENSRC_DIR}/geo/wkt_yacc.y.cpp +) + +add_custom_command( # generate the WKT lexer source and header with flex + OUTPUT ${GENSRC_DIR}/geo/wkt_lex.l.cpp ${GENSRC_DIR}/geo/wkt_lex.l.h + COMMAND ${CMAKE_COMMAND} -E make_directory ${GENSRC_DIR}/geo + COMMAND flex --header-file=${GENSRC_DIR}/geo/wkt_lex.l.h --outfile=${GENSRC_DIR}/geo/wkt_lex.l.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/wkt_lex.l + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/wkt_lex.l VERBATIM) +set_source_files_properties(${GENSRC_DIR}/geo/wkt_lex.l.cpp PROPERTIES GENERATED TRUE) + +add_custom_command( # generate the WKT parser source with bison + OUTPUT ${GENSRC_DIR}/geo/wkt_yacc.y.cpp ${GENSRC_DIR}/geo/wkt_yacc.y.hpp + COMMAND ${CMAKE_COMMAND} -E make_directory ${GENSRC_DIR}/geo + COMMAND bison --output=${GENSRC_DIR}/geo/wkt_yacc.y.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wkt_yacc.y + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/wkt_yacc.y VERBATIM) +set_source_files_properties(${GENSRC_DIR}/geo/wkt_yacc.y.cpp PROPERTIES GENERATED TRUE) + diff --git a/be/src/geo/geo_common.cpp b/be/src/geo/geo_common.cpp new file mode 100644 index 00000000000000..0857069dd4c5e3 --- /dev/null +++ b/be/src/geo/geo_common.cpp @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "geo/geo_common.h" + +namespace doris { + +std::string to_string(GeoParseStatus status) { + switch (status) { + case GEO_PARSE_OK: + return "OK"; + case GEO_PARSE_COORD_INVALID: + return "Coordinate invalid"; + case GEO_PARSE_LOOP_NOT_CLOSED: + return "Loop is not closed"; + case GEO_PARSE_LOOP_LACK_VERTICES: + return "Loop lack enough vertices"; + case GEO_PARSE_LOOP_INVALID: + return "Loop invalid"; + case GEO_PARSE_POLYGON_NOT_HOLE: + return "Loop not contained in the first loop"; + case GEO_PARSE_POLYLINE_LACK_VERTICES: + return "Line string lack vertices"; + case GEO_PARSE_POLYLINE_INVALID: + return "Line string invalid"; + case GEO_PARSE_CIRCLE_INVALID: + return "Circle invalid"; + case GEO_PARSE_WKT_SYNTAX_ERROR: + return "WKT syntax error"; + default: + return "Unknown"; + } +} + +std::ostream& operator<<(std::ostream& os, GeoParseStatus status) { + os << to_string(status); + return os; +} + +} diff --git a/be/src/geo/geo_common.h b/be/src/geo/geo_common.h new file mode 100644 index 00000000000000..a5e79f131313a0 --- /dev/null +++ b/be/src/geo/geo_common.h @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +namespace doris { + +enum GeoShapeType { + GEO_SHAPE_ANY = 0, + GEO_SHAPE_POINT = 1, + GEO_SHAPE_LINE_STRING = 2, + GEO_SHAPE_POLYGON = 3, + GEO_SHAPE_MULTI_POINT = 4, + GEO_SHAPE_MULTI_LINE_STRING = 5, + GEO_SHAPE_MULTI_POLYGON = 6, + GEO_SHAPE_CIRCLE = 7, +}; + +enum GeoParseStatus { + GEO_PARSE_OK = 0, + GEO_PARSE_COORD_INVALID = 1, + GEO_PARSE_LOOP_NOT_CLOSED = 2, + GEO_PARSE_LOOP_LACK_VERTICES = 3, + GEO_PARSE_LOOP_INVALID = 4, + GEO_PARSE_POLYGON_NOT_HOLE = 5, + GEO_PARSE_POLYLINE_LACK_VERTICES = 6, + GEO_PARSE_POLYLINE_INVALID = 7, + GEO_PARSE_CIRCLE_INVALID = 8, + GEO_PARSE_WKT_SYNTAX_ERROR = 9, +}; + +std::string to_string(GeoParseStatus status); +std::ostream& operator<<(std::ostream& os, GeoParseStatus status); + +} diff --git a/be/src/geo/geo_functions.cpp b/be/src/geo/geo_functions.cpp new file mode 100644 index 00000000000000..cd9dcad272d3e5 --- /dev/null +++ b/be/src/geo/geo_functions.cpp @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "geo/geo_functions.h" + +#include +#include + +#include "common/logging.h" +#include "geo/geo_types.h" + +namespace doris { + +void GeoFunctions::init() { + // set s2debug to false to avoid crash + FLAGS_s2debug = false; +} + +DoubleVal GeoFunctions::st_distance_sphere(FunctionContext* ctx, + const DoubleVal& x_lng, const DoubleVal& x_lat, + const DoubleVal& y_lng, const DoubleVal& y_lat) { + + if (x_lng.is_null || x_lat.is_null || y_lng.is_null || y_lat.is_null) { + return DoubleVal::null(); + } + S2LatLng x = S2LatLng::FromDegrees(x_lat.val, x_lng.val); + if (!x.is_valid()) { + return DoubleVal::null(); + } + S2LatLng y = S2LatLng::FromDegrees(y_lat.val, y_lng.val); + if (!y.is_valid()) { + return DoubleVal::null(); + } + return DoubleVal(S2Earth::ToMeters(x.GetDistance(y))); +} + +doris_udf::StringVal GeoFunctions::st_point(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& x, + const doris_udf::DoubleVal& y) { + if (x.is_null || y.is_null) { + return StringVal::null(); + } + GeoPoint point; + auto res = point.from_coord(x.val, y.val); + if (res != GEO_PARSE_OK) { + return StringVal::null(); + } + std::string buf; + point.encode_to(&buf); + StringVal result(ctx, buf.size()); + memcpy(result.ptr, buf.data(), buf.size()); + return result; +} + +DoubleVal GeoFunctions::st_x(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& point_encoded) { + if (point_encoded.is_null) { + return DoubleVal::null(); + } + GeoPoint point; + auto res = point.decode_from(point_encoded.ptr, point_encoded.len); + if (!res) { + return 
DoubleVal::null(); + } + return DoubleVal(point.x()); +} + +DoubleVal GeoFunctions::st_y(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& point_encoded) { + if (point_encoded.is_null) { + return DoubleVal::null(); + } + GeoPoint point; + auto res = point.decode_from(point_encoded.ptr, point_encoded.len); + if (!res) { + return DoubleVal::null(); + } + return DoubleVal(point.y()); +} + +StringVal GeoFunctions::st_as_wkt(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& shape_encoded) { + if (shape_encoded.is_null) { + return StringVal::null(); + } + std::unique_ptr shape(GeoShape::from_encoded(shape_encoded.ptr, shape_encoded.len)); + if (shape == nullptr) { + return StringVal::null(); + } + auto wkt = shape->as_wkt(); + StringVal result(ctx, wkt.size()); + memcpy(result.ptr, wkt.data(), wkt.size()); + return result; +} + +struct StConstructState { + StConstructState() : is_null(false) { } + ~StConstructState() { } + + bool is_null; + std::string encoded_buf; +}; + +void GeoFunctions::st_from_wkt_close(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return; + } + StConstructState* state = reinterpret_cast(ctx->get_function_state(scope)); + delete state; +} + +void GeoFunctions::st_from_wkt_prepare_common(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope, + GeoShapeType shape_type) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return; + } + if (!ctx->is_arg_constant(0)) { + return; + } + std::unique_ptr state(new StConstructState()); + StringVal* str = reinterpret_cast(ctx->get_constant_arg(0)); + if (str->is_null) { + state->is_null = true; // constant NULL argument: mark the cached result NULL + } else { + GeoParseStatus status; + std::unique_ptr shape(GeoShape::from_wkt((const char*)str->ptr, str->len, &status)); + if (shape == nullptr || (shape_type != GEO_SHAPE_ANY && shape->type() != shape_type)) { + state->is_null = true; + } else { + shape->encode_to(&state->encoded_buf); + } + } + 
ctx->set_function_state(scope, state.release()); +} + +StringVal GeoFunctions::st_from_wkt_common(FunctionContext* ctx, + const StringVal& wkt, + GeoShapeType shape_type) { + if (wkt.is_null) { + return StringVal::null(); + } + StConstructState* state = (StConstructState*)ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); + if (state == nullptr) { + GeoParseStatus status; + std::unique_ptr shape(GeoShape::from_wkt((const char*)wkt.ptr, wkt.len, &status)); + if (shape == nullptr || (shape_type != GEO_SHAPE_ANY && shape->type() != shape_type)) { + return StringVal::null(); + } + std::string buf; + shape->encode_to(&buf); + StringVal result(ctx, buf.size()); + memcpy(result.ptr, buf.data(), buf.size()); + return result; + } else { + if (state->is_null) { + return StringVal::null(); + } + StringVal result((uint8_t*)state->encoded_buf.data(), state->encoded_buf.size()); + return result; + } +} + +void GeoFunctions::st_circle_prepare(doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return; + } + if (!ctx->is_arg_constant(0) || !ctx->is_arg_constant(1) || !ctx->is_arg_constant(2)) { + return; + } + std::unique_ptr state(new StConstructState()); + DoubleVal* lng = reinterpret_cast(ctx->get_constant_arg(0)); + DoubleVal* lat = reinterpret_cast(ctx->get_constant_arg(1)); + DoubleVal* radius = reinterpret_cast(ctx->get_constant_arg(2)); + if (lng->is_null || lat->is_null || radius->is_null) { + state->is_null = true; + } else { + std::unique_ptr circle(new GeoCircle()); + auto res = circle->init(lng->val, lat->val, radius->val); + if (res != GEO_PARSE_OK) { + state->is_null = true; + } else { + circle->encode_to(&state->encoded_buf); + } + } + ctx->set_function_state(scope, state.release()); +} + +doris_udf::StringVal GeoFunctions::st_circle(FunctionContext* ctx, + const DoubleVal& lng, + const DoubleVal& lat, + const DoubleVal& radius) { + if (lng.is_null || lat.is_null || 
radius.is_null) { + return StringVal::null(); + } + StConstructState* state = (StConstructState*) ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); + if (state == nullptr) { + std::unique_ptr circle(new GeoCircle()); + auto res = circle->init(lng.val, lat.val, radius.val); + if (res != GEO_PARSE_OK) { + return StringVal::null(); + } + std::string buf; + circle->encode_to(&buf); + StringVal result(ctx, buf.size()); + memcpy(result.ptr, buf.data(), buf.size()); + return result; + } else { + if (state->is_null) { + return StringVal::null(); + } + StringVal result((uint8_t*)state->encoded_buf.data(), state->encoded_buf.size()); + return result; + } +} + +struct StContainsState { + StContainsState() : is_null(false), shapes{nullptr, nullptr} { } + ~StContainsState() { + delete shapes[0]; + delete shapes[1]; + } + bool is_null; + GeoShape* shapes[2]; +}; + +void GeoFunctions::st_contains_prepare(doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return; + } + if (!ctx->is_arg_constant(0) && !ctx->is_arg_constant(1)) { + return; + } + std::unique_ptr contains_ctx(new StContainsState()); + for (int i = 0; !contains_ctx->is_null && i < 2; ++i) { + if (ctx->is_arg_constant(i)) { + StringVal* str = reinterpret_cast(ctx->get_constant_arg(i)); + if (str->is_null) { + contains_ctx->is_null = true; + } else { + contains_ctx->shapes[i] = GeoShape::from_encoded(str->ptr, str->len); + if (contains_ctx->shapes[i] == nullptr) { + contains_ctx->is_null = true; + } + } + } + } + ctx->set_function_state(scope, contains_ctx.release()); +} + +void GeoFunctions::st_contains_close(doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return; + } + StContainsState* contains_ctx = reinterpret_cast(ctx->get_function_state(scope)); + delete contains_ctx; +} + +doris_udf::BooleanVal GeoFunctions::st_contains( + 
doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& lhs, + const doris_udf::StringVal& rhs) { + if (lhs.is_null || rhs.is_null) { + return BooleanVal::null(); + } + const StContainsState* state = reinterpret_cast( + ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + if (state != nullptr && state->is_null) { + return BooleanVal::null(); + } + GeoShape* shapes[2] = {nullptr, nullptr}; + const StringVal* strs[2] = {&lhs, &rhs}; + // use this to delete new + StContainsState local_state; + for (int i = 0; i < 2; ++i) { + if (state != nullptr && state->shapes[i] != nullptr) { + shapes[i] = state->shapes[i]; + } else { + shapes[i] = local_state.shapes[i] = GeoShape::from_encoded(strs[i]->ptr, strs[i]->len); + if (shapes[i] == nullptr) { + return BooleanVal::null(); + } + } + } + + return shapes[0]->contains(shapes[1]); +} + +} diff --git a/be/src/geo/geo_functions.h b/be/src/geo/geo_functions.h new file mode 100644 index 00000000000000..30be9f0e3fda4f --- /dev/null +++ b/be/src/geo/geo_functions.h @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "udf/udf.h" + +#include "geo/geo_common.h" + +namespace doris { + +class GeoFunctions { +public: + static void init(); + + // compute distance between two points in earth sphere + static DoubleVal st_distance_sphere(FunctionContext* ctx, + const DoubleVal& x_lng, const DoubleVal& x_lat, + const DoubleVal& y_lng, const DoubleVal& y_lat); + + // point + static doris_udf::StringVal st_point(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& x, + const doris_udf::DoubleVal& y); + + static doris_udf::DoubleVal st_x(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& point); + static doris_udf::DoubleVal st_y(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& point); + + // to wkt + static doris_udf::StringVal st_as_wkt(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& shape); + // from wkt + static void st_from_wkt_prepare_common(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope, + GeoShapeType shape_type); + static void st_from_wkt_close(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); + static doris_udf::StringVal st_from_wkt_common(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& wkt, + GeoShapeType shape_type); + + static void st_from_wkt_prepare(doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) __attribute__ ((used)) { + st_from_wkt_prepare_common(ctx, scope, GEO_SHAPE_ANY); + } + static doris_udf::StringVal st_from_wkt( + doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& wkt) __attribute__ ((used)) { + return st_from_wkt_common(ctx, wkt, GEO_SHAPE_ANY); + } + + // for line + static void st_line_prepare( + doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) __attribute__ ((used)) { + st_from_wkt_prepare_common(ctx, scope, GEO_SHAPE_LINE_STRING); + } + static doris_udf::StringVal st_line( + doris_udf::FunctionContext* ctx, + const 
doris_udf::StringVal& wkt) __attribute__ ((used)) { + return st_from_wkt_common(ctx, wkt, GEO_SHAPE_LINE_STRING); + } + + // for polygon + static void st_polygon_prepare( + doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) __attribute__ ((used)) { + st_from_wkt_prepare_common(ctx, scope, GEO_SHAPE_POLYGON); + } + static doris_udf::StringVal st_polygon( + doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& wkt) __attribute__ ((used)) { + return st_from_wkt_common(ctx, wkt, GEO_SHAPE_POLYGON); + } + + // for circle + static doris_udf::StringVal st_circle(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& center_lng, + const doris_udf::DoubleVal& center_lat, + const doris_udf::DoubleVal& radius_meter); + static void st_circle_prepare(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); + + // Returns true if and only if no points of the second geometry + // lie in the exterior of the first geometry, and at least one + // point of the interior of the first geometry lies in the + // interior of the second geometry. + static doris_udf::BooleanVal st_contains(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& lhs, + const doris_udf::StringVal& rhs); + static void st_contains_prepare(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); + static void st_contains_close(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); +}; + +} diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp new file mode 100644 index 00000000000000..f14b1f3266edcc --- /dev/null +++ b/be/src/geo/geo_types.cpp @@ -0,0 +1,520 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "geo/geo_types.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "geo/wkt_parse.h" + +namespace doris { + +void print_s2point(std::ostream& os, const S2Point& point) { + S2LatLng coord(point); + os << std::setprecision(12) << coord.lng().degrees() << " " << coord.lat().degrees(); +} + +static inline bool is_valid_lng_lat(double lng, double lat) { + return lng >= -180 && lng <= 180 && lat >= -90 && lat <= 90; // explicit range test; unqualified abs() may bind to the int overload and truncate +} + +// Return GEO_PARSE_OK, if and only if this can be converted to a valid S2Point +static inline GeoParseStatus to_s2point(double lng, double lat, S2Point* point) { + if (!is_valid_lng_lat(lng, lat)) { + return GEO_PARSE_COORD_INVALID; + } + S2LatLng ll = S2LatLng::FromDegrees(lat, lng); + DCHECK(ll.is_valid()) << "invalid point, lng=" << lng << ", lat=" << lat; + *point = ll.ToPoint(); + return GEO_PARSE_OK; +} + +static inline GeoParseStatus to_s2point(const GeoCoordinate& coord, S2Point* point) { + return to_s2point(coord.x, coord.y, point); +} + +static bool is_loop_closed(const std::vector& points) { + if (points.empty()) { + return false; + } + if (points[0] != points[points.size() - 1]) { + return false; + } + return true; +} + +// remove adjacent duplicate points +static void remove_duplicate_points(std::vector* points) { + int lhs = 0; + int rhs = 1; + for (; rhs < points->size(); ++rhs) { + if ((*points)[rhs] != 
(*points)[lhs]) { + lhs++; + if (lhs != rhs) { + (*points)[lhs] = (*points)[rhs]; + } + } + } + points->resize(lhs + 1); +} + +static GeoParseStatus to_s2loop(const GeoCoordinateList& coords, + std::unique_ptr* loop) { + // 1. convert all coordinates to points + std::vector points(coords.list.size()); + for (int i = 0; i < coords.list.size(); ++i) { + auto res = to_s2point(coords.list[i], &points[i]); + if (res != GEO_PARSE_OK) { + return res; + } + } + // 2. check if it is a closed loop (first point equals last point) + if (!is_loop_closed(points)) { + return GEO_PARSE_LOOP_NOT_CLOSED; + } + // 3. remove adjacent duplicate points + remove_duplicate_points(&points); + // 4. remove last point, which repeats the first one in a closed loop + points.resize(points.size() - 1); + // 5. check if there are enough points + if (points.size() < 3) { + return GEO_PARSE_LOOP_LACK_VERTICES; + } + loop->reset(new S2Loop(points)); + if (!(*loop)->IsValid()) { + return GEO_PARSE_LOOP_INVALID; + } + (*loop)->Normalize(); + return GEO_PARSE_OK; +} + +static GeoParseStatus to_s2polyline(const GeoCoordinateList& coords, + std::unique_ptr* polyline) { + // 1. convert all coordinates to points + std::vector points(coords.list.size()); + for (int i = 0; i < coords.list.size(); ++i) { + auto res = to_s2point(coords.list[i], &points[i]); + if (res != GEO_PARSE_OK) { + return res; + } + } + // 2. remove adjacent duplicate points + remove_duplicate_points(&points); + // 3. 
check if there is enough point + if (points.size() < 2) { + return GEO_PARSE_POLYLINE_LACK_VERTICES; + } + polyline->reset(new S2Polyline(points)); + if (!(*polyline)->IsValid()) { + return GEO_PARSE_POLYLINE_INVALID; + } + return GEO_PARSE_OK; +} + +static GeoParseStatus to_s2polygon(const GeoCoordinateListList& coords_list, + std::unique_ptr* polygon) { + std::vector> loops(coords_list.list.size()); + for (int i = 0; i < coords_list.list.size(); ++i) { + auto res = to_s2loop(*coords_list.list[i], &loops[i]); + if (res != GEO_PARSE_OK) { + return res; + } + if (i != 0 && !loops[0]->Contains(loops[i].get())) { + return GEO_PARSE_POLYGON_NOT_HOLE; + } + } + polygon->reset(new S2Polygon(std::move(loops))); + return GEO_PARSE_OK; +} + +bool GeoShape::decode_from(const void* data, size_t size) { + if (size < 2) { + return false; + } + char reserved_byte = ((const char*)data)[0]; + char type_byte = ((const char*)data)[1]; + if (reserved_byte != 0X00 || type_byte != type()) { + return false; + } + return decode((const char*)data + 2, size - 2); +} + +void GeoShape::encode_to(std::string* buf) { + // reserve a byte for future use + buf->push_back(0X00); + buf->push_back((char)type()); + encode(buf); +} + +GeoShape* GeoShape::from_wkt(const char* data, size_t size, GeoParseStatus* status) { + GeoShape* shape = nullptr; + *status = WktParse::parse_wkt(data, size, &shape); + return shape; +} + +GeoShape* GeoShape::from_encoded(const void* ptr, size_t size) { + if (size < 2 || ((const char*)ptr)[0] != 0X00) { + return nullptr; + } + std::unique_ptr shape; + switch (((const char*)ptr)[1]) { + case GEO_SHAPE_POINT: { + shape.reset(new GeoPoint()); + break; + } + case GEO_SHAPE_LINE_STRING: { + shape.reset(new GeoLine()); + break; + } + case GEO_SHAPE_POLYGON: { + shape.reset(new GeoPolygon()); + break; + } + case GEO_SHAPE_CIRCLE: { + shape.reset(new GeoCircle()); + break; + } + default: + return nullptr; + } + auto res = shape->decode((const char*)ptr + 2, size - 2); + if 
(!res) { + return nullptr; + } + return shape.release(); +} + +GeoParseStatus GeoPoint::from_coord(double x, double y) { + return to_s2point(x, y, &_point); +} + +GeoParseStatus GeoPoint::from_coord(const GeoCoordinate& coord) { + return to_s2point(coord, &_point); +} + +std::string GeoPoint::to_string() const { + return as_wkt(); +} + +void GeoPoint::encode(std::string* buf) { + buf->append((const char*)&_point, sizeof(_point)); +} + +bool GeoPoint::decode(const void* data, size_t size) { + if (size < sizeof(_point)) { + return false; + } + memcpy(&_point, data, sizeof(_point)); // copy exactly one point; a larger caller-supplied size must not overflow _point + return true; +} + +double GeoPoint::x() const { + return S2LatLng(_point).lng().degrees(); +} + +double GeoPoint::y() const { + return S2LatLng(_point).lat().degrees(); +} + +std::string GeoPoint::as_wkt() const { + std::stringstream ss; + ss << "POINT ("; + print_s2point(ss, _point); + ss << ")"; + return ss.str(); +} + +GeoParseStatus GeoLine::from_coords(const GeoCoordinateList& list) { + return to_s2polyline(list, &_polyline); +} + +void GeoLine::encode(std::string* buf) { + Encoder encoder; + _polyline->Encode(&encoder); + buf->append(encoder.base(), encoder.length()); +} + +bool GeoLine::decode(const void* data, size_t size) { + Decoder decoder(data, size); + _polyline.reset(new S2Polyline()); + return _polyline->Decode(&decoder); +} + +GeoParseStatus GeoPolygon::from_coords(const GeoCoordinateListList& list) { + return to_s2polygon(list, &_polygon); +} + +void GeoPolygon::encode(std::string* buf) { + Encoder encoder; + _polygon->Encode(&encoder); + buf->append(encoder.base(), encoder.length()); +} + +bool GeoPolygon::decode(const void* data, size_t size) { + Decoder decoder(data, size); + _polygon.reset(new S2Polygon()); + return _polygon->Decode(&decoder); +} + +std::string GeoLine::as_wkt() const { + std::stringstream ss; + ss << "LINESTRING ("; + for (int i = 0; i < _polyline->num_vertices(); ++i) { + if (i != 0) { + ss << ", "; + } + print_s2point(ss, _polyline->vertex(i)); + } + ss << 
")"; + return ss.str(); +} + +std::string GeoPolygon::as_wkt() const { + std::stringstream ss; + ss << "POLYGON ("; + for (int i = 0; i < _polygon->num_loops(); ++i) { + if (i != 0) { + ss << ", "; + } + ss << "("; + const S2Loop* loop = _polygon->loop(i); + for (int j = 0; j < loop->num_vertices(); ++j) { + if (j != 0) { + ss << ", "; + } + print_s2point(ss, loop->vertex(j)); + } + ss << ", "; + print_s2point(ss, loop->vertex(0)); + ss << ")"; + } + ss << ")"; + + return ss.str(); +} + +bool GeoPolygon::contains(const GeoShape* rhs) const { + switch (rhs->type()) { + case GEO_SHAPE_POINT: { + const GeoPoint* point = (const GeoPoint*)rhs; + return _polygon->Contains(point->point()); +#if 0 + if (_polygon->Contains(point->point())) { + return true; + } + return _polygon->MayIntersect(S2Cell(point->point())); +#endif + + } + case GEO_SHAPE_LINE_STRING: { + const GeoLine* line = (const GeoLine*)rhs; + return _polygon->Contains(*line->polyline()); + } + case GEO_SHAPE_POLYGON: { + const GeoPolygon* other = (const GeoPolygon*)rhs; + return _polygon->Contains(other->polygon()); + } +#if 0 + case GEO_SHAPE_MULTI_POINT: { + const GeoMultiPoint* multi_point = (const GeoMultiPoint*)rhs; + for (auto& point : multi_point->points()) { + if (!_polygon.Contains(point)) { + return false; + } + } + return true; + } + case GEO_SHAPE_MULTI_LINE_STRING: { + const GeoMultiLine* multi_line = (const GeoMultiLine*)rhs; + for (auto line : multi_line->lines()) { + if (!_polygon.Contains(line)) { + return false; + } + } + return true; + } + case GEO_SHAPE_MULTI_POLYGON: { + const GeoMultiPolygon* multi_polygon = (const GeoMultiPolygon*)rhs; + for (auto polygon : multi_polygon->polygons()) { + if (!_polygon.Contains(polygon)) { + return false; + } + } + return true; + } +#endif + default: + return false; + } +} + +GeoParseStatus GeoCircle::init(double lng, double lat, double radius_meter) { + S2Point center; + auto status = to_s2point(lng, lat, ¢er); + if (status != GEO_PARSE_OK) { + return 
status; + } + S1Angle radius = S2Earth::ToAngle(util::units::Meters(radius_meter)); + _cap.reset(new S2Cap(center, radius)); + if (!_cap->is_valid()) { + return GEO_PARSE_CIRCLE_INVALID; + } + return GEO_PARSE_OK; +} + +bool GeoCircle::contains(const GeoShape* rhs) const { + switch (rhs->type()) { + case GEO_SHAPE_POINT: { + const GeoPoint* point = (const GeoPoint*)rhs; + return _cap->Contains(point->point()); +#if 0 + if (_polygon->Contains(point->point())) { + return true; + } + return _polygon->MayIntersect(S2Cell(point->point())); +#endif + + } +#if 0 + case GEO_SHAPE_LINE_STRING: { + const GeoLine* line = (const GeoLine*)rhs; + return _polygon->Contains(*line->polyline()); + } + case GEO_SHAPE_POLYGON: { + const GeoPolygon* other = (const GeoPolygon*)rhs; + return _polygon->Contains(other->polygon()); + } + case GEO_SHAPE_MULTI_POINT: { + const GeoMultiPoint* multi_point = (const GeoMultiPoint*)rhs; + for (auto& point : multi_point->points()) { + if (!_polygon.Contains(point)) { + return false; + } + } + return true; + } + case GEO_SHAPE_MULTI_LINE_STRING: { + const GeoMultiLine* multi_line = (const GeoMultiLine*)rhs; + for (auto line : multi_line->lines()) { + if (!_polygon.Contains(line)) { + return false; + } + } + return true; + } + case GEO_SHAPE_MULTI_POLYGON: { + const GeoMultiPolygon* multi_polygon = (const GeoMultiPolygon*)rhs; + for (auto polygon : multi_polygon->polygons()) { + if (!_polygon.Contains(polygon)) { + return false; + } + } + return true; + } +#endif + default: + return false; + } +} + +void GeoCircle::encode(std::string* buf) { + Encoder encoder; + _cap->Encode(&encoder); + buf->append(encoder.base(), encoder.length()); +} + +bool GeoCircle::decode(const void* data, size_t size) { + Decoder decoder(data, size); + _cap.reset(new S2Cap()); + return _cap->Decode(&decoder); +} + +std::string GeoCircle::as_wkt() const { + std::stringstream ss; + ss << "CIRCLE (("; + print_s2point(ss, _cap->center()); + ss << "), " << 
S2Earth::ToMeters(_cap->radius()) << ")"; + return ss.str(); +} + +#if 0 + +template +bool GeoMultiPolygon::_contains(const T rhs) { + for (auto polygon : _polygons) { + if (polygon->Contains(point->point())) { + return true; + } + } + return false; +} + +bool GeoMultiPolygon::contains(const GeoShape* rhs) { + switch (rhs->type()) { + case GEO_SHAPE_POINT: { + const GeoPoint* point = (const GeoPoint*)rhs; + return _contains(point->point()); + } + case GEO_SHAPE_LINE_STRING: { + const GeoLine* line = (const GeoLine*)rhs; + return _contains(line->polyline()); + } + case GEO_SHAPE_POLYGON: { + const GeoPolygon* polygon = (const GeoPolygon*)rhs; + return _contains(line->polygon()); + } + case GEO_SHAPE_MULTI_POINT: { + const GeoMultiPoint* multi_point = (const GeoMultiPoint*)rhs; + for (auto point : multi_point->points()) { + if (!_contains(point)) { + return false; + } + } + return true; + } + case GEO_SHAPE_LINE_STRING: { + const GeoMultiLine* multi_line = (const GeoMultiLine*)rhs; + for (auto line : multi_line->lines()) { + if (!_contains(line)) { + return false; + } + } + return true; + } + case GEO_SHAPE_POLYGON: { + const GeoMultiPolygon* multi_polygon = (const GeoMultiPolygon*)rhs; + for (auto polygon : multi_polygon->polygons()) { + if (!_contains(polygon)) { + return false; + } + } + return true; + } + } + return false; +} +#endif + +} diff --git a/be/src/geo/geo_types.h b/be/src/geo/geo_types.h new file mode 100644 index 00000000000000..6768c01cba68eb --- /dev/null +++ b/be/src/geo/geo_types.h @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include "geo/geo_common.h" +#include "geo/wkt_parse_type.h" + +namespace doris { + +class GeoShape { +public: + virtual ~GeoShape() { } + + virtual GeoShapeType type() const = 0; + + // decode from serialized data + static GeoShape* from_encoded(const void* data, size_t size); + // try to construct a GeoShape from a WKT. If construct successfully, a GeoShape will + // be returned, and the client should delete it when don't need it. + // return nullptr if convert failed, and reason will be set in status + static GeoShape* from_wkt(const char* data, size_t size, GeoParseStatus* status); + + void encode_to(std::string* buf); + bool decode_from(const void* data, size_t size); + + virtual std::string as_wkt() const = 0; + + virtual bool contains(const GeoShape* rhs) const { return false; } + virtual std::string to_string() const { return ""; }; + +protected: + virtual void encode(std::string* buf) = 0; + virtual bool decode(const void* data, size_t size) = 0; +}; + +class GeoPoint : public GeoShape { +public: + GeoPoint() { } + ~GeoPoint() override { } + + GeoParseStatus from_coord(double x, double y); + GeoParseStatus from_coord(const GeoCoordinate& point); + + GeoShapeType type() const override { return GEO_SHAPE_POINT; } + + const S2Point& point() const { return _point; } + + std::string to_string() const override; + std::string as_wkt() const override; + + double x() const; + double y() const; + +protected: + void encode(std::string* buf) override; + bool 
decode(const void* data, size_t size) override; + +private: + S2Point _point; +}; + +class GeoLine : public GeoShape { +public: + GeoLine() { } + ~GeoLine() override { } + + GeoParseStatus from_coords(const GeoCoordinateList& list); + + GeoShapeType type() const override { return GEO_SHAPE_LINE_STRING; } + const S2Polyline* polyline() const { return _polyline.get(); } + + std::string as_wkt() const override; +protected: + void encode(std::string* buf) override; + bool decode(const void* data, size_t size) override; + +private: + std::unique_ptr _polyline; +}; + +class GeoPolygon : public GeoShape { +public: + GeoPolygon() { } + ~GeoPolygon() override { } + + GeoParseStatus from_coords(const GeoCoordinateListList& list); + + GeoShapeType type() const override { return GEO_SHAPE_POLYGON; } + const S2Polygon* polygon() const { return _polygon.get(); } + + bool contains(const GeoShape* rhs) const override; + std::string as_wkt() const override; + +protected: + void encode(std::string* buf) override; + bool decode(const void* data, size_t size) override; + +private: + std::unique_ptr _polygon; +}; + +class GeoCircle : public GeoShape { +public: + GeoCircle() { } + ~GeoCircle() { } + + GeoParseStatus init(double lng, double lat, double radius); + + GeoShapeType type() const override { return GEO_SHAPE_CIRCLE; } + + bool contains(const GeoShape* rhs) const override; + std::string as_wkt() const override; + +protected: + void encode(std::string* buf) override; + bool decode(const void* data, size_t size) override; + +private: + std::unique_ptr _cap; +}; + +#if 0 +class GeoMultiPoint : public GeoShape { +public: + GeoPolygon(); + ~GeoPolygon() override; + + GeoShapeType type() const override { return GEO_SHAPE_POLYGON; } + const std::vector& points() const { return _points; } + +private: + std::vector _points; +}; + +class GeoMultiLine : public GeoShape { +public: + GeoMultiLine(); + ~GeoMultiLine() override; + + GeoShapeType type() const override { return 
GEO_SHAPE_MULTI_LINE_STRING; } + const std::vector& polylines() const { return _polylines; } + +private: + std::vector _polylines; +}; + +class GeoMultiPolygon : public GeoShape { +public: + GeoMultiPolygon(); + ~GeoMultiPolygon() override; + + GeoShapeType type() const override { return GEO_SHAPE_MULTI_POLYGON; } + + const std::vector& polygons() const { return _polygons; } + + + bool contains(const GeoShape* rhs) override; +private: + std::vector _polygons; +}; + +#if 0 +class GeoEnvelope : public GeoShape { +public: +}; + +class GeoCircle : public GeoShape { +public: +}; +#endif + +#endif + +} + diff --git a/be/src/geo/wkt_lex.l b/be/src/geo/wkt_lex.l new file mode 100644 index 00000000000000..af27f5550c020d --- /dev/null +++ b/be/src/geo/wkt_lex.l @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +%{ + +#include "common/logging.h" + +namespace doris { +class GeoShape; +} + +#include +/* wkt_parse_ctx and wkt_parse_type must be ahead of wkt_yacc */ +#include "geo/wkt_parse_ctx.h" +#include "geo/wkt_parse_type.h" +#include "geo/wkt_yacc.y.hpp" + +#define YYSTYPE WKT_STYPE +#define YY_EXTRA_TYPE WktParseContext* + +%} + +%option case-insensitive +%option bison-bridge +%option reentrant +%option never-interactive +%option prefix="wkt_" +%option noyywrap +%option nounput +%option noinput + +%% + +-?(([0-9]+\.?)|([0-9]*\.?[0-9]+)([eE][-+]?[0-9]+)?) { + yylval->double_value = atof(yytext); + return NUMERIC; +} + +MULTIPOLYGON { return KW_MULTI_POLYGON; } +MULTILINESTRING { return KW_MULTI_LINESTRING; } +MULTIPOINT { return KW_MULTI_POINT; } +POLYGON { return KW_POLYGON; } +LINESTRING { return KW_LINESTRING; } +POINT { + return KW_POINT; } + +\( | +\) | +, | +\; { return yytext[0]; } + +[ \t\n\r]+ { /* ignore whitespace */ } + +. { yyterminate(); } + +%% diff --git a/be/src/geo/wkt_parse.cpp b/be/src/geo/wkt_parse.cpp new file mode 100644 index 00000000000000..5699ea97afe97e --- /dev/null +++ b/be/src/geo/wkt_parse.cpp @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "geo/wkt_parse.h" + +#include "geo/wkt_parse_ctx.h" +#include "geo/geo_types.h" +#include "geo/wkt_parse_type.h" +#include "geo/wkt_yacc.y.hpp" +#define YYSTYPE WKT_STYPE +#define YY_EXTRA_TYPE WktParseContext* +#include "geo/wkt_lex.l.h" + +namespace doris { + +GeoParseStatus WktParse::parse_wkt(const char* str, size_t len, GeoShape** shape) { + WktParseContext ctx; + // initialize lexer + wkt_lex_init_extra(&ctx, &ctx.scaninfo); + wkt__scan_bytes(str, len, ctx.scaninfo); + + // parse + auto res = wkt_parse(&ctx); + wkt_lex_destroy(ctx.scaninfo); + if (res == 0) { + *shape = ctx.shape; + } else { + if (ctx.parse_status == GEO_PARSE_OK) { + ctx.parse_status = GEO_PARSE_WKT_SYNTAX_ERROR; + } + } + return ctx.parse_status; +} + +} diff --git a/be/src/geo/wkt_parse.h b/be/src/geo/wkt_parse.h new file mode 100644 index 00000000000000..235609cbdc649a --- /dev/null +++ b/be/src/geo/wkt_parse.h @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "common/status.h" +#include "geo/geo_common.h" + +namespace doris { + +class GeoShape; + +class WktParse { +public: + // Parse WKT(Well Known Text) to a GeoShape. + // Return a valid GeoShape if input WKT is supppored. 
+ // Return null if WKT is not supported or invalid + static GeoParseStatus parse_wkt(const char* str, size_t len, GeoShape** shape); +}; + +} diff --git a/be/src/geo/wkt_parse_ctx.h b/be/src/geo/wkt_parse_ctx.h new file mode 100644 index 00000000000000..43092e04c9e689 --- /dev/null +++ b/be/src/geo/wkt_parse_ctx.h @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "geo/geo_common.h" + +namespace doris { +class GeoShape; +} + +typedef void* yyscan_t; +struct WktParseContext { + yyscan_t scaninfo; + doris::GeoShape* shape = nullptr; + doris::GeoParseStatus parse_status = doris::GEO_PARSE_OK; +}; + diff --git a/be/src/geo/wkt_parse_type.h b/be/src/geo/wkt_parse_type.h new file mode 100644 index 00000000000000..67c66e45740096 --- /dev/null +++ b/be/src/geo/wkt_parse_type.h @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +// This file defines the plain coordinate containers that the WKT grammar +// (wkt_yacc.y) builds while parsing. +namespace doris { + +// A single coordinate pair. +struct GeoCoordinate { + double x; + double y; +}; + +// An ordered sequence of coordinates, stored by value. +struct GeoCoordinateList { + void add(const GeoCoordinate& coordinate) { + list.push_back(coordinate); + } + std::vector list; +}; + +// A sequence of heap-allocated GeoCoordinateList objects. +// Every pointer handed to add() is deleted by the destructor. +struct GeoCoordinateListList { + GeoCoordinateListList() = default; + // The destructor deletes every stored pointer, so a copy would delete the + // same lists a second time; copying is therefore disabled. + GeoCoordinateListList(const GeoCoordinateListList&) = delete; + GeoCoordinateListList& operator=(const GeoCoordinateListList&) = delete; + + ~GeoCoordinateListList() { + for (auto item : list) { + delete item; + } + } + void add(GeoCoordinateList* coordinates) { + list.push_back(coordinates); + } + std::vector list; +}; + +} + diff --git a/be/src/geo/wkt_yacc.y b/be/src/geo/wkt_yacc.y new file mode 100644 index 00000000000000..d60a54f28bb42e --- /dev/null +++ b/be/src/geo/wkt_yacc.y @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +%{ +#include "common/logging.h" +#include "geo/wkt_parse_type.h" +#include "geo/geo_types.h" + +/* forward declare this type for the wkt_parse declaration in yacc.y.cpp; + * geo/wkt_parse_ctx.h defines it as a struct, so the same class-key is used here + */ +struct WktParseContext; +/* error callback required by the generated parser; the message is ignored */ +void wkt_error(WktParseContext* ctx, const char* msg) { +} +%} + +%union { + double double_value; + doris::GeoCoordinate coordinate_value; + doris::GeoCoordinateList* coordinate_list_value; + doris::GeoCoordinateListList* coordinate_list_list_value; + doris::GeoShape* shape_value; +} + +%code { +/* we need yyscan_t in WktParseContext, so we include lex.h here, + * and we should include this header after the union definition, because it + * needs YYSTYPE + */ +#include "geo/wkt_lex.l.h" +/* we need WktParseContext to pass scaninfo to lexer */ +#include "geo/wkt_parse_ctx.h" + +#define WKT_LEX_PARAM ctx->scaninfo +} + +%define api.pure full +%parse-param { WktParseContext* ctx } +%lex-param { WKT_LEX_PARAM } + +/* for multi-thread */ +%define api.prefix {wkt_} +%defines + +%expect 0 + +%start shape + +/* keywords for geometry types */ +%token KW_POINT KW_LINESTRING KW_POLYGON +%token KW_MULTI_POINT KW_MULTI_LINESTRING KW_MULTI_POLYGON + +%token NUMERIC + +%type shape +%type point linestring polygon +%type coordinate +%type coordinate_list +%type coordinate_list_list + +%destructor { delete $$; } coordinate_list +%destructor { delete $$; } coordinate_list_list +%destructor { delete $$; } point +%destructor { delete $$; } linestring +%destructor { delete $$; } polygon + +%% + +shape: + point + { ctx->shape = $1; } + | linestring + { ctx->shape = $1; } + | polygon + { ctx->shape = $1; } + ; + +point: + KW_POINT '(' coordinate ')' + { + std::unique_ptr point(new doris::GeoPoint()); + ctx->parse_status = point->from_coord($3); + if (ctx->parse_status != doris::GEO_PARSE_OK) { + YYABORT; + } + $$ = point.release(); + } + ; + +linestring: + KW_LINESTRING '(' coordinate_list ')' + { + // to avoid memory leak + std::unique_ptr list($3); + std::unique_ptr line(new doris::GeoLine()); + ctx->parse_status = 
line->from_coords(*$3); + if (ctx->parse_status != doris::GEO_PARSE_OK) { + YYABORT; + } + $$ = line.release(); + } + ; + +polygon: + KW_POLYGON '(' coordinate_list_list ')' + { + // to avoid memory leak + std::unique_ptr list($3); + std::unique_ptr polygon(new doris::GeoPolygon()); + ctx->parse_status = polygon->from_coords(*$3); + if (ctx->parse_status != doris::GEO_PARSE_OK) { + YYABORT; + } + $$ = polygon.release(); + } + ; + +coordinate_list_list: + coordinate_list_list ',' '(' coordinate_list ')' + { + $1->add($4); + $$ = $1; + } + | '(' coordinate_list ')' + { + $$ = new doris::GeoCoordinateListList(); + $$->add($2); + } + ; + +coordinate_list: + coordinate_list ',' coordinate + { + $1->add($3); + $$ = $1; + } + | coordinate + { + $$ = new doris::GeoCoordinateList(); + $$->add($1); + } + ; + +coordinate: + NUMERIC NUMERIC + { + $$.x = $1; + $$.y = $2; + } + ; + diff --git a/be/src/testutil/CMakeLists.txt b/be/src/testutil/CMakeLists.txt index c31d6f07bdaf53..6d7b0aad5ba569 100644 --- a/be/src/testutil/CMakeLists.txt +++ b/be/src/testutil/CMakeLists.txt @@ -23,5 +23,6 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/testutil") add_library(TestUtil desc_tbl_builder.cc + function_utils.cpp ) diff --git a/be/src/testutil/function_utils.cpp b/be/src/testutil/function_utils.cpp new file mode 100644 index 00000000000000..d6572b103a93bd --- /dev/null +++ b/be/src/testutil/function_utils.cpp @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "testutil/function_utils.h" + +#include + +#include "runtime/mem_pool.h" +#include "runtime/mem_tracker.h" +#include "udf/udf_internal.h" +#include "udf/udf.h" + +namespace doris { + +FunctionUtils::FunctionUtils() { + doris_udf::FunctionContext::TypeDesc return_type; + std::vector arg_types; + _mem_tracker = new MemTracker(); + _memory_pool = new MemPool(_mem_tracker); + _fn_ctx = FunctionContextImpl::create_context( + _state, _memory_pool, return_type, arg_types, 0, false); +} + +FunctionUtils::~FunctionUtils() { + _fn_ctx->impl()->close(); + delete _fn_ctx; + delete _memory_pool; + delete _mem_tracker; +} + +} diff --git a/be/src/testutil/function_utils.h b/be/src/testutil/function_utils.h new file mode 100644 index 00000000000000..c47a5d1df12402 --- /dev/null +++ b/be/src/testutil/function_utils.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +namespace doris_udf { +class FunctionContext; +} + +namespace doris { + +class MemPool; +class MemTracker; +class RuntimeState; + +// Test helper that creates a doris_udf::FunctionContext in its constructor and +// releases it in its destructor (see function_utils.cpp). +class FunctionUtils { +public: + FunctionUtils(); + ~FunctionUtils(); + + // The destructor deletes the raw pointers held below, so copying an instance + // would free them twice; copying is therefore disabled. + FunctionUtils(const FunctionUtils&) = delete; + FunctionUtils& operator=(const FunctionUtils&) = delete; + + // Returns the context created by the constructor; it is deleted by the + // destructor, so callers must not delete it. + doris_udf::FunctionContext* get_fn_ctx() { + return _fn_ctx; + } +private: + RuntimeState* _state = nullptr; + MemTracker* _mem_tracker = nullptr; + MemPool* _memory_pool = nullptr; + doris_udf::FunctionContext* _fn_ctx = nullptr; +}; + +} diff --git a/be/test/geo/CMakeLists.txt b/be/test/geo/CMakeLists.txt new file mode 100644 index 00000000000000..bbe5dabfc9cba0 --- /dev/null +++ b/be/test/geo/CMakeLists.txt @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# output directory for the executables declared in this file +# (presumably the test binaries created by ADD_BE_TEST - confirm against be/CMakeLists.txt) +set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/geo") + +# register the geo tests through the ADD_BE_TEST helper from be/CMakeLists.txt +ADD_BE_TEST(wkt_parse_test) +ADD_BE_TEST(geo_functions_test) +ADD_BE_TEST(geo_types_test) diff --git a/be/test/geo/geo_functions_test.cpp b/be/test/geo/geo_functions_test.cpp new file mode 100644 index 00000000000000..f54d84a1086843 --- /dev/null +++ b/be/test/geo/geo_functions_test.cpp @@ -0,0 +1,329 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "geo/geo_functions.h" + +#include +#include + +#include "geo/geo_types.h" +#include "geo/wkt_parse.h" +#include "geo/wkt_parse_ctx.h" +#include "common/logging.h" +#include "testutil/function_utils.h" +#include "udf/udf.h" +#include "udf/udf_internal.h" + +namespace doris { + +class GeoFunctionsTest : public testing::Test { +public: + GeoFunctionsTest() { } + virtual ~GeoFunctionsTest() { } +}; + +TEST_F(GeoFunctionsTest, st_dist_sphere) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + { + DoubleVal x_lng(0.0); + DoubleVal x_lat(0.0); + DoubleVal y_lng(0.0); + DoubleVal y_lat(0.0); + + auto dist = GeoFunctions::st_distance_sphere(ctx, x_lng, x_lat, y_lng, y_lat); + ASSERT_EQ(0, dist.val); + } + { + DoubleVal x_lng(0.0); + DoubleVal x_lat(0.0); + DoubleVal y_lng(0.0); + DoubleVal y_lat(1.0); + + auto dist = GeoFunctions::st_distance_sphere(ctx, x_lng, x_lat, y_lng, y_lat); + LOG(INFO) << dist.val; + } +} + +TEST_F(GeoFunctionsTest, st_point) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + DoubleVal lng(113); + DoubleVal lat(64); + + auto str = GeoFunctions::st_point(ctx, lng, lat); + ASSERT_FALSE(str.is_null); + + GeoPoint point; + auto res = point.decode_from(str.ptr, str.len); + ASSERT_TRUE(res); + ASSERT_EQ(113, point.x()); + ASSERT_EQ(64, point.y()); +} + +TEST_F(GeoFunctionsTest, st_x_y) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + GeoPoint point; + point.from_coord(134, 63); + + std::string buf; + point.encode_to(&buf); + + auto x = GeoFunctions::st_x(ctx, StringVal((uint8_t*)buf.data(), buf.size())); + auto y = GeoFunctions::st_y(ctx, StringVal((uint8_t*)buf.data(), buf.size())); + ASSERT_EQ(134, x.val); + ASSERT_EQ(63, y.val); +} + +TEST_F(GeoFunctionsTest, as_wkt) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + GeoPoint point; + point.from_coord(134, 63); + + std::string buf; + point.encode_to(&buf); + + auto wkt = GeoFunctions::st_as_wkt(ctx, 
StringVal((uint8_t*)buf.data(), buf.size())); + ASSERT_STREQ("POINT (134 63)", std::string((char*)wkt.ptr, wkt.len).c_str()); +} + +TEST_F(GeoFunctionsTest, st_from_wkt) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + GeoFunctions::st_from_wkt_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_EQ(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + std::string wkt = "POINT (10.1 20.2)"; + auto str = GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_FALSE(str.is_null); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + + // second time + { + StringVal wkt_val((uint8_t*)wkt.data(), wkt.size()); + // push const value + std::vector const_vals; + const_vals.push_back(&wkt_val); + ctx->impl()->set_constant_args(const_vals); + + // prepare + GeoFunctions::st_from_wkt_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_NE(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + // convert + auto str2 = GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_FALSE(str2.is_null); + + // close + GeoPoint point; + auto res = point.decode_from(str2.ptr, str2.len); + ASSERT_TRUE(res); + ASSERT_EQ(10.1, point.x()); + ASSERT_EQ(20.2, point.y()); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + } +} + +TEST_F(GeoFunctionsTest, st_line) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + GeoFunctions::st_from_wkt_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + + std::string wkt = "LINESTRING (10.1 20.2, 21.1 30.1)"; + auto str = GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_FALSE(str.is_null); + + GeoLine line; + auto res = line.decode_from(str.ptr, str.len); + ASSERT_TRUE(res); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + + // second time + { + StringVal wkt_val((uint8_t*)wkt.data(), wkt.size()); + // push 
const value + std::vector const_vals; + const_vals.push_back(&wkt_val); + ctx->impl()->set_constant_args(const_vals); + + // prepare + GeoFunctions::st_line_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_NE(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + // convert + auto str2 = GeoFunctions::st_line(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_FALSE(str2.is_null); + + // close + GeoLine line; + auto res = line.decode_from(str2.ptr, str2.len); + ASSERT_TRUE(res); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + } +} + +TEST_F(GeoFunctionsTest, st_polygon) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + GeoFunctions::st_from_wkt_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + + std::string wkt = "POLYGON ((10 10, 50 10, 50 50, 10 50, 10 10))"; + auto str = GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_FALSE(str.is_null); + + // second time + { + StringVal wkt_val((uint8_t*)wkt.data(), wkt.size()); + // push const value + std::vector const_vals; + const_vals.push_back(&wkt_val); + ctx->impl()->set_constant_args(const_vals); + + // prepare + GeoFunctions::st_polygon_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_NE(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + // convert + auto str2 = GeoFunctions::st_polygon(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_FALSE(str2.is_null); + + // close + GeoPolygon polygon; + auto res = polygon.decode_from(str2.ptr, str2.len); + ASSERT_TRUE(res); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + } +} + +TEST_F(GeoFunctionsTest, st_circle) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + GeoFunctions::st_from_wkt_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + + DoubleVal lng(111); + DoubleVal lat(64); + DoubleVal radius_meter(10 * 100); + auto str = GeoFunctions::st_circle(ctx, lng, lat, 
radius_meter); + ASSERT_FALSE(str.is_null); + + // second time + { + // push const value + std::vector const_vals; + const_vals.push_back(&lng); + const_vals.push_back(&lat); + const_vals.push_back(&radius_meter); + ctx->impl()->set_constant_args(const_vals); + + // prepare + GeoFunctions::st_circle_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_NE(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + // convert + auto str2 = GeoFunctions::st_circle(ctx, lng, lat, radius_meter); + ASSERT_FALSE(str2.is_null); + + // close + GeoCircle circle; + auto res = circle.decode_from(str2.ptr, str2.len); + ASSERT_TRUE(res); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + } +} + +TEST_F(GeoFunctionsTest, st_poly_line_fail) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + { + GeoFunctions::st_polygon_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + + std::string wkt = "POINT (10.1 20.2)"; + auto str = GeoFunctions::st_polygon(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_TRUE(str.is_null); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + } + { + GeoFunctions::st_line_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + + std::string wkt = "POINT (10.1 20.2)"; + auto str = GeoFunctions::st_line(ctx, StringVal((uint8_t*)wkt.data(), wkt.size())); + ASSERT_TRUE(str.is_null); + GeoFunctions::st_from_wkt_close(ctx, FunctionContext::FRAGMENT_LOCAL); + } +} + +TEST_F(GeoFunctionsTest, st_contains) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + ASSERT_EQ(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + std::string polygon_wkt = "POLYGON ((10 10, 50 10, 50 50, 10 50, 10 10))"; + auto polygon = GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)polygon_wkt.data(), polygon_wkt.size())); + ASSERT_EQ(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + std::string point_wkt = "POINT (25 25)"; + auto point = 
GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)point_wkt.data(), point_wkt.size())); + ASSERT_EQ(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + GeoFunctions::st_contains_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_EQ(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + auto res = GeoFunctions::st_contains(ctx, polygon, point); + ASSERT_TRUE(res.val); + GeoFunctions::st_contains_close(ctx, FunctionContext::FRAGMENT_LOCAL); +} + +TEST_F(GeoFunctionsTest, st_contains_cached) { + FunctionUtils utils; + FunctionContext* ctx = utils.get_fn_ctx(); + + GeoFunctions::st_from_wkt_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + + std::string polygon_wkt = "POLYGON ((10 10, 50 10, 50 50, 10 50, 10 10))"; + auto polygon = GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)polygon_wkt.data(), polygon_wkt.size())); + std::string point_wkt = "POINT (25 25)"; + auto point = GeoFunctions::st_from_wkt(ctx, StringVal((uint8_t*)point_wkt.data(), point_wkt.size())); + + // push const value + std::vector const_vals; + const_vals.push_back(&polygon); + const_vals.push_back(&point); + ctx->impl()->set_constant_args(const_vals); + + // prepare + GeoFunctions::st_contains_prepare(ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_NE(nullptr, ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + auto res = GeoFunctions::st_contains(ctx, polygon, point); + ASSERT_TRUE(res.val); + GeoFunctions::st_contains_close(ctx, FunctionContext::FRAGMENT_LOCAL); +} + +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + FLAGS_s2debug = false; + return RUN_ALL_TESTS(); +} diff --git a/be/test/geo/geo_types_test.cpp b/be/test/geo/geo_types_test.cpp new file mode 100644 index 00000000000000..55df473b8b6b2a --- /dev/null +++ b/be/test/geo/geo_types_test.cpp @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "geo/geo_types.h" + +#include + +#include "geo/geo_types.h" +#include "geo/wkt_parse.h" +#include "geo/wkt_parse_ctx.h" +#include "s2/s2debug.h" +#include "common/logging.h" + +namespace doris { + +class GeoTypesTest : public testing::Test { +public: + GeoTypesTest() { } + virtual ~GeoTypesTest() { } +}; + +TEST_F(GeoTypesTest, point_normal) { + { + GeoPoint point; + auto status = point.from_coord(116.123, 63.546); + ASSERT_EQ(GEO_PARSE_OK, status); + ASSERT_STREQ("POINT (116.123 63.546)", point.as_wkt().c_str()); + + std::string buf; + point.encode_to(&buf); + { + std::unique_ptr point2(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_STREQ("POINT (116.123 63.546)", point2->as_wkt().c_str()); + } + + { + buf.resize(buf.size() - 1); + std::unique_ptr point2(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_EQ(nullptr, point2); + } + } + { + GeoPoint point; + GeoCoordinate coord; + coord.x = 116.123; + coord.y = 63.546; + auto status = point.from_coord(coord); + ASSERT_EQ(GEO_PARSE_OK, status); + ASSERT_STREQ("POINT (116.123 63.546)", point.as_wkt().c_str()); + } +} + +TEST_F(GeoTypesTest, point_invalid) { + GeoPoint point; + + auto status = point.from_coord(200, 88); + ASSERT_NE(GEO_PARSE_OK, status); +} + +TEST_F(GeoTypesTest, linestring) { + 
const char* wkt = "LINESTRING (30 10, 10 30, 40 40)"; + GeoParseStatus status; + std::unique_ptr line(GeoShape::from_wkt(wkt, strlen(wkt), &status)); + ASSERT_NE(nullptr, line.get()); + ASSERT_EQ(GEO_SHAPE_LINE_STRING, line->type()); + + ASSERT_STREQ(wkt, line->as_wkt().c_str()); + + std::string buf; + line->encode_to(&buf); + + { + std::unique_ptr line2(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_STREQ(wkt, line2->as_wkt().c_str()); + } + { + buf.resize(buf.size() - 1); + std::unique_ptr line2(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_EQ(nullptr, line2); + } +} + +TEST_F(GeoTypesTest, polygon_contains) { + const char* wkt = "POLYGON ((10 10, 50 10, 50 10, 50 50, 50 50, 10 50, 10 10))"; + GeoParseStatus status; + std::unique_ptr polygon(GeoShape::from_wkt(wkt, strlen(wkt), &status)); + ASSERT_NE(nullptr, polygon.get()); + + { + GeoPoint point; + point.from_coord(20, 20); + auto res = polygon->contains(&point); + ASSERT_TRUE(res); + } + { + GeoPoint point; + point.from_coord(5, 5); + auto res = polygon->contains(&point); + ASSERT_FALSE(res); + } + + std::string buf; + polygon->encode_to(&buf); + + { + std::unique_ptr shape(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_EQ(GEO_SHAPE_POLYGON, shape->type()); + LOG(INFO) << "polygon=" << shape->as_wkt(); + } + + { + buf.resize(buf.size() - 1); + std::unique_ptr shape(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_EQ(nullptr, shape); + } +} + +TEST_F(GeoTypesTest, polygon_parse_fail) { + { + const char* wkt = "POLYGON ((10 10, 50 10, 50 50, 10 50), (10 10 01))"; + GeoParseStatus status; + std::unique_ptr polygon(GeoShape::from_wkt(wkt, strlen(wkt), &status)); + ASSERT_EQ(GEO_PARSE_WKT_SYNTAX_ERROR, status); + ASSERT_EQ(nullptr, polygon.get()); + } + { + const char* wkt = "POLYGON ((10 10, 50 10, 50 50, 10 50))"; + GeoParseStatus status; + std::unique_ptr polygon(GeoShape::from_wkt(wkt, strlen(wkt), &status)); + ASSERT_EQ(GEO_PARSE_LOOP_NOT_CLOSED, status); + 
ASSERT_EQ(nullptr, polygon.get()); + } + { + const char* wkt = "POLYGON ((10 10, 50 10, 10 10))"; + GeoParseStatus status; + std::unique_ptr polygon(GeoShape::from_wkt(wkt, strlen(wkt), &status)); + ASSERT_EQ(GEO_PARSE_LOOP_LACK_VERTICES, status); + ASSERT_EQ(nullptr, polygon.get()); + } +} + +TEST_F(GeoTypesTest, polygon_hole_contains) { + const char* wkt = "POLYGON ((10 10, 50 10, 50 50, 10 50, 10 10), (20 20, 40 20, 40 40, 20 40, 20 20))"; + GeoParseStatus status; + std::unique_ptr polygon(GeoShape::from_wkt(wkt, strlen(wkt), &status)); + ASSERT_EQ(GEO_PARSE_OK, status); + ASSERT_NE(nullptr, polygon); + + { + GeoPoint point; + point.from_coord(15, 15); + auto res = polygon->contains(&point); + ASSERT_TRUE(res); + } + { + GeoPoint point; + point.from_coord(25, 25); + auto res = polygon->contains(&point); + ASSERT_FALSE(res); + } + { + GeoPoint point; + point.from_coord(20, 20); + auto res = polygon->contains(&point); + ASSERT_TRUE(res); + } +} + +TEST_F(GeoTypesTest, circle) { + GeoCircle circle; + auto res = circle.init(110.123, 64, 1000); + ASSERT_EQ(GEO_PARSE_OK, res); + + std::string buf; + circle.encode_to(&buf); + + { + std::unique_ptr circle2(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_STREQ("CIRCLE ((110.123 64), 1000)", circle2->as_wkt().c_str()); + } + + { + buf.resize(buf.size() - 1); + std::unique_ptr circle2(GeoShape::from_encoded(buf.data(), buf.size())); + ASSERT_EQ(nullptr, circle2); + } +} + +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + FLAGS_s2debug = false; + return RUN_ALL_TESTS(); +} diff --git a/be/test/geo/wkt_parse_test.cpp b/be/test/geo/wkt_parse_test.cpp new file mode 100644 index 00000000000000..de5d83bcebf56d --- /dev/null +++ b/be/test/geo/wkt_parse_test.cpp @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "geo/wkt_parse.h" + +#include + +#include "geo/geo_types.h" +#include "geo/wkt_parse_ctx.h" +#include "common/logging.h" + +namespace doris { + +class WktParseTest : public testing::Test { +public: + WktParseTest() { } + virtual ~WktParseTest() { } +}; + +TEST_F(WktParseTest, normal) { + const char* wkt = "POINT(1 2)"; + + GeoShape* shape = nullptr; + auto status = WktParse::parse_wkt(wkt, strlen(wkt), &shape); + ASSERT_EQ(GEO_PARSE_OK, status); + ASSERT_NE(nullptr, shape); + LOG(INFO) << "parse result: " << shape->to_string(); +} + +TEST_F(WktParseTest, invalid_wkt) { + const char* wkt = "POINT(1,2)"; + + GeoShape* shape = nullptr; + auto status = WktParse::parse_wkt(wkt, strlen(wkt), &shape); + ASSERT_NE(GEO_PARSE_OK, status); + ASSERT_EQ(nullptr, shape); +} + +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/build.sh b/build.sh index 0217675cb35a3d..06ba6dd35f6f2f 100755 --- a/build.sh +++ b/build.sh @@ -36,7 +36,7 @@ export DORIS_HOME=${ROOT} . ${DORIS_HOME}/env.sh # build thirdparty libraries if necessary -if [[ ! -f ${DORIS_THIRDPARTY}/installed/lib/librdkafka.a ]]; then +if [[ ! -f ${DORIS_THIRDPARTY}/installed/lib/libs2.a ]]; then echo "Thirdparty libraries need to be build ..." 
${DORIS_THIRDPARTY}/build-thirdparty.sh fi diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md new file mode 100644 index 00000000000000..4b37351235b6f0 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_astext.md @@ -0,0 +1,20 @@ +# `ST_AsText`,`ST_AsWKT` + +## Syntax + +`VARCHAR ST_AsText(GEOMETRY geo)` + +## Description + +将一个几何图形转化为WKT(Well Known Text)的表示形式 + +## Examples + +``` +mysql> SELECT ST_AsText(ST_Point(24.7, 56.7)); ++---------------------------------+ +| st_astext(st_point(24.7, 56.7)) | ++---------------------------------+ +| POINT (24.7 56.7) | ++---------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md new file mode 100644 index 00000000000000..073d49f6c80f86 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_circle.md @@ -0,0 +1,21 @@ +# `ST_Circle` + +## Syntax + +`GEOMETRY ST_Circle(DOUBLE center_lng, DOUBLE center_lat, DOUBLE radius)` + +## Description + +根据给定的圆心经纬度和半径,构造地球球面上的一个圆。其中`center_lng`表示的是圆心的经度, +`center_lat`表示的是圆心的纬度,`radius`表示的是圆的半径,单位是米 + +## Examples + +``` +mysql> SELECT ST_AsText(ST_Circle(111, 64, 10000)); ++--------------------------------------------+ +| st_astext(st_circle(111.0, 64.0, 10000.0)) | ++--------------------------------------------+ +| CIRCLE ((111 64), 10000) | ++--------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md new file mode 100644 index 00000000000000..47e939f4099609 --- /dev/null +++
b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_contains.md @@ -0,0 +1,27 @@ +# `ST_Contains` + +## Syntax + +`BOOL ST_Contains(GEOMETRY shape1, GEOMETRY shape2)` + +## Description + +判断几何图形shape1是否完全能够包含几何图形shape2 + +## Examples + +``` +mysql> SELECT ST_Contains(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"), ST_Point(5, 5)); ++----------------------------------------------------------------------------------------+ +| st_contains(st_polygon('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))'), st_point(5.0, 5.0)) | ++----------------------------------------------------------------------------------------+ +| 1 | ++----------------------------------------------------------------------------------------+ + +mysql> SELECT ST_Contains(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"), ST_Point(50, 50)); ++------------------------------------------------------------------------------------------+ +| st_contains(st_polygon('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))'), st_point(50.0, 50.0)) | ++------------------------------------------------------------------------------------------+ +| 0 | ++------------------------------------------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md new file mode 100644 index 00000000000000..4b5b034f07e814 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md @@ -0,0 +1,20 @@ +# `ST_Distance_Sphere` + +## Syntax + +`DOUBLE ST_Distance_Sphere(DOUBLE x_lng, DOUBLE x_lat, DOUBLE y_lng, DOUBLE y_lat)` + +## Description + +计算地球两点之间的球面距离,单位为 米。传入的参数分别为X点的经度,X点的纬度,Y点的经度,Y点的纬度。 + +## Examples + +``` +mysql> select st_distance_sphere(116.35620117, 39.939093, 116.4274406433, 39.9020987219); ++----------------------------------------------------------------------------+
+| st_distance_sphere(116.35620117, 39.939093, 116.4274406433, 39.9020987219) | ++----------------------------------------------------------------------------+ +| 7336.9135549995917 | ++----------------------------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md new file mode 100644 index 00000000000000..577d1558a09ccb --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md @@ -0,0 +1,20 @@ +# `ST_GeometryFromText`,`ST_GeomFromText` + +## Syntax + +`GEOMETRY ST_GeometryFromText(VARCHAR wkt)` + +## Description + +将一个WKT(Well Known Text)转化为对应的内存的几何形式 + +## Examples + +``` +mysql> SELECT ST_AsText(ST_GeometryFromText("LINESTRING (1 1, 2 2)")); ++---------------------------------------------------------+ +| st_astext(st_geometryfromtext('LINESTRING (1 1, 2 2)')) | ++---------------------------------------------------------+ +| LINESTRING (1 1, 2 2) | ++---------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md new file mode 100644 index 00000000000000..40201909932ad3 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_linefromtext.md @@ -0,0 +1,20 @@ +# `ST_LineFromText`,`ST_LineStringFromText` + +## Syntax + +`GEOMETRY ST_LineFromText(VARCHAR wkt)` + +## Description + +将一个WKT(Well Known Text)转化为一个Line形式的内存表现形式 + +## Examples + +``` +mysql> SELECT ST_AsText(ST_LineFromText("LINESTRING (1 1, 2 2)")); ++---------------------------------------------------------+ +| st_astext(st_linefromtext('LINESTRING (1 1, 2 2)'))     | ++---------------------------------------------------------+ +|
LINESTRING (1 1, 2 2) | ++---------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md new file mode 100644 index 00000000000000..9bf5e30f2178f8 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_point.md @@ -0,0 +1,21 @@ +# `ST_Point` + +## Syntax + +`POINT ST_Point(DOUBLE x, DOUBLE y)` + +## Description + +通过给定的X坐标值,Y坐标值返回对应的Point。 +当前这个值只是在球面几何上有意义,X/Y对应的是经度/纬度(longitude/latitude) + +## Examples + +``` +mysql> SELECT ST_AsText(ST_Point(24.7, 56.7)); ++---------------------------------+ +| st_astext(st_point(24.7, 56.7)) | ++---------------------------------+ +| POINT (24.7 56.7) | ++---------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md new file mode 100644 index 00000000000000..6e5765e3e61918 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_polygon.md @@ -0,0 +1,20 @@ +# `ST_Polygon`,`ST_PolyFromText`,`ST_PolygonFromText` + +## Syntax + +`GEOMETRY ST_Polygon(VARCHAR wkt)` + +## Description + +将一个WKT(Well Known Text)转化为对应的多边形内存形式 + +## Examples + +``` +mysql> SELECT ST_AsText(ST_Polygon("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))")); ++------------------------------------------------------------------+ +| st_astext(st_polygon('POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))')) | ++------------------------------------------------------------------+ +| POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0)) | ++------------------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md new file mode 100644 index
00000000000000..644b9ee11dc1a6 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_x.md @@ -0,0 +1,20 @@ +# `ST_X` + +## Syntax + +`DOUBLE ST_X(POINT point)` + +## Description + +当point是一个合法的POINT类型时,返回对应的X坐标值 + +## Examples + +``` +mysql> SELECT ST_X(ST_Point(24.7, 56.7)); ++----------------------------+ +| st_x(st_point(24.7, 56.7)) | ++----------------------------+ +| 24.7 | ++----------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md new file mode 100644 index 00000000000000..fb02ddb102d4c0 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/spatial-functions/st_y.md @@ -0,0 +1,20 @@ +# `ST_Y` + +## Syntax + +`DOUBLE ST_Y(POINT point)` + +## Description + +当point是一个合法的POINT类型时,返回对应的Y坐标值 + +## Examples + +``` +mysql> SELECT ST_Y(ST_Point(24.7, 56.7)); ++----------------------------+ +| st_y(st_point(24.7, 56.7)) | ++----------------------------+ +| 56.7 | ++----------------------------+ +``` diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 0ba161b0fad9ec..ded9dc95e7b514 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -602,7 +602,45 @@ [['md5'], 'VARCHAR', ['VARCHAR'], '_ZN5doris19EncryptionFunctions3md5EPN9doris_udf15FunctionContextERKNS1_9StringValE'], [['md5sum'], 'VARCHAR', ['VARCHAR', '...'], - '_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE'] + '_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE'], + + # geo functions + [['ST_Point'], 'VARCHAR', ['DOUBLE', 'DOUBLE'], + '_ZN5doris12GeoFunctions8st_pointEPN9doris_udf15FunctionContextERKNS1_9DoubleValES6_'], + [['ST_X'], 'DOUBLE', ['VARCHAR'], + '_ZN5doris12GeoFunctions4st_xEPN9doris_udf15FunctionContextERKNS1_9StringValE'], + 
[['ST_Y'], 'DOUBLE', ['VARCHAR'], + '_ZN5doris12GeoFunctions4st_yEPN9doris_udf15FunctionContextERKNS1_9StringValE'], + + [['ST_Distance_Sphere'], 'DOUBLE', ['DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE'], + '_ZN5doris12GeoFunctions18st_distance_sphereEPN9doris_udf15FunctionContextERKNS1_9DoubleValES6_S6_S6_'], + + [['ST_AsText', 'ST_AsWKT'], 'VARCHAR', ['VARCHAR'], + '_ZN5doris12GeoFunctions9st_as_wktEPN9doris_udf15FunctionContextERKNS1_9StringValE'], + [['ST_GeometryFromText', 'ST_GeomFromText'], 'VARCHAR', ['VARCHAR'], + '_ZN5doris12GeoFunctions11st_from_wktEPN9doris_udf15FunctionContextERKNS1_9StringValE', + '_ZN5doris12GeoFunctions19st_from_wkt_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris12GeoFunctions17st_from_wkt_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'], + + [['ST_LineFromText', 'ST_LineStringFromText'], 'VARCHAR', ['VARCHAR'], + '_ZN5doris12GeoFunctions7st_lineEPN9doris_udf15FunctionContextERKNS1_9StringValE', + '_ZN5doris12GeoFunctions15st_line_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris12GeoFunctions17st_from_wkt_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'], + + [['ST_Polygon', 'ST_PolyFromText', 'ST_PolygonFromText'], 'VARCHAR', ['VARCHAR'], + '_ZN5doris12GeoFunctions10st_polygonEPN9doris_udf15FunctionContextERKNS1_9StringValE', + '_ZN5doris12GeoFunctions18st_polygon_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris12GeoFunctions17st_from_wkt_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'], + + [['ST_Circle'], 'VARCHAR', ['DOUBLE', 'DOUBLE', 'DOUBLE'], + '_ZN5doris12GeoFunctions9st_circleEPN9doris_udf15FunctionContextERKNS1_9DoubleValES6_S6_', + '_ZN5doris12GeoFunctions17st_circle_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris12GeoFunctions17st_from_wkt_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'], + + [['ST_Contains'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], + 
'_ZN5doris12GeoFunctions11st_containsEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', + '_ZN5doris12GeoFunctions19st_contains_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris12GeoFunctions17st_contains_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'] ] invisible_functions = [ diff --git a/run-ut.sh b/run-ut.sh index 6c2f06a9833756..c867d290263360 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -158,6 +158,11 @@ ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test ${DORIS_TEST_BINARY_DIR}/exprs/string_functions_test ${DORIS_TEST_BINARY_DIR}/exprs/json_function_test +## Running geo unit test +${DORIS_TEST_BINARY_DIR}/geo/geo_functions_test +${DORIS_TEST_BINARY_DIR}/geo/wkt_parse_test +${DORIS_TEST_BINARY_DIR}/geo/geo_types_test + ## Running exec unit test ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_uncompressed_test ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_gzip_test diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index a3f45bbd54d530..4bcf8958b060fd 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -558,6 +558,25 @@ build_arrow() { cp -rf ./uriparser_ep-install/lib/liburiparser.a $TP_INSTALL_DIR/lib64/liburiparser.a } +# s2 +build_s2() { + check_if_source_exist $S2_SOURCE + cd $TP_SOURCE_DIR/s2geometry-0.9.0 + mkdir build -p && cd build + rm -rf CMakeCache.txt CMakeFiles/ + CXXFLAGS="-O3" \ + LDFLAGS="-L${TP_LIB_DIR} -static-libstdc++ -static-libgcc" \ + $CMAKE_CMD -v -DBUILD_SHARED_LIBS=0 -DCMAKE_INSTALL_PREFIX=$TP_INSTALL_DIR \ + -DCMAKE_INCLUDE_PATH="$TP_INSTALL_DIR/include" \ + -DBUILD_SHARED_LIBS=OFF \ + -DGFLAGS_ROOT_DIR="$TP_INSTALL_DIR/include" \ + -DWITH_GFLAGS=ON \ + -DGLOG_ROOT_DIR="$TP_INSTALL_DIR/include" \ + -DWITH_GLOG=ON \ + -DCMAKE_LIBRARY_PATH="$TP_INSTALL_DIR/lib;$TP_INSTALL_DIR/lib64" .. 
+ make -j$PARALLEL && make install +} + build_llvm build_libevent build_zlib @@ -582,6 +601,7 @@ build_brpc build_rocksdb build_librdkafka build_arrow +build_s2 echo "Finihsed to build all thirdparties" diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh index f535b3b6b95e2f..f28faff216746d 100755 --- a/thirdparty/download-thirdparty.sh +++ b/thirdparty/download-thirdparty.sh @@ -295,4 +295,12 @@ fi cd - echo "Finished patching $BRPC_SOURCE" +# s2 patch to disable shared library +cd $TP_SOURCE_DIR/$S2_SOURCE +if [ ! -f $PATCHED_MARK ]; then + patch -p1 < $TP_PATCH_DIR/s2geometry-0.9.0.patch + touch $PATCHED_MARK +fi +cd - +echo "Finished patching $S2_SOURCE" diff --git a/thirdparty/patches/s2geometry-0.9.0.patch b/thirdparty/patches/s2geometry-0.9.0.patch new file mode 100644 index 00000000000000..7f7a6466c6d7d5 --- /dev/null +++ b/thirdparty/patches/s2geometry-0.9.0.patch @@ -0,0 +1,13 @@ +diff -rupN s2geometry-0.9.0/CMakeLists.txt s2geometry-0.9.0.new/CMakeLists.txt +--- s2geometry-0.9.0/CMakeLists.txt 2019-03-05 00:53:16.000000000 +0800 ++++ s2geometry-0.9.0.new/CMakeLists.txt 2019-05-29 07:12:28.672879024 +0800 +@@ -531,6 +531,6 @@ if (BUILD_EXAMPLES) + add_subdirectory("doc/examples" examples) + endif() + +-if (${SWIG_FOUND} AND ${PYTHONLIBS_FOUND}) +- add_subdirectory("src/python" python) +-endif() ++# if (${SWIG_FOUND} AND ${PYTHONLIBS_FOUND}) ++# add_subdirectory("src/python" python) ++# endif() diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index e7b7647373bea0..ddf2ff0797a292 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -241,6 +241,12 @@ ARROW_NAME=arrow-apache-arrow-0.13.0.tar.gz ARROW_SOURCE=arrow-apache-arrow-0.13.0 ARROW_MD5SUM="de9e00e43df0b5fae2ed92f6491cb10b" +# S2 +S2_DOWNLOAD="https://github.com/google/s2geometry/archive/v0.9.0.tar.gz" +S2_NAME=s2geometry-0.9.0.tar.gz +S2_SOURCE=s2geometry-0.9.0 +S2_MD5SUM="293552c7646193b8b4a01556808fe155" + # all thirdparties which need to be downloaded is set 
in array TP_ARCHIVES -export TP_ARCHIVES="LIBEVENT OPENSSL THRIFT LLVM CLANG COMPILER_RT PROTOBUF GFLAGS GLOG GTEST RAPIDJSON SNAPPY GPERFTOOLS ZLIB LZ4 BZIP LZO2 CURL RE2 BOOST MYSQL BOOST_FOR_MYSQL LEVELDB BRPC ROCKSDB LIBRDKAFKA ARROW BROTLI DOUBLE_CONVERSION URIPARSER ZSTD" +export TP_ARCHIVES="LIBEVENT OPENSSL THRIFT LLVM CLANG COMPILER_RT PROTOBUF GFLAGS GLOG GTEST RAPIDJSON SNAPPY GPERFTOOLS ZLIB LZ4 BZIP LZO2 CURL RE2 BOOST MYSQL BOOST_FOR_MYSQL LEVELDB BRPC ROCKSDB LIBRDKAFKA ARROW BROTLI DOUBLE_CONVERSION URIPARSER ZSTD S2"