From 17007c036d9e12438dafdb4b1439fd2b1ab08fcd Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Thu, 9 Jan 2025 00:39:50 +0800 Subject: [PATCH 01/12] [fix](func) Fix precision loss in ST_GeometryFromWKB coordinate parsing --- be/src/geo/geo_types.cpp | 8 ++-- be/src/geo/wkb_parse.cpp | 39 +++++++++++++------ .../spatial_functions/test_gis_function.out | 5 ++- .../test_gis_function.groovy | 1 + 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index bee6f69ba8e816..34a0df8dc28109 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default; void print_s2point(std::ostream& os, const S2Point& point) { S2LatLng coord(point); - os << std::setprecision(12) << coord.lng().degrees() << " " << coord.lat().degrees(); + os << std::setprecision(15) << coord.lng().degrees() << " " << coord.lat().degrees(); } static inline bool is_valid_lng_lat(double lng, double lat) { @@ -316,13 +316,11 @@ bool GeoPoint::decode(const void* data, size_t size) { } double GeoPoint::x() const { - //Accurate to 13 decimal places - return std::stod(absl::StrFormat("%.13f", S2LatLng::Longitude(*_point).degrees())); + return S2LatLng::Longitude(*_point).degrees(); } double GeoPoint::y() const { - //Accurate to 13 decimal places - return std::stod(absl::StrFormat("%.13f", S2LatLng::Latitude(*_point).degrees())); + return S2LatLng::Latitude(*_point).degrees(); } std::string GeoPoint::as_wkt() const { diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp index e24328d7564ac3..60f469952221e8 100644 --- a/be/src/geo/wkb_parse.cpp +++ b/be/src/geo/wkb_parse.cpp @@ -125,7 +125,20 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { std::vector buf(static_cast(size)); is.read(reinterpret_cast(buf.data()), static_cast(size)); - ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian + // First read the byte order using machine endian + auto byteOrder = buf[0]; + + // Create ByteOrderDataInStream with the correct byte order + if (byteOrder == byteOrder::wkbNDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); + ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); + } else if (byteOrder == byteOrder::wkbXDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); + ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG); + } else { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } ctx->shape = readGeometry(ctx).release(); @@ -136,19 +149,21 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { } std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { - // determine byte order - unsigned char byteOrder = ctx->dis.readByte(); - - // default is machine endian - if (byteOrder == byteOrder::wkbNDR) { - ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); - } else if (byteOrder == byteOrder::wkbXDR) { - ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG); - } + // Skip the byte order as we've already handled it + ctx->dis.readByte(); uint32_t typeInt = ctx->dis.readUnsigned(); - uint32_t geometryType = (typeInt & 0xffff) % 1000; + // Check if geometry has SRID + bool has_srid = (typeInt & 0x20000000) != 0; + + // Read SRID if present + if (has_srid) { + ctx->dis.readUnsigned(); // Read and store SRID if needed + } + + // Get the base geometry type + uint32_t geometryType = typeInt & 0xff; std::unique_ptr shape; @@ -199,7 +214,7 @@ std::unique_ptr WkbParse::readPolygon(WkbParseContext* ctx) { GeoCoordinateListList coordss; for (int i = 0; i < num_loops; ++i) { uint32_t size = ctx->dis.readUnsigned(); - GeoCoordinateList* coords = new GeoCoordinateList(); + auto* coords = new GeoCoordinateList(); *coords = WkbParse::readCoordinateList(size, ctx); coordss.add(coords); } diff --git a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out index db1b1ffcae52d3..dffd863932c784 100644 --- a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out +++ b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out @@ -48,7 +48,7 @@ POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0)) POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0)) -- !sql -- -1.0 +0.9999999999999998 -- !sql -- 24.7 @@ -113,3 +113,6 @@ LINESTRING (1 1, 2 2) -- !sql -- POLYGON ((114.104486 22.547119, 114.093758 22.547753, 114.096504 22.532057, 114.104229 22.539826, 114.106203 22.54268, 114.104486 22.547119)) +-- !sql -- +POINT (117.194767000297 36.46326301008) + diff --git a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy index f76cb44cb4ad4b..0f87c28283892a 100644 --- a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy @@ -70,4 +70,5 @@ suite("test_gis_function", "arrow_flight_sql") { qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2 2)\"))));" qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON ((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 22.539826,114.106203 22.542680,114.104486 22.547119))\"))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));" } From b55373f988e63ace636505d11ef42f347f599188 Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Thu, 9 Jan 2025 02:04:26 +0800 Subject: [PATCH 02/12] fix --- be/src/geo/geo_types.cpp | 26 +++++++++++++++++-- .../spatial_functions/test_gis_function.out | 2 +- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index 34a0df8dc28109..83c987ff10ba4e 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -66,6 +66,16 @@ static inline bool is_valid_lng_lat(double lng, double lat) { // Return GEO_PARSE_OK, if and only if this can be converted to a valid S2Point static inline GeoParseStatus to_s2point(double lng, double lat, S2Point* point) { + // Handle values very close to integers + double rounded_lng = round(lng); + if (std::abs(lng - rounded_lng) < 1e-14) { + lng = rounded_lng; + } + double rounded_lat = round(lat); + if (std::abs(lat - rounded_lat) < 1e-14) { + lat = rounded_lat; + } + if (!is_valid_lng_lat(lng, lat)) { return GEO_PARSE_COORD_INVALID; } @@ -316,11 +326,23 @@ bool GeoPoint::decode(const void* data, size_t size) { } double GeoPoint::x() const { - return S2LatLng::Longitude(*_point).degrees(); + double value = S2LatLng::Longitude(*_point).degrees(); + // Handle values very close to integers + double rounded = round(value); + if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 + return rounded; + } + return value; } double GeoPoint::y() const { - return S2LatLng::Latitude(*_point).degrees(); + double value = S2LatLng::Latitude(*_point).degrees(); + // Handle values very close to integers + double rounded = round(value); + if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 + return rounded; + } + return value; } std::string GeoPoint::as_wkt() const { diff --git a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out index dffd863932c784..dccec2bc3d8adf 100644 --- a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out +++ b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out @@ -48,7 +48,7 @@ POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0)) POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0)) -- !sql -- -0.9999999999999998 +1.0 -- !sql -- 24.7 From 31c5d6d10a7fdda82bb94584c0320f4a52aa5da0 Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Thu, 9 Jan 2025 02:07:14 +0800 Subject: [PATCH 03/12] format --- be/src/geo/geo_types.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index 83c987ff10ba4e..80a85612dad62d 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -329,7 +329,7 @@ double GeoPoint::x() const { double value = S2LatLng::Longitude(*_point).degrees(); // Handle values very close to integers double rounded = round(value); - if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 + if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 return rounded; } return value; @@ -339,7 +339,7 @@ double GeoPoint::y() const { double value = S2LatLng::Latitude(*_point).degrees(); // Handle values very close to integers double rounded = round(value); - if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 + if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 return rounded; } return value; From 31a70db2001d9fc1a0449a3e67354e8361832a15 Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Wed, 15 Jan 2025 14:27:49 +0800 Subject: [PATCH 04/12] format and comment --- be/src/geo/geo_types.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index 80a85612dad62d..22c9c1f1bc8dee 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -329,7 +329,8 @@ double GeoPoint::x() const { double value = S2LatLng::Longitude(*_point).degrees(); // Handle values very close to integers double rounded = round(value); - if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 + // Use a larger threshold + if (std::abs(value - rounded) < 1e-13) { return rounded; } return value; @@ -339,7 +340,8 @@ double GeoPoint::y() const { double value = S2LatLng::Latitude(*_point).degrees(); // Handle values very close to integers double rounded = round(value); - if (std::abs(value - rounded) < 1e-13) { // 使用更大的阈值 + // Use a larger threshold + if (std::abs(value - rounded) < 1e-13) { return rounded; } return value; From 1bb9949d40438702f90f2b03a3e66d85629be01a Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Thu, 16 Jan 2025 15:41:08 +0800 Subject: [PATCH 05/12] save --- be/src/geo/geo_types.cpp | 30 +++---------------- .../spatial_functions/test_gis_function.out | 12 ++++++++ .../test_gis_function.groovy | 5 ++++ 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index 22c9c1f1bc8dee..dc27595da3bfc6 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -66,16 +66,6 @@ static inline bool is_valid_lng_lat(double lng, double lat) { // Return GEO_PARSE_OK, if and only if this can be converted to a valid S2Point static inline GeoParseStatus to_s2point(double lng, double lat, S2Point* point) { - // Handle values very close to integers - double rounded_lng = round(lng); - if (std::abs(lng - rounded_lng) < 1e-14) { - lng = rounded_lng; - } - double rounded_lat = round(lat); - if (std::abs(lat - rounded_lat) < 1e-14) { - lat = rounded_lat; - } - if (!is_valid_lng_lat(lng, lat)) { return GEO_PARSE_COORD_INVALID; } @@ -326,25 +316,13 @@ bool GeoPoint::decode(const void* data, size_t size) { } double GeoPoint::x() const { - double value = S2LatLng::Longitude(*_point).degrees(); - // Handle values very close to integers - double rounded = round(value); - // Use a larger threshold - if (std::abs(value - rounded) < 1e-13) { - return rounded; - } - return value; + //Accurate to 13 decimal places + return std::stod(absl::StrFormat("%.13f", S2LatLng::Longitude(*_point).degrees())); } double GeoPoint::y() const { - double value = S2LatLng::Latitude(*_point).degrees(); - // Handle values very close to integers - double rounded = round(value); - // Use a larger threshold - if (std::abs(value - rounded) < 1e-13) { - return rounded; - } - return value; + //Accurate to 13 decimal places + return std::stod(absl::StrFormat("%.13f", S2LatLng::Latitude(*_point).degrees())); } std::string GeoPoint::as_wkt() const { diff --git a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out index dccec2bc3d8adf..59bc628249f030 100644 --- a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out +++ b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out @@ -116,3 +116,15 @@ POLYGON ((114.104486 22.547119, 114.093758 22.547753, 114.096504 22.532057, 114. -- !sql -- POINT (117.194767000297 36.46326301008) +-- !sql -- +1.0 + +-- !sql -- +POINT (1.23456789 2.34567891) + +-- !sql -- +0.9999999999999 + +-- !sql -- +1.0000000000001 + diff --git a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy index 0f87c28283892a..81eadfb0cc039d 100644 --- a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy @@ -71,4 +71,9 @@ suite("test_gis_function", "arrow_flight_sql") { qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON ((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 22.539826,114.106203 22.542680,114.104486 22.547119))\"))));" qt_sql "SELECT ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));" + qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));" + qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));" + qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));" + } From 9e4f0845f57937a3332cda01edaf54fbee940199 Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Thu, 16 Jan 2025 16:15:34 +0800 Subject: [PATCH 06/12] save --- regression-test/data/correctness_p0/test_select_constant.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression-test/data/correctness_p0/test_select_constant.out b/regression-test/data/correctness_p0/test_select_constant.out index 33c56d3a22cb02..6737e373d16b89 100644 --- a/regression-test/data/correctness_p0/test_select_constant.out +++ b/regression-test/data/correctness_p0/test_select_constant.out @@ -3,7 +3,7 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) -- !select2 -- 20230101 From 1da897354dc43a7c44b7363e7dde070fdb1792cd Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Thu, 16 Jan 2025 18:37:22 +0800 Subject: [PATCH 07/12] save --- regression-test/data/nereids_p0/test_select_constant.out | 2 +- regression-test/data/query_p0/test_select_constant.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/regression-test/data/nereids_p0/test_select_constant.out b/regression-test/data/nereids_p0/test_select_constant.out index cb391ffb1210fc..6faa26295a1359 100644 --- a/regression-test/data/nereids_p0/test_select_constant.out +++ b/regression-test/data/nereids_p0/test_select_constant.out @@ -3,5 +3,5 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) diff --git a/regression-test/data/query_p0/test_select_constant.out b/regression-test/data/query_p0/test_select_constant.out index cb391ffb1210fc..6faa26295a1359 100644 --- a/regression-test/data/query_p0/test_select_constant.out +++ b/regression-test/data/query_p0/test_select_constant.out @@ -3,5 +3,5 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) From 8cbff9ee8b8bced3c7932398db16b913f2ac50a3 Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Thu, 16 Jan 2025 21:54:04 +0800 Subject: [PATCH 08/12] save --- regression-test/data/nereids_function_p0/scalar_function/S.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression-test/data/nereids_function_p0/scalar_function/S.out b/regression-test/data/nereids_function_p0/scalar_function/S.out index 53a5a1639a747f..0f15546f5f5600 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/S.out +++ b/regression-test/data/nereids_function_p0/scalar_function/S.out @@ -1335,7 +1335,7 @@ POINT (98.35620117 36.939093) -- !sql_st_circle_Double_Double_Double -- \N -CIRCLE ((-46.35620117 39.939093), 3.32100009918) +CIRCLE ((-46.35620117 39.939093), 3.32100009918213) CIRCLE ((-74.35620117 79.939093), 66.3209991455) CIRCLE ((126.35620117 -39.939093), 5.30000019073) CIRCLE ((16.35620117 19.939093), 7.32100009918) From e3b2a498c869d7e7a0578a29836b511e397e04cc Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Fri, 17 Jan 2025 20:22:01 +0800 Subject: [PATCH 09/12] save --- .../nereids_function_p0/scalar_function/S.out | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/regression-test/data/nereids_function_p0/scalar_function/S.out b/regression-test/data/nereids_function_p0/scalar_function/S.out index 0f15546f5f5600..b346ae022a0808 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/S.out +++ b/regression-test/data/nereids_function_p0/scalar_function/S.out @@ -1336,31 +1336,31 @@ POINT (98.35620117 36.939093) -- !sql_st_circle_Double_Double_Double -- \N CIRCLE ((-46.35620117 39.939093), 3.32100009918213) -CIRCLE ((-74.35620117 79.939093), 66.3209991455) -CIRCLE ((126.35620117 -39.939093), 5.30000019073) -CIRCLE ((16.35620117 19.939093), 7.32100009918) -CIRCLE ((43.35620117 35.939093), 2.32100009918) -CIRCLE ((47.35620117 26.939093), 33.3209991455) -CIRCLE ((5 5), 4.32100009918) -CIRCLE ((90.35620117 39.939093), 100.320999146) -CIRCLE ((90.35620117 47.939093), 88.3209991455) -CIRCLE ((90.35620117 49.939093), 76.3209991455) -CIRCLE ((90.35620117 59.939093), 75.3209991455) -CIRCLE ((98.35620117 36.939093), 45.3209991455) +CIRCLE ((-74.35620117 79.939093), 66.3209991455078) +CIRCLE ((126.35620117 -39.939093), 5.30000019073486) +CIRCLE ((16.35620117 19.939093), 7.32100009918213) +CIRCLE ((43.35620117 35.939093), 2.32100009918213) +CIRCLE ((47.35620117 26.939093), 33.3209991455078) +CIRCLE ((5 5), 4.32100009918213) +CIRCLE ((90.35620117 39.939093), 100.320999145508) +CIRCLE ((90.35620117 47.939093), 88.3209991455078) +CIRCLE ((90.35620117 49.939093), 76.3209991455078) +CIRCLE ((90.35620117 59.939093), 75.3209991455078) +CIRCLE ((98.35620117 36.939093), 45.3209991455078) -- !sql_st_circle_Double_Double_Double_notnull -- -CIRCLE ((-46.35620117 39.939093), 3.32100009918) -CIRCLE ((-74.35620117 79.939093), 66.3209991455) -CIRCLE ((126.35620117 -39.939093), 5.30000019073) -CIRCLE ((16.35620117 19.939093), 7.32100009918) -CIRCLE ((43.35620117 35.939093), 2.32100009918) -CIRCLE ((47.35620117 26.939093), 33.3209991455) -CIRCLE ((5 5), 4.32100009918) -CIRCLE ((90.35620117 39.939093), 100.320999146) -CIRCLE ((90.35620117 47.939093), 88.3209991455) -CIRCLE ((90.35620117 49.939093), 76.3209991455) -CIRCLE ((90.35620117 59.939093), 75.3209991455) -CIRCLE ((98.35620117 36.939093), 45.3209991455) +CIRCLE ((-46.35620117 39.939093), 3.32100009918213) +CIRCLE ((-74.35620117 79.939093), 66.3209991455078) +CIRCLE ((126.35620117 -39.939093), 5.30000019073486) +CIRCLE ((16.35620117 19.939093), 7.32100009918213) +CIRCLE ((43.35620117 35.939093), 2.32100009918213) +CIRCLE ((47.35620117 26.939093), 33.3209991455078) +CIRCLE ((5 5), 4.32100009918213) +CIRCLE ((90.35620117 39.939093), 100.320999145508) +CIRCLE ((90.35620117 47.939093), 88.3209991455078) +CIRCLE ((90.35620117 49.939093), 76.3209991455078) +CIRCLE ((90.35620117 59.939093), 75.3209991455078) +CIRCLE ((98.35620117 36.939093), 45.3209991455078) -- !sql_st_contains_Varchar_Varchar -- \N From 5bea5cc4fa8438c3d44e8b5dfff99d2af5e1dbc1 Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Fri, 17 Jan 2025 21:32:37 +0800 Subject: [PATCH 10/12] save --- be/src/geo/wkb_parse.cpp | 140 ++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 47 deletions(-) diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp index 60f469952221e8..6d6a8bca76100f 100644 --- a/be/src/geo/wkb_parse.cpp +++ b/be/src/geo/wkb_parse.cpp @@ -122,8 +122,23 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { auto size = is.tellg(); is.seekg(0, std::ios::beg); + // Check if size is valid + if (size <= 0) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } + std::vector buf(static_cast(size)); - is.read(reinterpret_cast(buf.data()), static_cast(size)); + if (!is.read(reinterpret_cast(buf.data()), static_cast(size))) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } + + // Ensure we have at least one byte for byte order + if (buf.empty()) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } // First read the byte order using machine endian auto byteOrder = buf[0]; @@ -140,105 +155,136 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { return ctx; } - ctx->shape = readGeometry(ctx).release(); - - if (!ctx->shape) { + std::unique_ptr shape = readGeometry(ctx); + if (!shape) { ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; } + + ctx->shape = shape.release(); return ctx; } std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { - // Skip the byte order as we've already handled it - ctx->dis.readByte(); + try { + // Ensure we have enough data to read + if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes for type + return nullptr; + } - uint32_t typeInt = ctx->dis.readUnsigned(); + // Skip the byte order as we've already handled it + ctx->dis.readByte(); - // Check if geometry has SRID - bool has_srid = (typeInt & 0x20000000) != 0; + uint32_t typeInt = ctx->dis.readUnsigned(); - // Read SRID if present - if (has_srid) { - ctx->dis.readUnsigned(); // Read and store SRID if needed - } + // Check if geometry has SRID + bool has_srid = (typeInt & 0x20000000) != 0; - // Get the base geometry type - uint32_t geometryType = typeInt & 0xff; + // Read SRID if present + if (has_srid) { + ctx->dis.readUnsigned(); // Read and store SRID if needed + } - std::unique_ptr shape; + // Get the base geometry type + uint32_t geometryType = typeInt & 0xff; - switch (geometryType) { - case wkbType::wkbPoint: - shape.reset(readPoint(ctx).release()); - break; - case wkbType::wkbLine: - shape.reset(readLine(ctx).release()); - break; - case wkbType::wkbPolygon: - shape.reset(readPolygon(ctx).release()); - break; - default: + std::unique_ptr shape; + + switch (geometryType) { + case wkbType::wkbPoint: + shape = readPoint(ctx); + break; + case wkbType::wkbLine: + shape = readLine(ctx); + break; + case wkbType::wkbPolygon: + shape = readPolygon(ctx); + break; + default: + return nullptr; + } + + return shape; + } catch (...) { + // Handle any exceptions from reading operations return nullptr; } - return shape; } std::unique_ptr WkbParse::readPoint(WkbParseContext* ctx) { GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx); - std::unique_ptr point = GeoPoint::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) { - return point; - } else { + std::unique_ptr point = GeoPoint::create_unique(); + if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) { return nullptr; } + + return point; } std::unique_ptr WkbParse::readLine(WkbParseContext* ctx) { uint32_t size = ctx->dis.readUnsigned(); - minMemSize(wkbLine, size, ctx); + if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) { + return nullptr; + } GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx); - std::unique_ptr line = GeoLine::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (line->from_coords(coords) == GEO_PARSE_OK) { - return line; - } else { + std::unique_ptr line = GeoLine::create_unique(); + if (!line || line->from_coords(coords) != GEO_PARSE_OK) { return nullptr; } + + return line; } std::unique_ptr WkbParse::readPolygon(WkbParseContext* ctx) { uint32_t num_loops = ctx->dis.readUnsigned(); - minMemSize(wkbPolygon, num_loops, ctx); + if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) { + return nullptr; + } + GeoCoordinateListList coordss; - for (int i = 0; i < num_loops; ++i) { + for (uint32_t i = 0; i < num_loops; ++i) { uint32_t size = ctx->dis.readUnsigned(); - auto* coords = new GeoCoordinateList(); + if (size < 3) { // A polygon loop must have at least 3 points + return nullptr; + } + + auto coords = std::make_unique(); *coords = WkbParse::readCoordinateList(size, ctx); - coordss.add(coords); + if (coords->list.empty()) { + return nullptr; + } + coordss.add(coords.release()); } std::unique_ptr polygon = GeoPolygon::create_unique(); - - if (polygon->from_coords(coordss) == GEO_PARSE_OK) { - return polygon; - } else { + if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) { return nullptr; } + + return polygon; } GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) { GeoCoordinateList coords; for (uint32_t i = 0; i < size; i++) { - readCoordinate(ctx); + if (!readCoordinate(ctx)) { + return GeoCoordinateList(); + } unsigned int j = 0; GeoCoordinate coord; coord.x = ctx->ordValues[j++]; coord.y = ctx->ordValues[j++]; coords.add(coord); } - return coords; } From ea72e50e244eb8664f497158d6660ab6c8879f35 Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Tue, 21 Jan 2025 12:20:09 +0800 Subject: [PATCH 11/12] save --- be/src/geo/wkb_parse.cpp | 4 ++-- be/src/geo/wkb_parse.h | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp index 6d6a8bca76100f..7b345929fc0979 100644 --- a/be/src/geo/wkb_parse.cpp +++ b/be/src/geo/wkb_parse.cpp @@ -178,7 +178,7 @@ std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { uint32_t typeInt = ctx->dis.readUnsigned(); // Check if geometry has SRID - bool has_srid = (typeInt & 0x20000000) != 0; + bool has_srid = (typeInt & WKB_SRID_FLAG) != 0; // Read SRID if present if (has_srid) { @@ -186,7 +186,7 @@ std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { } // Get the base geometry type - uint32_t geometryType = typeInt & 0xff; + uint32_t geometryType = typeInt & WKB_TYPE_MASK; std::unique_ptr shape; diff --git a/be/src/geo/wkb_parse.h b/be/src/geo/wkb_parse.h index e03dddf97a3964..3a8688ae01f040 100644 --- a/be/src/geo/wkb_parse.h +++ b/be/src/geo/wkb_parse.h @@ -17,7 +17,7 @@ #pragma once -#include +#include #include #include @@ -34,6 +34,17 @@ class GeoLine; class GeoPoint; class GeoPolygon; +// WKB format constants +// According to OpenGIS Implementation Specification: +// The high bit of the type value is set to 1 if the WKB contains a SRID. +// Reference: OpenGIS Implementation Specification for Geographic information - Simple feature access - Part 1: Common architecture +// Bit mask to check if WKB contains SRID +constexpr uint32_t WKB_SRID_FLAG = 0x20000000; + +// The geometry type is stored in the least significant byte of the type value +// Bit mask to extract the base geometry type +constexpr uint32_t WKB_TYPE_MASK = 0xFF; + class WkbParse { public: static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape); From 8973a6887db88c1b34c8c47928f0891e8b0d7b8e Mon Sep 17 00:00:00 2001 From: /bin/eash Date: Tue, 21 Jan 2025 12:23:20 +0800 Subject: [PATCH 12/12] format --- be/src/geo/wkb_parse.h | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/geo/wkb_parse.h b/be/src/geo/wkb_parse.h index 3a8688ae01f040..de6d0e9b2d97c9 100644 --- a/be/src/geo/wkb_parse.h +++ b/be/src/geo/wkb_parse.h @@ -18,7 +18,6 @@ #pragma once #include - #include #include