diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index bee6f69ba8e816..dc27595da3bfc6 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default; void print_s2point(std::ostream& os, const S2Point& point) { S2LatLng coord(point); - os << std::setprecision(12) << coord.lng().degrees() << " " << coord.lat().degrees(); + os << std::setprecision(15) << coord.lng().degrees() << " " << coord.lat().degrees(); } static inline bool is_valid_lng_lat(double lng, double lat) { diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp index e24328d7564ac3..7b345929fc0979 100644 --- a/be/src/geo/wkb_parse.cpp +++ b/be/src/geo/wkb_parse.cpp @@ -122,108 +122,169 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { auto size = is.tellg(); is.seekg(0, std::ios::beg); - std::vector buf(static_cast(size)); - is.read(reinterpret_cast(buf.data()), static_cast(size)); - - ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian + // Check if size is valid + if (size <= 0) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } - ctx->shape = readGeometry(ctx).release(); + std::vector buf(static_cast(size)); + if (!is.read(reinterpret_cast(buf.data()), static_cast(size))) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } - if (!ctx->shape) { + // Ensure we have at least one byte for byte order + if (buf.empty()) { ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; } - return ctx; -} -std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { - // determine byte order - unsigned char byteOrder = ctx->dis.readByte(); + // First read the byte order using machine endian + auto byteOrder = buf[0]; - // default is machine endian + // Create ByteOrderDataInStream with the correct byte order if (byteOrder == byteOrder::wkbNDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); } else if (byteOrder == byteOrder::wkbXDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG); + } else { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } + + std::unique_ptr shape = readGeometry(ctx); + if (!shape) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; } - uint32_t typeInt = ctx->dis.readUnsigned(); + ctx->shape = shape.release(); + return ctx; +} - uint32_t geometryType = (typeInt & 0xffff) % 1000; +std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { + try { + // Ensure we have enough data to read + if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes for type + return nullptr; + } - std::unique_ptr shape; + // Skip the byte order as we've already handled it + ctx->dis.readByte(); - switch (geometryType) { - case wkbType::wkbPoint: - shape.reset(readPoint(ctx).release()); - break; - case wkbType::wkbLine: - shape.reset(readLine(ctx).release()); - break; - case wkbType::wkbPolygon: - shape.reset(readPolygon(ctx).release()); - break; - default: + uint32_t typeInt = ctx->dis.readUnsigned(); + + // Check if geometry has SRID + bool has_srid = (typeInt & WKB_SRID_FLAG) != 0; + + // Read SRID if present + if (has_srid) { + ctx->dis.readUnsigned(); // Read and store SRID if needed + } + + // Get the base geometry type + uint32_t geometryType = typeInt & WKB_TYPE_MASK; + + std::unique_ptr shape; + + switch (geometryType) { + case wkbType::wkbPoint: + shape = readPoint(ctx); + break; + case wkbType::wkbLine: + shape = readLine(ctx); + break; + case wkbType::wkbPolygon: + shape = readPolygon(ctx); + break; + default: + return nullptr; + } + + return shape; + } catch (...) { + // Handle any exceptions from reading operations return nullptr; } - return shape; } std::unique_ptr WkbParse::readPoint(WkbParseContext* ctx) { GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx); - std::unique_ptr point = GeoPoint::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) { - return point; - } else { + std::unique_ptr point = GeoPoint::create_unique(); + if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) { return nullptr; } + + return point; } std::unique_ptr WkbParse::readLine(WkbParseContext* ctx) { uint32_t size = ctx->dis.readUnsigned(); - minMemSize(wkbLine, size, ctx); + if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) { + return nullptr; + } GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx); - std::unique_ptr line = GeoLine::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (line->from_coords(coords) == GEO_PARSE_OK) { - return line; - } else { + std::unique_ptr line = GeoLine::create_unique(); + if (!line || line->from_coords(coords) != GEO_PARSE_OK) { return nullptr; } + + return line; } std::unique_ptr WkbParse::readPolygon(WkbParseContext* ctx) { uint32_t num_loops = ctx->dis.readUnsigned(); - minMemSize(wkbPolygon, num_loops, ctx); + if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) { + return nullptr; + } + GeoCoordinateListList coordss; - for (int i = 0; i < num_loops; ++i) { + for (uint32_t i = 0; i < num_loops; ++i) { uint32_t size = ctx->dis.readUnsigned(); - GeoCoordinateList* coords = new GeoCoordinateList(); + if (size < 3) { // A polygon loop must have at least 3 points + return nullptr; + } + + auto coords = std::make_unique(); *coords = WkbParse::readCoordinateList(size, ctx); - coordss.add(coords); + if (coords->list.empty()) { + return nullptr; + } + coordss.add(coords.release()); } std::unique_ptr polygon = GeoPolygon::create_unique(); - - if (polygon->from_coords(coordss) == GEO_PARSE_OK) { - return polygon; - } else { + if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) { return nullptr; } + + return polygon; } GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) { GeoCoordinateList coords; for (uint32_t i = 0; i < size; i++) { - readCoordinate(ctx); + if (!readCoordinate(ctx)) { + return GeoCoordinateList(); + } unsigned int j = 0; GeoCoordinate coord; coord.x = ctx->ordValues[j++]; coord.y = ctx->ordValues[j++]; coords.add(coord); } - return coords; } diff --git a/be/src/geo/wkb_parse.h b/be/src/geo/wkb_parse.h index e03dddf97a3964..de6d0e9b2d97c9 100644 --- a/be/src/geo/wkb_parse.h +++ b/be/src/geo/wkb_parse.h @@ -17,8 +17,7 @@ #pragma once -#include - +#include #include #include @@ -34,6 +33,17 @@ class GeoLine; class GeoPoint; class GeoPolygon; +// WKB format constants +// According to OpenGIS Implementation Specification: +// The high bit of the type value is set to 1 if the WKB contains a SRID. +// Reference: OpenGIS Implementation Specification for Geographic information - Simple feature access - Part 1: Common architecture +// Bit mask to check if WKB contains SRID +constexpr uint32_t WKB_SRID_FLAG = 0x20000000; + +// The geometry type is stored in the least significant byte of the type value +// Bit mask to extract the base geometry type +constexpr uint32_t WKB_TYPE_MASK = 0xFF; + class WkbParse { public: static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape); diff --git a/regression-test/data/correctness_p0/test_select_constant.out b/regression-test/data/correctness_p0/test_select_constant.out index 33c56d3a22cb02..6737e373d16b89 100644 --- a/regression-test/data/correctness_p0/test_select_constant.out +++ b/regression-test/data/correctness_p0/test_select_constant.out @@ -3,7 +3,7 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) -- !select2 -- 20230101 diff --git a/regression-test/data/nereids_function_p0/scalar_function/S.out b/regression-test/data/nereids_function_p0/scalar_function/S.out index 53a5a1639a747f..b346ae022a0808 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/S.out +++ b/regression-test/data/nereids_function_p0/scalar_function/S.out @@ -1335,32 +1335,32 @@ POINT (98.35620117 36.939093) -- !sql_st_circle_Double_Double_Double -- \N -CIRCLE ((-46.35620117 39.939093), 3.32100009918) -CIRCLE ((-74.35620117 79.939093), 66.3209991455) -CIRCLE ((126.35620117 -39.939093), 5.30000019073) -CIRCLE ((16.35620117 19.939093), 7.32100009918) -CIRCLE ((43.35620117 35.939093), 2.32100009918) -CIRCLE ((47.35620117 26.939093), 33.3209991455) -CIRCLE ((5 5), 4.32100009918) -CIRCLE ((90.35620117 39.939093), 100.320999146) -CIRCLE ((90.35620117 47.939093), 88.3209991455) -CIRCLE ((90.35620117 49.939093), 76.3209991455) -CIRCLE ((90.35620117 59.939093), 75.3209991455) -CIRCLE ((98.35620117 36.939093), 45.3209991455) +CIRCLE ((-46.35620117 39.939093), 3.32100009918213) +CIRCLE ((-74.35620117 79.939093), 66.3209991455078) +CIRCLE ((126.35620117 -39.939093), 5.30000019073486) +CIRCLE ((16.35620117 19.939093), 7.32100009918213) +CIRCLE ((43.35620117 35.939093), 2.32100009918213) +CIRCLE ((47.35620117 26.939093), 33.3209991455078) +CIRCLE ((5 5), 4.32100009918213) +CIRCLE ((90.35620117 39.939093), 100.320999145508) +CIRCLE ((90.35620117 47.939093), 88.3209991455078) +CIRCLE ((90.35620117 49.939093), 76.3209991455078) +CIRCLE ((90.35620117 59.939093), 75.3209991455078) +CIRCLE ((98.35620117 36.939093), 45.3209991455078) -- !sql_st_circle_Double_Double_Double_notnull -- -CIRCLE ((-46.35620117 39.939093), 3.32100009918) -CIRCLE ((-74.35620117 79.939093), 66.3209991455) -CIRCLE ((126.35620117 -39.939093), 5.30000019073) -CIRCLE ((16.35620117 19.939093), 7.32100009918) -CIRCLE ((43.35620117 35.939093), 2.32100009918) -CIRCLE ((47.35620117 26.939093), 33.3209991455) -CIRCLE ((5 5), 4.32100009918) -CIRCLE ((90.35620117 39.939093), 100.320999146) -CIRCLE ((90.35620117 47.939093), 88.3209991455) -CIRCLE ((90.35620117 49.939093), 76.3209991455) -CIRCLE ((90.35620117 59.939093), 75.3209991455) -CIRCLE ((98.35620117 36.939093), 45.3209991455) +CIRCLE ((-46.35620117 39.939093), 3.32100009918213) +CIRCLE ((-74.35620117 79.939093), 66.3209991455078) +CIRCLE ((126.35620117 -39.939093), 5.30000019073486) +CIRCLE ((16.35620117 19.939093), 7.32100009918213) +CIRCLE ((43.35620117 35.939093), 2.32100009918213) +CIRCLE ((47.35620117 26.939093), 33.3209991455078) +CIRCLE ((5 5), 4.32100009918213) +CIRCLE ((90.35620117 39.939093), 100.320999145508) +CIRCLE ((90.35620117 47.939093), 88.3209991455078) +CIRCLE ((90.35620117 49.939093), 76.3209991455078) +CIRCLE ((90.35620117 59.939093), 75.3209991455078) +CIRCLE ((98.35620117 36.939093), 45.3209991455078) -- !sql_st_contains_Varchar_Varchar -- \N diff --git a/regression-test/data/nereids_p0/test_select_constant.out b/regression-test/data/nereids_p0/test_select_constant.out index cb391ffb1210fc..6faa26295a1359 100644 --- a/regression-test/data/nereids_p0/test_select_constant.out +++ b/regression-test/data/nereids_p0/test_select_constant.out @@ -3,5 +3,5 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) diff --git a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out index db1b1ffcae52d3..59bc628249f030 100644 --- a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out +++ b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out @@ -113,3 +113,18 @@ LINESTRING (1 1, 2 2) -- !sql -- POLYGON ((114.104486 22.547119, 114.093758 22.547753, 114.096504 22.532057, 114.104229 22.539826, 114.106203 22.54268, 114.104486 22.547119)) +-- !sql -- +POINT (117.194767000297 36.46326301008) + +-- !sql -- +1.0 + +-- !sql -- +POINT (1.23456789 2.34567891) + +-- !sql -- +0.9999999999999 + +-- !sql -- +1.0000000000001 + diff --git a/regression-test/data/query_p0/test_select_constant.out b/regression-test/data/query_p0/test_select_constant.out index cb391ffb1210fc..6faa26295a1359 100644 --- a/regression-test/data/query_p0/test_select_constant.out +++ b/regression-test/data/query_p0/test_select_constant.out @@ -3,5 +3,5 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) diff --git a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy index f76cb44cb4ad4b..81eadfb0cc039d 100644 --- a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy @@ -70,4 +70,10 @@ suite("test_gis_function", "arrow_flight_sql") { qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2 2)\"))));" qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON ((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 22.539826,114.106203 22.542680,114.104486 22.547119))\"))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));" + qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));" + qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));" + qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));" + }