Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/geo/geo_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default;

void print_s2point(std::ostream& os, const S2Point& point) {
S2LatLng coord(point);
os << std::setprecision(12) << coord.lng().degrees() << " " << coord.lat().degrees();
os << std::setprecision(15) << coord.lng().degrees() << " " << coord.lat().degrees();
}

static inline bool is_valid_lng_lat(double lng, double lat) {
Expand Down
153 changes: 107 additions & 46 deletions be/src/geo/wkb_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,108 +122,169 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) {
auto size = is.tellg();
is.seekg(0, std::ios::beg);

std::vector<unsigned char> buf(static_cast<size_t>(size));
is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size));

ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian
// Check if size is valid
if (size <= 0) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}

ctx->shape = readGeometry(ctx).release();
std::vector<unsigned char> buf(static_cast<size_t>(size));
if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size))) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}

if (!ctx->shape) {
// Ensure we have at least one byte for byte order
if (buf.empty()) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}
return ctx;
}

std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
// determine byte order
unsigned char byteOrder = ctx->dis.readByte();
// First read the byte order using machine endian
auto byteOrder = buf[0];

// default is machine endian
// Create ByteOrderDataInStream with the correct byte order
if (byteOrder == byteOrder::wkbNDR) {
ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
} else if (byteOrder == byteOrder::wkbXDR) {
ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
} else {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}

std::unique_ptr<GeoShape> shape = readGeometry(ctx);
if (!shape) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}

uint32_t typeInt = ctx->dis.readUnsigned();
ctx->shape = shape.release();
return ctx;
}

uint32_t geometryType = (typeInt & 0xffff) % 1000;
std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
try {
// Ensure we have enough data to read
if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes for type
return nullptr;
}

std::unique_ptr<GeoShape> shape;
// Skip the byte order as we've already handled it
ctx->dis.readByte();

switch (geometryType) {
case wkbType::wkbPoint:
shape.reset(readPoint(ctx).release());
break;
case wkbType::wkbLine:
shape.reset(readLine(ctx).release());
break;
case wkbType::wkbPolygon:
shape.reset(readPolygon(ctx).release());
break;
default:
uint32_t typeInt = ctx->dis.readUnsigned();

// Check if geometry has SRID
bool has_srid = (typeInt & WKB_SRID_FLAG) != 0;

// Read SRID if present
if (has_srid) {
ctx->dis.readUnsigned(); // Read and store SRID if needed
}

// Get the base geometry type
uint32_t geometryType = typeInt & WKB_TYPE_MASK;

std::unique_ptr<GeoShape> shape;

switch (geometryType) {
case wkbType::wkbPoint:
shape = readPoint(ctx);
break;
case wkbType::wkbLine:
shape = readLine(ctx);
break;
case wkbType::wkbPolygon:
shape = readPolygon(ctx);
break;
default:
return nullptr;
}

return shape;
} catch (...) {
// Handle any exceptions from reading operations
return nullptr;
}
return shape;
}

std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) {
GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx);
std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
if (coords.list.empty()) {
return nullptr;
}

if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) {
return point;
} else {
std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) {
return nullptr;
}

return point;
}

std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) {
uint32_t size = ctx->dis.readUnsigned();
minMemSize(wkbLine, size, ctx);
if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) {
return nullptr;
}

GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx);
std::unique_ptr<GeoLine> line = GeoLine::create_unique();
if (coords.list.empty()) {
return nullptr;
}

if (line->from_coords(coords) == GEO_PARSE_OK) {
return line;
} else {
std::unique_ptr<GeoLine> line = GeoLine::create_unique();
if (!line || line->from_coords(coords) != GEO_PARSE_OK) {
return nullptr;
}

return line;
}

std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) {
uint32_t num_loops = ctx->dis.readUnsigned();
minMemSize(wkbPolygon, num_loops, ctx);
if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) {
return nullptr;
}

GeoCoordinateListList coordss;
for (int i = 0; i < num_loops; ++i) {
for (uint32_t i = 0; i < num_loops; ++i) {
uint32_t size = ctx->dis.readUnsigned();
GeoCoordinateList* coords = new GeoCoordinateList();
if (size < 3) { // A polygon loop must have at least 3 points
return nullptr;
}

auto coords = std::make_unique<GeoCoordinateList>();
*coords = WkbParse::readCoordinateList(size, ctx);
coordss.add(coords);
if (coords->list.empty()) {
return nullptr;
}
coordss.add(coords.release());
}

std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique();

if (polygon->from_coords(coordss) == GEO_PARSE_OK) {
return polygon;
} else {
if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) {
return nullptr;
}

return polygon;
}

GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) {
GeoCoordinateList coords;
for (uint32_t i = 0; i < size; i++) {
readCoordinate(ctx);
if (!readCoordinate(ctx)) {
return GeoCoordinateList();
}
unsigned int j = 0;
GeoCoordinate coord;
coord.x = ctx->ordValues[j++];
coord.y = ctx->ordValues[j++];
coords.add(coord);
}

return coords;
}

Expand Down
14 changes: 12 additions & 2 deletions be/src/geo/wkb_parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@

#pragma once

#include <stdint.h>

#include <cstdint>
#include <iosfwd>
#include <memory>

Expand All @@ -34,6 +33,17 @@ class GeoLine;
class GeoPoint;
class GeoPolygon;

// WKB format constants
// According to OpenGIS Implementation Specification:
// The high bit of the type value is set to 1 if the WKB contains a SRID.
// Reference: OpenGIS Implementation Specification for Geographic information - Simple feature access - Part 1: Common architecture
// Bit mask to check if WKB contains SRID
constexpr uint32_t WKB_SRID_FLAG = 0x20000000;

// The geometry type is stored in the least significant byte of the type value
// Bit mask to extract the base geometry type
constexpr uint32_t WKB_TYPE_MASK = 0xFF;

class WkbParse {
public:
static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
100 test 2021-01-02

-- !select_geo1 --
POINT (123.123456789 89.123456789)
POINT (123.123456789012 89.123456789)

-- !select2 --
20230101
Expand Down
48 changes: 24 additions & 24 deletions regression-test/data/nereids_function_p0/scalar_function/S.out
Original file line number Diff line number Diff line change
Expand Up @@ -1335,32 +1335,32 @@ POINT (98.35620117 36.939093)

-- !sql_st_circle_Double_Double_Double --
\N
CIRCLE ((-46.35620117 39.939093), 3.32100009918)
CIRCLE ((-74.35620117 79.939093), 66.3209991455)
CIRCLE ((126.35620117 -39.939093), 5.30000019073)
CIRCLE ((16.35620117 19.939093), 7.32100009918)
CIRCLE ((43.35620117 35.939093), 2.32100009918)
CIRCLE ((47.35620117 26.939093), 33.3209991455)
CIRCLE ((5 5), 4.32100009918)
CIRCLE ((90.35620117 39.939093), 100.320999146)
CIRCLE ((90.35620117 47.939093), 88.3209991455)
CIRCLE ((90.35620117 49.939093), 76.3209991455)
CIRCLE ((90.35620117 59.939093), 75.3209991455)
CIRCLE ((98.35620117 36.939093), 45.3209991455)
CIRCLE ((-46.35620117 39.939093), 3.32100009918213)
CIRCLE ((-74.35620117 79.939093), 66.3209991455078)
CIRCLE ((126.35620117 -39.939093), 5.30000019073486)
CIRCLE ((16.35620117 19.939093), 7.32100009918213)
CIRCLE ((43.35620117 35.939093), 2.32100009918213)
CIRCLE ((47.35620117 26.939093), 33.3209991455078)
CIRCLE ((5 5), 4.32100009918213)
CIRCLE ((90.35620117 39.939093), 100.320999145508)
CIRCLE ((90.35620117 47.939093), 88.3209991455078)
CIRCLE ((90.35620117 49.939093), 76.3209991455078)
CIRCLE ((90.35620117 59.939093), 75.3209991455078)
CIRCLE ((98.35620117 36.939093), 45.3209991455078)

-- !sql_st_circle_Double_Double_Double_notnull --
CIRCLE ((-46.35620117 39.939093), 3.32100009918)
CIRCLE ((-74.35620117 79.939093), 66.3209991455)
CIRCLE ((126.35620117 -39.939093), 5.30000019073)
CIRCLE ((16.35620117 19.939093), 7.32100009918)
CIRCLE ((43.35620117 35.939093), 2.32100009918)
CIRCLE ((47.35620117 26.939093), 33.3209991455)
CIRCLE ((5 5), 4.32100009918)
CIRCLE ((90.35620117 39.939093), 100.320999146)
CIRCLE ((90.35620117 47.939093), 88.3209991455)
CIRCLE ((90.35620117 49.939093), 76.3209991455)
CIRCLE ((90.35620117 59.939093), 75.3209991455)
CIRCLE ((98.35620117 36.939093), 45.3209991455)
CIRCLE ((-46.35620117 39.939093), 3.32100009918213)
CIRCLE ((-74.35620117 79.939093), 66.3209991455078)
CIRCLE ((126.35620117 -39.939093), 5.30000019073486)
CIRCLE ((16.35620117 19.939093), 7.32100009918213)
CIRCLE ((43.35620117 35.939093), 2.32100009918213)
CIRCLE ((47.35620117 26.939093), 33.3209991455078)
CIRCLE ((5 5), 4.32100009918213)
CIRCLE ((90.35620117 39.939093), 100.320999145508)
CIRCLE ((90.35620117 47.939093), 88.3209991455078)
CIRCLE ((90.35620117 49.939093), 76.3209991455078)
CIRCLE ((90.35620117 59.939093), 75.3209991455078)
CIRCLE ((98.35620117 36.939093), 45.3209991455078)

-- !sql_st_contains_Varchar_Varchar --
\N
Expand Down
2 changes: 1 addition & 1 deletion regression-test/data/nereids_p0/test_select_constant.out
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
100 test 2021-01-02

-- !select_geo1 --
POINT (123.123456789 89.123456789)
POINT (123.123456789012 89.123456789)

Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,18 @@ LINESTRING (1 1, 2 2)
-- !sql --
POLYGON ((114.104486 22.547119, 114.093758 22.547753, 114.096504 22.532057, 114.104229 22.539826, 114.106203 22.54268, 114.104486 22.547119))

-- !sql --
POINT (117.194767000297 36.46326301008)

-- !sql --
1.0

-- !sql --
POINT (1.23456789 2.34567891)

-- !sql --
0.9999999999999

-- !sql --
1.0000000000001

2 changes: 1 addition & 1 deletion regression-test/data/query_p0/test_select_constant.out
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
100 test 2021-01-02

-- !select_geo1 --
POINT (123.123456789 89.123456789)
POINT (123.123456789012 89.123456789)

Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,10 @@ suite("test_gis_function", "arrow_flight_sql") {
qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2 2)\"))));"
qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON ((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 22.539826,114.106203 22.542680,114.104486 22.547119))\"))));"

qt_sql "SELECT ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));"
qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));"
qt_sql "SELECT ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));"
qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));"
qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));"

}
Loading