From 06a72b12f9302a648030ff5d6382ba4945a1453f Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Wed, 6 Dec 2023 21:24:12 -0500 Subject: [PATCH 1/2] clipping monster polygons, take 2 This replaces https://github.com/systemed/tilemaker/pull/606 Rather than dealing with the special case of fully covered or fully excluded, we keep a cache of recently clipped geometries. A higher zoom will look in the cache for a clipped geometry of the object from a lower zoom and re-use it if possible, falling back to the whole geometry only if no cache entry is found. Hudson Bay, master: 22m27s this PR: 1m28s This relies on the assumption that clipping a clipped polygon gives the same result as clipping the original polygon. That seems reasonable to me, but if you know of edge cases to consider, let me know. --- include/coordinates.h | 9 ++++++ include/tile_data.h | 6 +++- src/tile_data.cpp | 65 +++++++++++++++++++++++++++++++++++++++++-- src/tile_worker.cpp | 4 +-- 4 files changed, 78 insertions(+), 6 deletions(-) diff --git a/include/coordinates.h b/include/coordinates.h index 3ba983bd..d936db47 100644 --- a/include/coordinates.h +++ b/include/coordinates.h @@ -33,6 +33,15 @@ class TileCoordinates_ { return false; return y == obj.y; } + + bool operator <(const TileCoordinates_ & obj) const + { + if (x != obj.x) + return x < obj.x; + + return y < obj.y; + } + }; struct TileCoordinatesCompare { bool operator()(const class TileCoordinates_& a, const class TileCoordinates_& b) const { diff --git a/include/tile_data.h b/include/tile_data.h index eb17a780..caaf0392 100644 --- a/include/tile_data.h +++ b/include/tile_data.h @@ -335,7 +335,7 @@ class TileDataSource { TileCoordinates coordinates ); - Geometry buildWayGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox) const; + Geometry buildWayGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox); LatpLon buildNodeGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox) const; void open() { @@ -427,6 +427,10 @@ class TileDataSource { private: + std::vector, std::shared_ptr>> clipCache; + std::vector clipCacheMutex; + std::vector clipCacheSize; + void cacheClippedGeometry(const TileBbox& box, const NodeID objectID, const MultiPolygon& mp); }; TileCoordinatesSet getTilesAtZoom( diff --git a/src/tile_data.cpp b/src/tile_data.cpp index 273f559d..2c5699ef 100644 --- a/src/tile_data.cpp +++ b/src/tile_data.cpp @@ -14,7 +14,10 @@ TileDataSource::TileDataSource(size_t threadNum, unsigned int baseZoom, bool inc objectsMutex(threadNum * 4), objects(CLUSTER_ZOOM_AREA), objectsWithIds(CLUSTER_ZOOM_AREA), - baseZoom(baseZoom) + baseZoom(baseZoom), + clipCache(threadNum * 4), + clipCacheMutex(threadNum * 4), + clipCacheSize(threadNum * 4) { } @@ -129,7 +132,7 @@ void TileDataSource::collectLargeObjectsForTile( // Build node and way geometries Geometry TileDataSource::buildWayGeometry(OutputGeometryType const geomType, - NodeID const objectID, const TileBbox &bbox) const { + NodeID const objectID, const TileBbox &bbox) { switch(geomType) { case POINT_: { auto p = retrieve_point(objectID); @@ -175,7 +178,35 @@ Geometry TileDataSource::buildWayGeometry(OutputGeometryType const geomType, } case POLYGON_: { - auto const &input = retrieve_multi_polygon(objectID); + // Look for a previously clipped version at z-1, z-2, ... + std::shared_ptr cachedClip; + { + size_t zoom = bbox.zoom; + size_t x = bbox.index.x; + size_t y = bbox.index.y; + std::lock_guard lock(clipCacheMutex[objectID % clipCacheMutex.size()]); + while (zoom > 0) { + zoom--; + x /= 2; + y /= 2; + const auto& cache = clipCache[objectID % clipCache.size()]; + const auto& rv = cache.find(std::make_tuple(zoom, TileCoordinates(x, y), objectID)); + if (rv != cache.end()) { + cachedClip = rv->second; + break; + } + } + } + + MultiPolygon uncached; + + if (cachedClip == nullptr) { + // The cached multipolygon uses a non-standard allocator, so copy it + const auto &input = retrieve_multi_polygon(objectID); + boost::geometry::assign(uncached, input); + } + + const auto &input = cachedClip == nullptr ? uncached : *cachedClip; Box box = bbox.clippingBox; @@ -235,11 +266,13 @@ Geometry TileDataSource::buildWayGeometry(OutputGeometryType const geomType, MultiPolygon output; geom::intersection(input, box, output); geom::correct(output); + cacheClippedGeometry(bbox, objectID, output); return output; } else { // occasionally also wrong_topological_dimension, disconnected_interior } } + cacheClippedGeometry(bbox, objectID, mp); return mp; } @@ -248,6 +281,32 @@ Geometry TileDataSource::buildWayGeometry(OutputGeometryType const geomType, } } +void TileDataSource::cacheClippedGeometry(const TileBbox& box, const NodeID objectID, const MultiPolygon& mp) { + // The point of caching is to reuse the clip, so caching at the terminal zoom is + // pointless. + if (box.zoom == baseZoom) + return; + + std::shared_ptr copy = std::make_shared(); + boost::geometry::assign(*copy, mp); + + size_t index = objectID % clipCacheMutex.size(); + std::lock_guard lock(clipCacheMutex[index]); + auto& cache = clipCache[index]; + // In a perfect world, this would be an LRU cache and we'd evict old entries + // that are unlikely to be used again. + // + // But for now, just reset the cache every so often to prevent it growing + // without bound. + clipCacheSize[index]++; + if (clipCacheSize[index] > 5000) { + clipCacheSize[index] = 0; + cache.clear(); + } + + cache[std::make_tuple(box.zoom, box.index, objectID)] = copy; +} + LatpLon TileDataSource::buildNodeGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox) const { switch(geomType) { diff --git a/src/tile_worker.cpp b/src/tile_worker.cpp index a20b1b3d..df939f6b 100644 --- a/src/tile_worker.cpp +++ b/src/tile_worker.cpp @@ -96,7 +96,7 @@ void MergeIntersecting(MultiPolygon &input, MultiPolygon &to_merge) { template void CheckNextObjectAndMerge( - const TileDataSource* source, + TileDataSource* source, OutputObjectsConstIt& jt, OutputObjectsConstIt ooSameLayerEnd, const TileBbox& bbox, @@ -147,7 +147,7 @@ void RemoveInnersBelowSize(MultiPolygon &g, double filterArea) { } void ProcessObjects( - const TileDataSource* source, + TileDataSource* source, const AttributeStore& attributeStore, OutputObjectsConstIt ooSameLayerBegin, OutputObjectsConstIt ooSameLayerEnd, From 47bcf4a8d4e9a29c2b2541482142cbb046a21549 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Wed, 6 Dec 2023 22:15:36 -0500 Subject: [PATCH 2/2] more robustness against invalid Antarctica nodes This is another fix in the vein of https://github.com/systemed/tilemaker/pull/595/commits/066ca0a91ea2981f55e63cd616af07a5dfff3125 Testing Antarctica with this PR: 9m5s The generated mbtiles doesn't seem to be convertible to a pmtiles file -- `pmtiles convert` fails with: ``` main.go:162: Failed to convert /home/cldellow/src/basemap/tiles.mbtiles, Missing row ``` I've never generated Antarctica with the previous code, so I'm not sure if this is specific to this PR or not. My intuition is this is not specific to this PR. Wild guess is that we're creating tiles with no features, due to https://github.com/systemed/tilemaker/blob/d470dc94fea6ae74e948c1a858c6e1228c9f4bf9/src/tile_worker.cpp#L210, and pmtiles doesn't like that. --- src/tile_data.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tile_data.cpp b/src/tile_data.cpp index 2c5699ef..4438955f 100644 --- a/src/tile_data.cpp +++ b/src/tile_data.cpp @@ -100,6 +100,10 @@ void TileDataSource::collectObjectsForTile( TileCoordinate z6x = dstIndex.x / (1 << (zoom - CLUSTER_ZOOM)); TileCoordinate z6y = dstIndex.y / (1 << (zoom - CLUSTER_ZOOM)); + if (z6x >= 64 || z6y >= 64) { + if (verbose) std::cerr << "collectObjectsForTile: invalid tile z" << zoom << "/" << dstIndex.x << "/" << dstIndex.y << std::endl; + return; + } iStart = z6x * CLUSTER_ZOOM_WIDTH + z6y; iEnd = iStart + 1; }