From 73994a99cf96510f58270b6e037eea0f1987a141 Mon Sep 17 00:00:00 2001 From: systemed Date: Mon, 23 Aug 2021 18:27:41 +0100 Subject: [PATCH 1/6] Only add ways used by relations to the store --- include/osm_store.h | 47 ++++++++++++++++++++++++++++++++++++++++++++- include/read_pbf.h | 4 ++-- src/read_pbf.cpp | 37 +++++++++++++++++++++++++++++++---- 3 files changed, 81 insertions(+), 7 deletions(-) diff --git a/include/osm_store.h b/include/osm_store.h index b7295047..b25f094e 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -189,6 +189,41 @@ class CompactNodeStore std::shared_ptr mLatpLons; }; +// list of ways used by relations +// by noting these in advance, we don't need to store all ways in the store +class UsedWays { + +private: + std::vector usedList; + +public: + bool inited = false; + + // Size the vector to a reasonable estimate, to avoid resizing on the fly + void reserve(bool compact, int numNodes) { + inited = true; + if (compact) { + // If we're running in compact mode, way count is roughly 1/9th of node count... say 1/8 to be safe + usedList.reserve(numNodes/8); + } else { + // Otherwise, we could have anything up to the current max node ID (approaching 2**30 in summer 2021) + // 2**31 is 0.25GB with a vector + usedList.reserve(pow(2,31)); + } + } + + // Mark a way as used + void insert(WayID wayid) { + if (wayid>usedList.size()) usedList.resize(wayid); + usedList[wayid] = true; + } + + // See if a way is used + bool at(WayID wayid) { + return (wayid>usedList.size()) ? false : usedList[wayid]; + } +}; + // way store class WayStore { @@ -336,6 +371,7 @@ class OSMStore WayStore ways; RelationStore relations; + UsedWays used_ways; generated osm_generated; generated shp_generated; @@ -382,7 +418,10 @@ class OSMStore if(!use_compact_nodes) nodes.sort(threadNum); } - + std::size_t nodes_size() { + return use_compact_nodes ? compact_nodes.size() : nodes.size(); + } + LatpLon nodes_at(NodeID i) const { return use_compact_nodes ? 
compact_nodes.at(i) : nodes.at(i); } @@ -398,6 +437,12 @@ class OSMStore relations.insert_front(new_relations); } + void mark_way_used(WayID i) { used_ways.insert(i); } + bool way_is_used(WayID i) { return used_ways.at(i); } + void ensure_used_ways_inited() { + if (!used_ways.inited) used_ways.reserve(use_compact_nodes, nodes_size()); + } + generated &osm() { return osm_generated; } generated const &osm() const { return osm_generated; } generated &shp() { return shp_generated; } diff --git a/include/read_pbf.h b/include/read_pbf.h index 326a188b..ae31d340 100644 --- a/include/read_pbf.h +++ b/include/read_pbf.h @@ -22,7 +22,7 @@ class OsmLuaProcessing; class PbfReader { public: - enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, All = 7 }; + enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, RelationScan = 8, All = 15 }; PbfReader(OSMStore &osmStore); @@ -38,7 +38,7 @@ class PbfReader bool ReadNodes(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb, const std::unordered_set &nodeKeyPositions); bool ReadWays(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb); - + bool ScanRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb); bool ReadRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb); /// Find a string in the dictionary diff --git a/src/read_pbf.cpp b/src/read_pbf.cpp index f7ba13d2..1d71b410 100644 --- a/src/read_pbf.cpp +++ b/src/read_pbf.cpp @@ -91,9 +91,10 @@ bool PbfReader::ReadWays(OsmLuaProcessing &output, PrimitiveGroup &pg, Primitive tags[pb.stringtable().s(keysPtr->Get(n))] = pb.stringtable().s(valsPtr->Get(n)); } - // Store the way's nodes in the global way store - ways.push_back(std::make_pair(static_cast(pbfWay.id()), - WayStore::nodeid_vector_t(nodeVec.begin(), nodeVec.end()))); + // If we need it for later, store the way's nodes in the global way store + if (osmStore.way_is_used(wayId)) { + ways.push_back(std::make_pair(wayId, 
WayStore::nodeid_vector_t(nodeVec.begin(), nodeVec.end()))); + } output.setWay(static_cast(pbfWay.id()), nodeVec, tags); } catch (std::out_of_range &err) { @@ -109,6 +110,28 @@ bool PbfReader::ReadWays(OsmLuaProcessing &output, PrimitiveGroup &pg, Primitive return false; } +bool PbfReader::ScanRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb) { + // Scan relations to see which ways we need to save + // as with ReadRelations, we currently just parse multipolygons + if (pg.relations_size()==0) return false; + + int typeKey = findStringPosition(pb, "type"); + int mpKey = findStringPosition(pb, "multipolygon"); + for (int j=0; j(lastID); + osmStore.mark_way_used(wayId); + } + } + return true; +} + bool PbfReader::ReadRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb) { // ---- Read relations // (just multipolygons for now; we should do routes in time) @@ -207,6 +230,12 @@ bool PbfReader::ReadBlock(std::istream &infile, OsmLuaProcessing &output, std::p continue; } } + + if(phase == ReadPhase::RelationScan || phase == ReadPhase::All) { + osmStore.ensure_used_ways_inited(); + bool done = ScanRelations(output, pg, pb); + if(done) { continue; } + } if(phase == ReadPhase::Ways || phase == ReadPhase::All) { bool done = ReadWays(output, pg, pb); @@ -259,7 +288,7 @@ int PbfReader::ReadPbfFile(unordered_set const &nodeKeys, unsigned int t std::size_t total_blocks = blocks.size(); - std::vector all_phases = { ReadPhase::Nodes, ReadPhase::Ways, ReadPhase::Relations }; + std::vector all_phases = { ReadPhase::Nodes, ReadPhase::RelationScan, ReadPhase::Ways, ReadPhase::Relations }; for(auto phase: all_phases) { // Launch the pool with threadNum threads boost::asio::thread_pool pool(threadNum); From 592d64986b9abefdbe72d983d69d9bc198d5bc24 Mon Sep 17 00:00:00 2001 From: systemed Date: Mon, 23 Aug 2021 20:54:13 +0100 Subject: [PATCH 2/6] Use size_t to avoid overflow --- include/osm_store.h | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/include/osm_store.h b/include/osm_store.h index b25f094e..fc70dfbe 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -200,7 +200,7 @@ class UsedWays { bool inited = false; // Size the vector to a reasonable estimate, to avoid resizing on the fly - void reserve(bool compact, int numNodes) { + void reserve(bool compact, size_t numNodes) { inited = true; if (compact) { // If we're running in compact mode, way count is roughly 1/9th of node count... say 1/8 to be safe @@ -502,6 +502,7 @@ class OSMStore compact_nodes.clear(); ways.clear(); relations.clear(); + used_ways.clear(); } void reportStoreSize(std::ostringstream &str); From b370fa9425910962dbdb05ed1c466f16b9b6caa6 Mon Sep 17 00:00:00 2001 From: systemed Date: Mon, 23 Aug 2021 21:06:26 +0100 Subject: [PATCH 3/6] Clear usedWays with everything else --- include/osm_store.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/osm_store.h b/include/osm_store.h index fc70dfbe..e8b29e0b 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -222,6 +222,8 @@ class UsedWays { bool at(WayID wayid) { return (wayid>usedList.size()) ? 
false : usedList[wayid]; } + + void clear() { usedList.clear(); } }; // way store From 0893aa9ce6107737530056df12d5c0adfb060252 Mon Sep 17 00:00:00 2001 From: Richard Fairhurst Date: Tue, 24 Aug 2021 00:53:55 +0100 Subject: [PATCH 4/6] Add mutex --- include/osm_store.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/osm_store.h b/include/osm_store.h index e8b29e0b..2bea4344 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -195,12 +195,14 @@ class UsedWays { private: std::vector<bool> usedList; + mutable std::mutex mutex; public: bool inited = false; // Size the vector to a reasonable estimate, to avoid resizing on the fly void reserve(bool compact, size_t numNodes) { + std::lock_guard<std::mutex> lock(mutex); inited = true; if (compact) { // If we're running in compact mode, way count is roughly 1/9th of node count... say 1/8 to be safe @@ -214,6 +216,7 @@ class UsedWays { // Mark a way as used void insert(WayID wayid) { + std::lock_guard<std::mutex> lock(mutex); if (wayid>usedList.size()) usedList.resize(wayid); usedList[wayid] = true; } @@ -223,7 +226,10 @@ class UsedWays { return (wayid>usedList.size()) ?
false : usedList[wayid]; } - void clear() { usedList.clear(); } + void clear() { + std::lock_guard lock(mutex); + usedList.clear(); + } }; // way store From 94bd7721fc2142326eec0d7da7141863058c84d7 Mon Sep 17 00:00:00 2001 From: systemed Date: Thu, 26 Aug 2021 21:05:03 +0100 Subject: [PATCH 5/6] % progress; fix missing ways --- include/osm_store.h | 15 +++++++++++++-- src/read_pbf.cpp | 13 ++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/include/osm_store.h b/include/osm_store.h index 2bea4344..a7c960a9 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -8,6 +8,7 @@ #include #include #include +#include class void_mmap_allocator { @@ -203,6 +204,7 @@ class UsedWays { // Size the vector to a reasonable estimate, to avoid resizing on the fly void reserve(bool compact, size_t numNodes) { std::lock_guard lock(mutex); + if (inited) return; inited = true; if (compact) { // If we're running in compact mode, way count is roughly 1/9th of node count... say 1/8 to be safe @@ -217,12 +219,20 @@ class UsedWays { // Mark a way as used void insert(WayID wayid) { std::lock_guard lock(mutex); - if (wayid>usedList.size()) usedList.resize(wayid); + if (wayid>usedList.size()) usedList.resize(wayid+1); usedList[wayid] = true; } + void insert_set(std::unordered_set ids) { + std::lock_guard lock(mutex); + for (WayID wayid : ids) { + if (wayid>usedList.size()) usedList.resize(wayid+1); + usedList[wayid] = true; + } + } + // See if a way is used - bool at(WayID wayid) { + bool at(WayID wayid) const { return (wayid>usedList.size()) ? 
false : usedList[wayid]; } @@ -446,6 +456,7 @@ class OSMStore } void mark_way_used(WayID i) { used_ways.insert(i); } + void mark_ways_used(std::unordered_set ids) { used_ways.insert_set(ids); } bool way_is_used(WayID i) { return used_ways.at(i); } void ensure_used_ways_inited() { if (!used_ways.inited) used_ways.reserve(use_compact_nodes, nodes_size()); diff --git a/src/read_pbf.cpp b/src/read_pbf.cpp index 1d71b410..e1ae667e 100644 --- a/src/read_pbf.cpp +++ b/src/read_pbf.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "osm_lua_processing.h" @@ -117,6 +118,8 @@ bool PbfReader::ScanRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, Prim int typeKey = findStringPosition(pb, "type"); int mpKey = findStringPosition(pb, "multipolygon"); + + std::unordered_set wayIDs; for (int j=0; j(lastID); - osmStore.mark_way_used(wayId); + wayIDs.insert(static_cast(lastID)); } } + osmStore.mark_ways_used(wayIDs); return true; } @@ -234,7 +237,11 @@ bool PbfReader::ReadBlock(std::istream &infile, OsmLuaProcessing &output, std::p if(phase == ReadPhase::RelationScan || phase == ReadPhase::All) { osmStore.ensure_used_ways_inited(); bool done = ScanRelations(output, pg, pb); - if(done) { continue; } + if(done) { + std::cout << "(Scanning for ways used in relations: " << (100*progress.first/progress.second) << "%)\r"; + std::cout.flush(); + continue; + } } if(phase == ReadPhase::Ways || phase == ReadPhase::All) { From d0e3d5f0f1870c2d4f2311271cf025ee9e6baecc Mon Sep 17 00:00:00 2001 From: systemed Date: Thu, 26 Aug 2021 22:40:05 +0100 Subject: [PATCH 6/6] Clear store after use --- src/tilemaker.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tilemaker.cpp b/src/tilemaker.cpp index efc4d004..e3c1cc85 100644 --- a/src/tilemaker.cpp +++ b/src/tilemaker.cpp @@ -337,6 +337,7 @@ int main(int argc, char* argv[]) { }); if (ret != 0) return ret; } + osmStore.clear(); // no longer needed } // ---- Initialise SharedData