diff --git a/Makefile b/Makefile index 4bef36f798..e098f3b08b 100644 --- a/Makefile +++ b/Makefile @@ -43,6 +43,7 @@ include $(wildcard $(IO_SHARED_OBJ_DIR)/*.d) include $(wildcard $(SUBCOMMAND_OBJ_DIR)/*.d) include $(wildcard $(UNITTEST_OBJ_DIR)/*.d) include $(wildcard $(UNITTEST_BIN_DIR)/*.d) +include $(wildcard test/*.d) # What pkg-config-controlled system dependencies should we use compile and link flags from? # Use PKG_CONFIG_PATH to point the build system at the right versions of these, if they aren't picked up automatically. @@ -606,8 +607,8 @@ else ln -s `which shuf` $(BIN_DIR)/shuf endif -test/build_graph: test/build_graph.cpp $(LIB_DIR)/libvg.a $(SRC_DIR)/vg.hpp - $(CXX) $(INCLUDE_FLAGS) $(CPPFLAGS) $(CXXFLAGS) -o test/build_graph test/build_graph.cpp $(LD_LIB_DIR_FLAGS) $(LDFLAGS) $(LIB_DIR)/libvg.a $(LD_LIB_FLAGS) $(START_STATIC) $(LD_STATIC_LIB_FLAGS) $(END_STATIC) $(FILTER) +test/build_graph: test/build_graph.o $(LIB_DIR)/libvg.a + $(CXX) $(INCLUDE_FLAGS) $(CPPFLAGS) $(CXXFLAGS) -o test/build_graph $(PRE_LINK_DEPS) test/build_graph.o $(LD_LIB_DIR_FLAGS) $(LDFLAGS) $(LIB_DIR)/libvg.a $(LD_LIB_FLAGS) $(START_STATIC) $(LD_STATIC_LIB_FLAGS) $(END_STATIC) $(LD_STATIC_LIB_DEPS) $(LD_EXE_LIB_FLAGS) $(LIB_DIR)/mimalloc.o: $(MIMALLOC_DIR)/src/*.c $(MIMALLOC_DIR)/src/*/*.c $(MIMALLOC_DIR)/src/*/*/*.c $(MIMALLOC_DIR)/include/*.h $(MIMALLOC_DIR)/include/*/*.h $(MIMALLOC_DIR)/CMakeLists.txt +rm -f $(LIB_DIR)/mimalloc.o && rm -Rf $(INC_DIR)/mimalloc $(INC_DIR)/mimalloc*.h && cd $(MIMALLOC_DIR) && rm -Rf build && mkdir build && cd build && cmake -DCMAKE_C_COMPILER="$(CC)" -DCMAKE_CXX_COMPILER="$(CXX)" -DCMAKE_C_FLAGS="$(CFLAGS)" -DCMAKE_CXX_FLAGS="$(CXXFLAGS)" .. && $(MAKE) $(FILTER) && cp -r ../include/* $(CWD)/$(INC_DIR)/ && cp mimalloc.o $(CWD)/$(LIB_DIR)/ @@ -708,9 +709,11 @@ $(LIB_DIR)/cleaned_old_elfutils: $(LIB_DIR)/libvgio.a: $(LIB_DIR)/libhts.a $(LIB_DIR)/libhandlegraph.a $(LIB_DIR)/pkgconfig/htslib.pc $(LIB_DIR)/cleaned_old_protobuf_v003 $(LIBVGIO_DIR)/CMakeLists.txt $(LIBVGIO_DIR)/src/*.cpp $(LIBVGIO_DIR)/include/vg/io/*.hpp $(LIBVGIO_DIR)/deps/vg.proto +rm -f $(CWD)/$(INC_DIR)/vg.pb.h $(CWD)/$(INC_DIR)/vg/vg.pb.h +rm -Rf $(CWD)/$(INC_DIR)/vg/io/ - +export CXXFLAGS="$(CPPFLAGS) $(CXXFLAGS)" && export LDFLAGS="$(LD_LIB_DIR_FLAGS) $(LDFLAGS)" && cd $(LIBVGIO_DIR) && rm -Rf CMakeCache.txt CMakeFiles *.cmake install_manifest.txt *.pb.cc *.pb.h *.a && rm -rf build-vg && mkdir build-vg && cd build-vg && PKG_CONFIG_PATH=$(CWD)/$(LIB_DIR)/pkgconfig:$(PKG_CONFIG_PATH) cmake -DCMAKE_C_COMPILER="$(CC)" -DCMAKE_CXX_COMPILER="$(CXX)" -DCMAKE_CXX_STANDARD=$(CXX_STANDARD) -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_PREFIX_PATH="/usr;$(OMP_PREFIXES)" -DCMAKE_INSTALL_PREFIX=$(CWD) -DCMAKE_INSTALL_LIBDIR=lib .. $(FILTER) && $(MAKE) clean && VERBOSE=1 $(MAKE) $(FILTER) && $(MAKE) install + +export CXXFLAGS="$(CPPFLAGS) $(CXXFLAGS)" && export LDFLAGS="$(LD_LIB_DIR_FLAGS) $(LDFLAGS)" && cd $(LIBVGIO_DIR) && rm -Rf CMakeCache.txt CMakeFiles *.cmake install_manifest.txt *.pb.cc *.pb.h *.a && rm -rf build-vg && mkdir build-vg && cd build-vg && PKG_CONFIG_PATH=$(CWD)/$(LIB_DIR)/pkgconfig:$(PKG_CONFIG_PATH) cmake -DCMAKE_C_COMPILER="$(CC)" -DCMAKE_CXX_COMPILER="$(CXX)" -DCMAKE_CXX_STANDARD=$(CXX_STANDARD) -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_PREFIX_PATH="/usr;$(OMP_PREFIXES)" -DCMAKE_INSTALL_PREFIX=$(CWD) -DCMAKE_INSTALL_LIBDIR=lib -DUSE_INSTALLED_LIBHANDLEGRAPH_ONLY=ON .. $(FILTER) && $(MAKE) clean && VERBOSE=1 $(MAKE) $(FILTER) && $(MAKE) install $(LIB_DIR)/libhandlegraph.a: $(LIBHANDLEGRAPH_DIR)/src/include/handlegraph/*.hpp $(LIBHANDLEGRAPH_DIR)/src/*.cpp + +rm -f $(LIB_DIR)/libhandlegraph.a $(LIB_DIR)/libhandlegraph.$(SHARED_SUFFIX) + +rm -Rf $(INC_DIR)/handlegraph $(LIB_DIR)/cmake/libhandlegraph +cd $(LIBHANDLEGRAPH_DIR) && rm -Rf build CMakeCache.txt CMakeFiles && mkdir build && cd build && CXXFLAGS="$(CXXFLAGS) $(CPPFLAGS)" cmake -DCMAKE_C_COMPILER="$(CC)" -DCMAKE_CXX_COMPILER="$(CXX)" -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_INSTALL_PREFIX=$(CWD) -DCMAKE_INSTALL_LIBDIR=lib .. && $(MAKE) $(FILTER) && $(MAKE) install @@ -1001,6 +1004,9 @@ $(UNITTEST_OBJ): $(UNITTEST_OBJ_DIR)/%.o : $(UNITTEST_SRC_DIR)/%.cpp $(UNITTEST_ $(UNITTEST_SUPPORT_OBJ): $(UNITTEST_SUPPORT_OBJ_DIR)/%.o : $(UNITTEST_SUPPORT_SRC_DIR)/%.cpp $(UNITTEST_SUPPORT_OBJ_DIR)/%.d $(DEPS) $(CXX) $(INCLUDE_FLAGS) $(CPPFLAGS) $(CXXFLAGS) $(DEPGEN_FLAGS) -c -o $@ $< $(FILTER) @touch $@ +test/build_graph.o: test/%.o : test/%.cpp test/%.d $(DEPS) + $(CXX) $(INCLUDE_FLAGS) $(CPPFLAGS) $(CXXFLAGS) $(DEPGEN_FLAGS) -c -o $@ $< $(FILTER) + @touch $@ # Config objects get individual rules $(CONFIG_OBJ_DIR)/allocator_config_mimalloc.o: $(CONFIG_SRC_DIR)/allocator_config_mimalloc.cpp $(CONFIG_OBJ_DIR)/allocator_config_mimalloc.d $(DEPS) $(LIB_DIR)/mimalloc.o @@ -1023,10 +1029,10 @@ $(CONFIG_OBJ_DIR)/%.d: ; $(IO_OBJ_DIR)/%.d: ; $(SUBCOMMAND_OBJ_DIR)/%.d: ; $(UNITTEST_OBJ_DIR)/%.d: ; +test/%.d: ; # Don't delete them. -.PRECIOUS: $(OBJ_DIR)/%.d $(ALGORITHMS_OBJ_DIR)/%.d $(CONFIG_OBJ_DIR)/%.d $(IO_OBJ_DIR)/%.d $(SUBCOMMAND_OBJ_DIR)/%.d $(UNITTEST_OBJ_DIR)/%.d - +.PRECIOUS: $(OBJ_DIR)/%.d $(ALGORITHMS_OBJ_DIR)/%.d $(CONFIG_OBJ_DIR)/%.d $(IO_OBJ_DIR)/%.d $(SUBCOMMAND_OBJ_DIR)/%.d $(UNITTEST_OBJ_DIR)/%.d test/%.d # Use no implicit rules .SUFFIXES: diff --git a/deps/gbwtgraph b/deps/gbwtgraph index c35f700024..d2c6861d15 160000 --- a/deps/gbwtgraph +++ b/deps/gbwtgraph @@ -1 +1 @@ -Subproject commit c35f700024784f9a600f2ea26d72532a7fe4d74b +Subproject commit d2c6861d15a6027eadc378dd3244fa692e643018 diff --git a/deps/libhandlegraph b/deps/libhandlegraph index ec2da41d95..ae42926da7 160000 --- a/deps/libhandlegraph +++ b/deps/libhandlegraph @@ -1 +1 @@ -Subproject commit ec2da41d955e30366b6366b8760fd8646e2c0000 +Subproject commit ae42926da7743a7108db0474bfcf1a1cdac34abb diff --git a/deps/libvgio b/deps/libvgio index 0a91f59cd0..fff151be9d 160000 --- a/deps/libvgio +++ b/deps/libvgio @@ -1 +1 @@ -Subproject commit 0a91f59cd0e7e0532bcf080b6c2eb1dee21c4213 +Subproject commit fff151be9d8255672d91f32a5b41285584905743 diff --git a/src/algorithms/alignment_path_offsets.hpp b/src/algorithms/alignment_path_offsets.hpp index f4b7e9c456..50e03075c4 100644 --- a/src/algorithms/alignment_path_offsets.hpp +++ b/src/algorithms/alignment_path_offsets.hpp @@ -23,6 +23,8 @@ using namespace std; /// /// If path_filter is set, and it returns false for a path, that path is not /// used to annotate the read. +/// +/// Doesn't consider haplotype paths. unordered_map > > alignment_path_offsets(const PathPositionHandleGraph& graph, const Alignment& aln, @@ -37,7 +39,8 @@ alignment_path_offsets(const PathPositionHandleGraph& graph, /// is disconnected or fans out toward the sources or sinks. /// /// If path_filter is set, and it returns false for a path, that path is not -/// used to annotate the read. +/// used to annotate the read. If it is not set, all reference and +/// generic-sense paths are used. unordered_map > > multipath_alignment_path_offsets(const PathPositionHandleGraph& graph, const multipath_alignment_t& mp_aln, @@ -53,6 +56,8 @@ multipath_alignment_path_offsets(const PathPositionHandleGraph& graph, /// /// If path_filter is set, and it returns false for a path, that path is not /// used to annotate the read. +/// +/// Doesn't consider haplotype paths. void annotate_with_initial_path_positions(const PathPositionHandleGraph& graph, Alignment& aln, int64_t search_limit = 0, const std::function* path_filter = nullptr); /// Use the graph to annotate an Alignment with the first @@ -67,6 +72,8 @@ void annotate_with_initial_path_positions(const PathPositionHandleGraph& graph, /// /// If path_filter is set, and it returns false for a path, that path is not /// used to annotate the read. +/// +/// Doesn't consider haplotype paths. void annotate_with_node_path_positions(const PathPositionHandleGraph& graph, Alignment& aln, int64_t search_limit = 0, const std::function* path_filter = nullptr); /// Use the graph to annotate an Alignment with positions on each reference @@ -81,6 +88,8 @@ void annotate_with_node_path_positions(const PathPositionHandleGraph& graph, Ali /// /// If path_filter is set, and it returns false for a path, that path is not /// used to annotate the read. +/// +/// Doesn't consider haplotype paths. void annotate_with_path_positions(const PathPositionHandleGraph& graph, Alignment& aln, bool just_min, int64_t search_limit = 0, const std::function* path_filter = nullptr); /// Use the graph annotate Alignments with the first position @@ -93,6 +102,8 @@ void annotate_with_path_positions(const PathPositionHandleGraph& graph, Alignmen /// /// If path_filter is set, and it returns false for a path, that path is not /// used to annotate the read. +/// +/// Doesn't consider haplotype paths. void annotate_with_initial_path_positions(const PathPositionHandleGraph& graph, vector& aln, int64_t search_limit = 0, const std::function* path_filter = nullptr); diff --git a/src/algorithms/nearest_offsets_in_paths.cpp b/src/algorithms/nearest_offsets_in_paths.cpp index 327ce3a591..ff80f8e9c0 100644 --- a/src/algorithms/nearest_offsets_in_paths.cpp +++ b/src/algorithms/nearest_offsets_in_paths.cpp @@ -47,8 +47,8 @@ path_offset_collection_t nearest_offsets_in_paths(const PathPositionHandleGraph* cerr << "traversing " << graph->get_id(here) << (graph->get_is_reverse(here) ? "-" : "+") << " in " << (search_left ? "leftward" : "rightward") << " direction at distance " << dist << endl; #endif - - for (const step_handle_t& step : graph->steps_of_handle(here)) { + + graph->for_each_step_of_sense(here, {PathSense::REFERENCE, PathSense::GENERIC}, [&](const step_handle_t& step) { // For each path visit that occurs on this node #ifdef debug cerr << "handle is on step at path offset " << graph->get_position_of_step(step) << endl; @@ -61,9 +61,9 @@ path_offset_collection_t nearest_offsets_in_paths(const PathPositionHandleGraph* #ifdef debug cerr << "handle is on ignored path " << graph->get_path_name(path_handle) << endl; #endif - continue; + return; } - + // flip the handle back to the orientation it started in handle_t oriented = search_left ? graph->flip(here) : here; @@ -89,7 +89,7 @@ path_offset_collection_t nearest_offsets_in_paths(const PathPositionHandleGraph* // add in the search distance and add the result to the output return_val[path_handle].emplace_back(path_offset, rev_on_path); - } + }); if (!return_val.empty()) { // we found the closest, we're done diff --git a/src/algorithms/nearest_offsets_in_paths.hpp b/src/algorithms/nearest_offsets_in_paths.hpp index 0f8437c4fb..15cfaa446c 100644 --- a/src/algorithms/nearest_offsets_in_paths.hpp +++ b/src/algorithms/nearest_offsets_in_paths.hpp @@ -32,6 +32,8 @@ using path_offset_collection_t = unordered_map* path_filter = nullptr); diff --git a/src/clip.cpp b/src/clip.cpp index a8c2af99b4..f3c59486fb 100644 --- a/src/clip.cpp +++ b/src/clip.cpp @@ -123,7 +123,7 @@ void visit_contained_snarls(const PathPositionHandleGraph* graph, const vector graph_path_name_set; - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { string graph_path_name = graph->get_path_name(path_handle); if (path_name_set.count(Paths::strip_subrange(graph_path_name))) { graph_path_name_set.insert(graph_path_name); @@ -614,7 +614,7 @@ void clip_low_depth_nodes_and_edges_generic(MutablePathMutableHandleGraph* graph bdsg::PackedVector<> edge_depths; edge_depths.resize(edge_count + 1); - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { bool is_ref_path = check_prefixes(graph->get_path_name(path_handle)); handle_t prev_handle; bool first = true; @@ -1053,7 +1053,7 @@ void clip_deletion_edges(MutablePathMutableHandleGraph* graph, int64_t max_delet // load up the reference paths and their ids unordered_set ref_paths; - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { string path_name = graph->get_path_name(path_handle); for (const string& ref_prefix : ref_prefixes) { if (path_name.compare(0, ref_prefix.length(), ref_prefix) == 0) { @@ -1312,7 +1312,7 @@ void clip_contained_stubs(MutablePathMutableHandleGraph* graph, PathPositionHand void stubbify_ref_paths(MutablePathMutableHandleGraph* graph, const vector& ref_prefixes, int64_t min_fragment_len, bool verbose) { unordered_set edges_to_delete; int64_t stubbified_path_count = 0; // just for logging - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { string path_name = graph->get_path_name(path_handle); for (const string& ref_prefix : ref_prefixes) { bool was_stubbified = false; diff --git a/src/deconstructor.cpp b/src/deconstructor.cpp index c8c03cd1a4..b5924a1e6d 100644 --- a/src/deconstructor.cpp +++ b/src/deconstructor.cpp @@ -1158,14 +1158,31 @@ string Deconstructor::get_vcf_header() { if (!long_ref_contig) { long_ref_contig = ref_samples.size() > 1 || ref_haplotypes.size() > 1 || nested_decomposition; } - this->long_ref_contig = long_ref_contig; sample_names.clear(); unordered_map> sample_to_haps; - // find sample names from non-reference paths - graph->for_each_path_handle([&](const path_handle_t& path_handle) { + // We always want generic and reference paths, since we do our own designated deconstruction reference dropping. + std::unordered_set wanted_senses{PathSense::GENERIC, PathSense::REFERENCE}; + + // If we're using a GBWT, we don't want to include any haplotypes in the + // input graph, because we want the haplotypes from the GBWT instead (even + // if the haplotypes in ther input graph are actually backed by the + // provided GBWT). If we aren't, we want the haplotypes from the input + // graph included. + if (!gbwt) { + wanted_senses.insert(PathSense::HAPLOTYPE); + } + + // TODO: Should we unify around using haplotype-sense paths and not the + // GBWT API? + + // find sample names from paths + graph->for_each_path_of_sense(wanted_senses, [&](const path_handle_t& path_handle) { string path_name = graph->get_path_name(path_handle); if (!this->ref_paths.count(path_name)) { + // This isn't a designated decosntruction reference path. + // Note that we allow alt paths through here. + string sample_name = graph->get_sample_name(path_handle); // for backward compatibility if (sample_name == PathMetadata::NO_SAMPLE_NAME) { diff --git a/src/gbwtgraph_helper.cpp b/src/gbwtgraph_helper.cpp index 4337f1fa10..de1d78a189 100644 --- a/src/gbwtgraph_helper.cpp +++ b/src/gbwtgraph_helper.cpp @@ -362,14 +362,17 @@ size_t estimate_hash_table_size(const gbwtgraph::GBZ& gbz, bool progress) { size_t genome_size = 0; if (gbz.graph.get_path_count() > 0) { - gbz.graph.for_each_path_handle([&](const path_handle_t& path_handle) { - gbz.graph.for_each_step_in_path(path_handle, [&](const step_handle_t& step_handle) { - handle_t handle = gbz.graph.get_handle_of_step(step_handle); - genome_size += gbz.graph.get_length(handle); - }); + gbz.graph.for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path_handle) { + std::string path_name = gbz.graph.get_path_name(path_handle); + if (!Paths::is_alt(path_name)) { + gbz.graph.for_each_step_in_path(path_handle, [&](const step_handle_t& step_handle) { + handle_t handle = gbz.graph.get_handle_of_step(step_handle); + genome_size += gbz.graph.get_length(handle); + }); + } }); if (progress) { - std::cerr << "Estimated size based on reference / generic paths: " << genome_size << std::endl; + std::cerr << "Estimated size based on reference / non-alt generic paths: " << genome_size << std::endl; } } diff --git a/src/graph_synchronizer.cpp b/src/graph_synchronizer.cpp index c32b1888dc..c791a024d5 100644 --- a/src/graph_synchronizer.cpp +++ b/src/graph_synchronizer.cpp @@ -12,10 +12,10 @@ GraphSynchronizer::GraphSynchronizer(VG& graph) : graph(graph) { // build a path index after a path has been modified (since we don't keep // the ranks up to date internally), we need to build all the indexes up // front, even if we're just working on a single path. - graph.for_each_path_handle([&](const path_handle_t& path) { + graph.for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path) { string name = graph.get_path_name(path); if (!Paths::is_alt(name)) { - // We only care about reference paths. + // We only care about reference and non-alt generic paths. get_path_index(name); } }); diff --git a/src/haplotype_indexer.cpp b/src/haplotype_indexer.cpp index 38d79601e8..b69907c109 100644 --- a/src/haplotype_indexer.cpp +++ b/src/haplotype_indexer.cpp @@ -30,9 +30,9 @@ HaplotypeIndexer::HaplotypeIndexer() { std::vector HaplotypeIndexer::parse_vcf(const std::string& filename, const PathHandleGraph& graph, const std::string& job_name) const { - // Parse all non-alt paths. + // Parse all non-alt, non-haplotype paths. std::vector path_handles; - graph.for_each_path_handle([&](path_handle_t path_handle) { + graph.for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { std::string path_name = graph.get_path_name(path_handle); if (!Paths::is_alt(path_name)) { path_handles.push_back(path_handle); diff --git a/src/index_registry.cpp b/src/index_registry.cpp index 0fe40efbe1..f84506812a 100644 --- a/src/index_registry.cpp +++ b/src/index_registry.cpp @@ -2606,7 +2606,7 @@ IndexRegistry VGIndexes::get_vg_index_registry() { // need a job to do them. unordered_set broadcast_graph_paths_to_do; if (include_named_paths && broadcast_graph) { - broadcast_graph->for_each_path_handle([&](const path_handle_t& path_handle) { + broadcast_graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path_handle) { // Look at all the paths in advance if (broadcast_graph->is_empty(path_handle) || Paths::is_alt(broadcast_graph->get_path_name(path_handle))) { // Skip empty paths and alt allele paths diff --git a/src/mcmc_genotyper.cpp b/src/mcmc_genotyper.cpp index 51c1b7da1a..1120dc59d0 100644 --- a/src/mcmc_genotyper.cpp +++ b/src/mcmc_genotyper.cpp @@ -397,11 +397,14 @@ namespace vg { unique_ptr genome(new PhasedGenome(snarls)); vector haplotype; //will add twice - graph.for_each_path_handle([&](const path_handle_t& path){ + // TODO: This just concatenates all the reference and generic paths. + // We're going to need to change this to handle multiple contigs, or + // multiple references in the graph, for this to really work. + graph.for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path){ // capture all variables (paths) in scope by reference if(!Paths::is_alt(graph.get_path_name(path))) { - // If it isn't an alt path, we want to trace it + // If it isn't an alt or haplotype path, we want to trace it for (handle_t handle : graph.scan_path(path)) { // For each occurrence from start to end diff --git a/src/rare_variant_simplifier.cpp b/src/rare_variant_simplifier.cpp index 6aec7e4315..abbbe0eb2d 100644 --- a/src/rare_variant_simplifier.cpp +++ b/src/rare_variant_simplifier.cpp @@ -12,11 +12,11 @@ void RareVariantSimplifier::simplify() { // This holds the IDs of all the nodes we want to keep around unordered_set to_keep; - graph.for_each_path_handle([&](const path_handle_t& path) { + graph.for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path) { // For each path if (!Paths::is_alt(graph.get_path_name(path))) { - // If it isn't an alt path, we want to trace it + // If it isn't an alt or haplotype path, we want to trace it // For each occurrence from start to end // Put the ID of the node we are visiting in the to-keep set diff --git a/src/subcommand/call_main.cpp b/src/subcommand/call_main.cpp index ce963d5980..25cc1d6ea2 100644 --- a/src/subcommand/call_main.cpp +++ b/src/subcommand/call_main.cpp @@ -507,10 +507,9 @@ int main_call(int argc, char** argv) { // No paths specified: use them all if (ref_paths.empty()) { set ref_sample_names; - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { const string& name = graph->get_path_name(path_handle); - PathSense path_sense = graph->get_sense(path_handle); - if (!Paths::is_alt(name) && path_sense != PathSense::HAPLOTYPE) { + if (!Paths::is_alt(name)) { string sample_name = graph->get_sample_name(path_handle); if (ref_sample.empty() || sample_name == ref_sample) { ref_paths.push_back(name); @@ -557,7 +556,7 @@ int main_call(int argc, char** argv) { for (const string& ref_path : ref_paths) { ref_path_set[ref_path] = false; } - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { const string& name = graph->get_path_name(path_handle); subrange_t subrange; string base_name = Paths::strip_subrange(name, &subrange); diff --git a/src/subcommand/clip_main.cpp b/src/subcommand/clip_main.cpp index 9929c8e22c..7f1a4fb88e 100644 --- a/src/subcommand/clip_main.cpp +++ b/src/subcommand/clip_main.cpp @@ -300,7 +300,7 @@ int main_clip(int argc, char** argv) { for (const Region& region : bed_regions) { contig_set.insert(region.seq); } - graph->for_each_path_handle([&] (path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&] (path_handle_t path_handle) { string base_name = Paths::strip_subrange(graph->get_path_name(path_handle)); if (contig_set.count(base_name)) { // todo: should take into account coordinate comp @@ -329,7 +329,7 @@ int main_clip(int argc, char** argv) { assert(need_pp); assert(!ref_prefixes.empty()); // load the BED regions from the reference path prefix - pp_graph->for_each_path_handle([&](path_handle_t path_handle) { + pp_graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { string path_name = pp_graph->get_path_name(path_handle); subrange_t subrange; path_name = Paths::strip_subrange(path_name, &subrange); diff --git a/src/subcommand/deconstruct_main.cpp b/src/subcommand/deconstruct_main.cpp index c9b493f1b0..aa98720aaf 100644 --- a/src/subcommand/deconstruct_main.cpp +++ b/src/subcommand/deconstruct_main.cpp @@ -296,19 +296,29 @@ int main_deconstruct(int argc, char** argv) { } if (refpaths.empty() && refpath_prefixes.empty()) { - bool found_hap; - // No paths specified: use them all - graph->for_each_path_handle([&](path_handle_t path_handle) { + // We will set this if we found any haplotypes or alt paths to use as alternatives to the reference. + bool found_hap = false; + + // No paths specified: use all reference and non-alt generic paths as reference to deconstruct against. + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { const string& name = graph->get_path_name(path_handle); - if (!Paths::is_alt(name) && graph->get_sense(path_handle) != PathSense::HAPLOTYPE) { + if (!Paths::is_alt(name)) { refpaths.push_back(name); } else { found_hap = true; } }); + if (!found_hap) { + // See if we have any haplotypes. + graph->for_each_path_of_sense(PathSense::HAPLOTYPE, [&](path_handle_t path_handle) { + found_hap = true; + return false; + }); + } + if (!found_hap && gbwt_index == nullptr) { - logger.error() << "All graph paths selected as references (leaving no alts). Please use -P/-p " + logger.error() << "All graph paths selected as references (leaving no alternative alleles). Please use -P/-p " << "to narrow down the reference to a subset of paths, " << "or GBZ/GBWT input that contains haplotype paths" << endl; } @@ -337,7 +347,7 @@ int main_deconstruct(int argc, char** argv) { // process the prefixes to find ref paths if (!refpath_prefixes.empty()) { - graph->for_each_path_handle([&](const path_handle_t& path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path_handle) { string path_name = graph->get_path_name(path_handle); for (auto& prefix : refpath_prefixes) { if (path_name.compare(0, prefix.size(), prefix) == 0) { diff --git a/src/subcommand/depth_main.cpp b/src/subcommand/depth_main.cpp index c4988c881c..b109f81049 100644 --- a/src/subcommand/depth_main.cpp +++ b/src/subcommand/depth_main.cpp @@ -222,7 +222,7 @@ int main_depth(int argc, char** argv) { map, string> ref_paths; unordered_set base_path_set; - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { string path_name = graph->get_path_name(path_handle); subrange_t subrange; string base_name = Paths::strip_subrange(path_name, &subrange); diff --git a/src/subcommand/gbwt_main.cpp b/src/subcommand/gbwt_main.cpp index a6785b7fc8..00ecd6e660 100644 --- a/src/subcommand/gbwt_main.cpp +++ b/src/subcommand/gbwt_main.cpp @@ -1138,10 +1138,10 @@ std::vector determine_jobs(std::unique_ptr& graph, co std::vector result; - // Determine the non-alt paths. + // Determine the non-alt, non-haplotype paths. std::vector paths; size_t max_length = 0; - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { if (!Paths::is_alt(graph->get_path_name(path_handle))) { paths.emplace_back(path_handle, graph->get_step_count(path_handle)); max_length = std::max(max_length, paths.back().second); diff --git a/src/subcommand/index_main.cpp b/src/subcommand/index_main.cpp index a4595b89aa..3725a546a1 100644 --- a/src/subcommand/index_main.cpp +++ b/src/subcommand/index_main.cpp @@ -561,7 +561,7 @@ int main_index(int argc, char** argv) { if (graph == nullptr) { logger.error() << "-P cannot be used because graph format does not support paths" << endl; } - graph->for_each_path_handle([&](const path_handle_t& path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path_handle) { string path_name = graph->get_path_name(path_handle); if (path_name.compare(0, ref_prefix.size(), ref_prefix) == 0 && !graph->is_empty(path_handle)) { extra_node_weight[graph->get_id(graph->get_handle_of_step(graph->path_begin(path_handle)))] += EXTRA_WEIGHT; diff --git a/src/subcommand/mcmc_main.cpp b/src/subcommand/mcmc_main.cpp index 8b1548aaf5..eda6fe1fa8 100644 --- a/src/subcommand/mcmc_main.cpp +++ b/src/subcommand/mcmc_main.cpp @@ -187,7 +187,7 @@ int main_mcmc(int argc, char** argv) { // No paths specified: use them all if (ref_paths.empty()) { - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { const string& name = graph->get_path_name(path_handle); if (!Paths::is_alt(name)) { ref_paths.push_back(name); diff --git a/src/subcommand/simplify_main.cpp b/src/subcommand/simplify_main.cpp index 782ea52b66..983422ef5d 100644 --- a/src/subcommand/simplify_main.cpp +++ b/src/subcommand/simplify_main.cpp @@ -243,7 +243,7 @@ int main_simplify(int argc, char** argv) { if (!keep_nonref_paths) { vector to_destroy; - graph->for_each_path_handle([&](const path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t path_handle) { if (graph->get_path_name(path_handle).compare(0, ref_path_prefix.length(), ref_path_prefix) != 0) { to_destroy.push_back(path_handle); } diff --git a/src/subcommand/snarls_main.cpp b/src/subcommand/snarls_main.cpp index e02911f9e7..f1866ed794 100644 --- a/src/subcommand/snarls_main.cpp +++ b/src/subcommand/snarls_main.cpp @@ -248,7 +248,7 @@ int main_snarl(int argc, char** argv) { snarl_finder.reset(new CactusSnarlFinder(*graph)); } else if (algorithm == "integrated") { if (!ref_prefix.empty()) { - graph->for_each_path_handle([&](const path_handle_t& path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path_handle) { string path_name = graph->get_path_name(path_handle); if (path_name.compare(0, ref_prefix.size(), ref_prefix) == 0 && !graph->is_empty(path_handle)) { extra_node_weight[graph->get_id(graph->get_handle_of_step(graph->path_begin(path_handle)))] += EXTRA_WEIGHT; @@ -345,7 +345,7 @@ int main_snarl(int argc, char** argv) { // it's easier to limit traversals using read support, and it takes care of // mapping back to the VCF via the alt paths. vector ref_paths; - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { const string& name = graph->get_path_name(path_handle); if (!Paths::is_alt(name)) { ref_paths.push_back(name); diff --git a/src/traversal_clusters.cpp b/src/traversal_clusters.cpp index ad61cc4a9b..1d8bbb6308 100644 --- a/src/traversal_clusters.cpp +++ b/src/traversal_clusters.cpp @@ -672,7 +672,7 @@ void simplify_graph_using_traversals(MutablePathMutableHandleGraph* graph, const unordered_map ref_paths; std::unordered_map extra_node_weight; constexpr size_t EXTRA_WEIGHT = 10000000000; - graph->for_each_path_handle([&](path_handle_t path_handle) { + graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { if (graph->get_path_name(path_handle).compare(0, ref_path_prefix.length(), ref_path_prefix) == 0) { ref_paths[path_handle] = 0; extra_node_weight[graph->get_id(graph->get_handle_of_step(graph->path_begin(path_handle)))] += EXTRA_WEIGHT; diff --git a/src/traversal_finder.cpp b/src/traversal_finder.cpp index 7327faacd1..b011cfe51f 100644 --- a/src/traversal_finder.cpp +++ b/src/traversal_finder.cpp @@ -72,7 +72,7 @@ vector PathBasedTraversalFinder::find_traversals(const Snarl& si regex alt_str ("(_alt_)"); regex back ("(_[0-9]*)"); set gpath_names; - graph.for_each_path_handle([&](const path_handle_t& path_handle) { + graph.for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](const path_handle_t& path_handle) { gpath_names.insert(graph.get_path_name(path_handle)); }); diff --git a/src/unittest/copy_graph.cpp b/src/unittest/copy_graph.cpp index d00ef43878..581b683130 100644 --- a/src/unittest/copy_graph.cpp +++ b/src/unittest/copy_graph.cpp @@ -349,7 +349,7 @@ namespace vg { REQUIRE(edge_count == 7); } - TEST_CASE( "copy_handle_graph converter works on paths, xg to vg", "[handle][vg][xg]") { + TEST_CASE( "copy_handle_graph converter works on paths xg to vg", "[handle][vg][xg]") { string graph_json = R"( { "node": [ @@ -388,8 +388,8 @@ namespace vg { xg::XG xg; xg.from_path_handle_graph(VG(proto_graph)); VG vg; - handlealgs::copy_path_handle_graph(&xg, &vg); - + handlealgs::copy_path_handle_graph(&xg, &vg); + REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT"); @@ -411,7 +411,7 @@ namespace vg { } }); } - TEST_CASE( "copy_handle_graph converter works on paths, xg to pg", "[handle][pg][xg]") { + TEST_CASE( "copy_handle_graph converter works on paths xg to pg", "[handle][pg][xg]") { string graph_json = R"( { "node": [ @@ -450,7 +450,7 @@ namespace vg { xg::XG xg; xg.from_path_handle_graph(VG(proto_graph)); bdsg::PackedGraph pg; - handlealgs::copy_path_handle_graph(&xg, &pg); + handlealgs::copy_path_handle_graph(&xg, &pg); @@ -488,7 +488,7 @@ namespace vg { } }); } - TEST_CASE( "copy_handle_graph converter works on paths, xg to hg", "[handle][hg][xg]") { + TEST_CASE( "copy_handle_graph converter works on paths xg to hg", "[handle][hg][xg]") { string graph_json = R"( { "node": [ @@ -527,7 +527,7 @@ namespace vg { xg::XG xg; xg.from_path_handle_graph(VG(proto_graph)); bdsg::HashGraph hg; - handlealgs::copy_path_handle_graph(&xg, &hg); + handlealgs::copy_path_handle_graph(&xg, &hg); diff --git a/test/t/01_build_graph.t b/test/t/01_build_graph.t index c3f8060240..2125d405d7 100644 --- a/test/t/01_build_graph.t +++ b/test/t/01_build_graph.t @@ -9,4 +9,3 @@ plan tests 1 is $(./build_graph | wc -l) 1 "graph building with the API" -rm -rf build_graph.d build_graph.dSYM \ No newline at end of file diff --git a/test/t/02_vg_construct.t b/test/t/02_vg_construct.t index 8b60f4670e..3f96c74c73 100644 --- a/test/t/02_vg_construct.t +++ b/test/t/02_vg_construct.t @@ -57,14 +57,14 @@ is $x1 1 "the size of the regions used in construction has no effect on the grap x2=$(for i in $(seq 100); do size=10; - threads=$(shuf -i 1-100 -n 1); + threads=$(shuf -i 1-$(nproc) -n 1); vg construct -r small/x.fa -v small/x.vcf.gz -z $size -t $threads | vg view -g - | sort -n -k 2 | md5sum; done | sort | uniq | wc -l) is $x2 1 "the number of threads used in construction has no effect on the graph" x3=$(for i in $(seq 100); do - size=$(shuf -i 1-100 -n 1); + size=$(shuf -i 1-$(nproc) -n 1); threads=$(shuf -i 1-100 -n 1); vg construct -r small/x.fa -v small/x.vcf.gz -z $size -t $threads | vg view -g - | sort -n -k 2 | md5sum; done | sort | uniq | wc -l)