diff --git a/.proj.toml b/.proj.toml index 2e776484ba..14bdcdb3b7 100644 --- a/.proj.toml +++ b/.proj.toml @@ -9,7 +9,7 @@ build_targets = [ "kernels", "pcg", "substitutions", - # "compiler", + "compiler", "substitution-generator", "local-execution", ] @@ -19,7 +19,7 @@ test_targets = [ "op-attrs-tests", "pcg-tests", "substitutions-tests", - # "compiler-tests", + "compiler-tests", "substitution-generator-tests", ] diff --git a/lib/compiler/include/compiler/compiler.h b/lib/compiler/include/compiler/compiler.h index a4f7b0ecd3..178ab19a53 100644 --- a/lib/compiler/include/compiler/compiler.h +++ b/lib/compiler/include/compiler/compiler.h @@ -3,7 +3,7 @@ #include "pcg/cost_values.h" #include "pcg/machine_view.h" -#include "pcg/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/tensor_mapping.h" namespace FlexFlow { diff --git a/lib/compiler/include/compiler/cost_estimate.h b/lib/compiler/include/compiler/cost_estimate.h index 5b6eae20f8..2e4ff8448b 100644 --- a/lib/compiler/include/compiler/cost_estimate.h +++ b/lib/compiler/include/compiler/cost_estimate.h @@ -5,7 +5,7 @@ #include "op-attrs/operator_attrs.h" #include "op-attrs/parallel_tensor_shape.h" #include "pcg/machine_view.h" -#include "pcg/parallel_tensor_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h" namespace FlexFlow { diff --git a/lib/compiler/src/graph_utils.h b/lib/compiler/include/compiler/graph_utils.h similarity index 66% rename from lib/compiler/src/graph_utils.h rename to lib/compiler/include/compiler/graph_utils.h index 711a253b61..1370357837 100644 --- a/lib/compiler/src/graph_utils.h +++ b/lib/compiler/include/compiler/graph_utils.h @@ -2,6 +2,10 @@ #define _FLEXFLOW_COMPILER_GRAPH_UTILS_H #include "compiler/unity_algorithm.h" +#include "pcg/computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "substitutions/sub_parallel_computation_graph.dtg.h" +#include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" namespace FlexFlow { @@ -9,8 +13,7 @@ SerialParallelDecomposition get_serial_parallel_decomposition(ParallelComputationGraph const &pcg); ParallelComputationGraph cg_to_pcg(ComputationGraph const &g); -SubParallelComputationGraphView - pcg_to_subpcg(ParallelComputationGraph const &g); +SubParallelComputationGraph pcg_to_subpcg(ParallelComputationGraph const &g); // NOTE(@wmdi): I think we should have the following interfaces in the graph // library eventually. diff --git a/lib/compiler/include/compiler/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping.h index 8b21b9522f..5d17cbb373 100644 --- a/lib/compiler/include/compiler/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping.h @@ -1,37 +1,21 @@ #ifndef _FLEXFLOW_COMPILER_MACHINE_MAPPING_H #define _FLEXFLOW_COMPILER_MACHINE_MAPPING_H +#include "compiler/machine_mapping.dtg.h" +#include "compiler/optimal_cost_state.dtg.h" #include "cost_estimate.h" +#include "pcg/machine_specification.dtg.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.h" -#include "pcg/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "substitutions/sub_parallel_computation_graph.h" +#include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" namespace FlexFlow { -using SubParallelComputationGraphView = - OutputLabelledOpenMultiDiGraphView; +MachineMapping combine(MachineMapping const &, MachineMapping const &); -struct MachineMapping { - static MachineMapping combine(MachineMapping const &, MachineMapping const &); - static bool nodes_are_disjoint(MachineMapping const &m1, - MachineMapping const &m2); - - req> machine_views; -}; -FF_VISITABLE_STRUCT(MachineMapping, machine_views); - -struct OptimalCostState { - SerialParallelDecomposition subgraph; - MachineSpecification resource; - std::unordered_map given_machine_views; - req> frontier_machine_views; -}; -FF_VISITABLE_STRUCT(OptimalCostState, - subgraph, - resource, - given_machine_views, - frontier_machine_views); +bool nodes_are_disjoint(MachineMapping const &m1, MachineMapping const &m2); struct OptimalCostResult { static OptimalCostResult sequential_combine(OptimalCostResult const &s1, @@ -60,26 +44,26 @@ class OptimalCostCache { std::unordered_map cache; }; -OptimalCostResult - optimal_cost(ParallelComputationGraph const &g, - std::function( - Operator const &, MachineSpecification const &)> const - &allowed_machine_views, - CostEstimator const &cost_estimator, - MachineSpecification const &resources, - OptimalCostCache &cached_subgraph_costs); +OptimalCostResult optimal_cost( + ParallelComputationGraph const &g, + std::function( + ParallelLayerAttrs const &, MachineSpecification const &)> const + &allowed_machine_views, + CostEstimator const &cost_estimator, + MachineSpecification const &resources, + OptimalCostCache &cached_subgraph_costs); } // namespace FlexFlow -namespace std { - -template <> -struct hash> { - size_t operator()( - std::unordered_map const &g) - const; -}; +// namespace std { +// +// template <> +// struct hash> { +// size_t operator()( +// std::unordered_map const &g) +// const; +// }; -}; // namespace std +// }; // namespace std #endif diff --git a/lib/compiler/include/compiler/machine_mapping.struct.toml b/lib/compiler/include/compiler/machine_mapping.struct.toml new file mode 100644 index 0000000000..4c4912a3fd --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "MachineMapping" +features = [ + "eq", + # "ord", + "hash", + # "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "utils/graph/node/node.dtg.h", + "pcg/machine_view.dtg.h", + "utils/hash/unordered_map.h", + "utils/fmt/unordered_map.h", +] + +[[fields]] +name = "machine_views" +type = "std::unordered_map<::FlexFlow::Node, ::FlexFlow::MachineView>" \ No newline at end of file diff --git a/lib/compiler/include/compiler/optimal_cost_state.struct.toml b/lib/compiler/include/compiler/optimal_cost_state.struct.toml new file mode 100644 index 0000000000..50496f661b --- /dev/null +++ b/lib/compiler/include/compiler/optimal_cost_state.struct.toml @@ -0,0 +1,36 @@ +namespace = "FlexFlow" +name = "OptimalCostState" +features = [ + "eq", + # "ord", + "hash", + # "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h", + "pcg/machine_specification.dtg.h", + "pcg/machine_view.dtg.h", + "utils/graph/node/node.dtg.h", + "utils/graph/open_dataflow_graph/open_dataflow_edge.dtg.h", + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", +] + +[[fields]] +name = "subgraph" +type = "::FlexFlow::SerialParallelDecomposition" + +[[fields]] +name = "resource" +type = "::FlexFlow::MachineSpecification" + +[[fields]] +name = "given_machine_views" +type = "std::unordered_map<::FlexFlow::Node, ::FlexFlow::MachineView>" + +[[fields]] +name = "frontier_machine_views" +type = "std::unordered_map<::FlexFlow::OpenDataflowEdge, ::FlexFlow::MachineView>" \ No newline at end of file diff --git a/lib/compiler/include/compiler/unity_algorithm.h b/lib/compiler/include/compiler/unity_algorithm.h index 7d7a7a74dc..abddef37ed 100644 --- a/lib/compiler/include/compiler/unity_algorithm.h +++ b/lib/compiler/include/compiler/unity_algorithm.h @@ -1,17 +1,22 @@ #ifndef _FLEXFLOW_COMPILER_UNITY_ALGORITHM_H #define _FLEXFLOW_COMPILER_UNITY_ALGORITHM_H +#include "compiler/machine_mapping.h" #include "cost_estimate.h" #include "machine_mapping.h" #include "pcg/computation_graph.h" +#include "pcg/machine_specification.dtg.h" #include "substitutions/sub_parallel_computation_graph.h" - namespace FlexFlow { struct Strategy { ParallelComputationGraph pcg; MachineMapping machine_mapping; req runtime; + friend bool operator!=(Strategy const &lhs, Strategy const &rhs) { + return (lhs.machine_mapping != rhs.machine_mapping) || + (lhs.runtime != rhs.runtime); + } }; FF_VISITABLE_STRUCT(Strategy, pcg, machine_mapping, runtime); @@ -27,14 +32,14 @@ struct OptimizerConfig { int max_num_ops; }; -Strategy - graph_optimize(ComputationGraph &cg, - CostEstimator const &cost_estimator, - MachineSpecification const &resources, - std::function( - Operator const &, MachineSpecification const &)> const - &allowed_machine_views, - OptimizerConfig const &opt_config); +Strategy graph_optimize( + ComputationGraph &cg, + CostEstimator const &cost_estimator, + MachineSpecification const &resources, + std::function( + ParallelLayerAttrs const &, MachineSpecification const &)> const + &allowed_machine_views, + OptimizerConfig const &opt_config); } // namespace FlexFlow diff --git a/lib/compiler/src/graph_utils.cc b/lib/compiler/src/graph_utils.cc index 5b76beb8c0..08db219a21 100644 --- a/lib/compiler/src/graph_utils.cc +++ b/lib/compiler/src/graph_utils.cc @@ -1,63 +1,70 @@ -#include "graph_utils.h" - +#include "compiler/graph_utils.h" +#include "pcg/computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "substitutions/sub_parallel_computation_graph.dtg.h" +#include "utils/containers/without_order.h" +#include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" namespace FlexFlow { SerialParallelDecomposition get_serial_parallel_decomposition(ParallelComputationGraph const &pcg) { - return get_serial_parallel_decomposition(pcg.value()); + NOT_IMPLEMENTED(); + // return get_serial_parallel_decomposition(pcg.raw_graph); } ParallelComputationGraph cg_to_pcg(ComputationGraph const &g) { NOT_IMPLEMENTED(); } -SubParallelComputationGraphView - pcg_to_subpcg(ParallelComputationGraph const &pcg) { - return view_output_labelled_as_output_labelled_open(pcg.value()); +SubParallelComputationGraph pcg_to_subpcg(ParallelComputationGraph const &pcg) { + NOT_IMPLEMENTED(); + // return view_output_labelled_as_output_labelled_open(pcg.raw_graph); } -std::vector - get_sorted_node_input_edges(ParallelComputationGraph const &pcg, - Node const &n) { - std::unordered_map> incoming_edges = - get_incoming_edges_by_idx(pcg, n); +// std::vector +// get_sorted_node_input_edges(ParallelComputationGraph const &pcg, +// Node const &n) { +// std::unordered_map> +// incoming_edges = +// get_incoming_edges_by_idx(pcg, n); - std::vector result; - for (auto const &p_id_edge_set : incoming_edges) { - result.push_back(get_only(p_id_edge_set.second)); - } +// std::vector result; +// for (auto const &p_id_edge_set : incoming_edges) { +// result.push_back(get_only(p_id_edge_set.second)); +// } - return result; -} +// return result; +// } -std::unordered_map - infer_tensor_shapes(ParallelComputationGraph const &pcg) { - std::unordered_map result; - for (Node const &n : get_topological_ordering(pcg)) { - PCGOperatorAttrs op = pcg.value().at(n); +// std::unordered_map +// infer_tensor_shapes(ParallelComputationGraph const &pcg) { +// std::unordered_map result; +// for (Node const &n : get_topological_ordering(pcg)) { +// PCGOperatorAttrs op = pcg.raw_graph.at(n); - std::vector input_tensor_shapes = - vector_transform([&](MultiDiEdge const &e) { return result.at(e); }, - get_sorted_node_input_edges(pcg, n)); +// std::vector input_tensor_shapes = +// vector_transform([&](MultiDiEdge const &e) { return result.at(e); }, +// get_sorted_node_input_edges(pcg, n)); - std::vector output_tensor_shapes = - get_output_shapes(op, input_tensor_shapes); +// std::vector output_tensor_shapes = +// get_output_shapes(op, input_tensor_shapes); - auto outgoing_edges = get_outgoing_edges_by_idx(pcg, n); +// auto outgoing_edges = get_outgoing_edges_by_idx(pcg, n); - int i = 0; +// int i = 0; - for (auto const &[node_port, edges] : outgoing_edges) { - for (MultiDiEdge const &e : edges) { - result.insert({e, output_tensor_shapes[i++]}); - } - } - } +// for (auto const &[node_port, edges] : outgoing_edges) { +// for (MultiDiEdge const &e : edges) { +// result.insert({e, output_tensor_shapes[i++]}); +// } +// } +// } - assert(result.size() == get_edges(pcg.value()).size()); +// assert(result.size() == get_edges(pcg.raw_graph).size()); - return result; -} +// return result; +// } /* template /* } */ /* } */ -struct GetNodes { - template - std::unordered_set operator()(T const &t) { - return get_nodes(t); - } -}; - -std::unordered_set get_nodes(SerialParallelDecomposition const &sp) { - return visit(GetNodes{}, sp); -} - -std::unordered_set get_nodes(Serial const &serial) { - return set_union( - transform(serial.children, [](std::variant const child) { - return visit(GetNodes{}, child); - })); -} - -std::unordered_set get_nodes(Parallel const ¶llel) { - return set_union( - transform(parallel.children, [](std::variant const child) { - return visit(GetNodes{}, child); - })); -} - -std::unordered_set get_nodes(Node const &node) { - return {node}; -} +// struct GetNodes { +// template +// std::unordered_set operator()(T const &t) { +// return get_nodes(t); +// } +// }; + +// std::unordered_set get_nodes(SerialParallelDecomposition const &sp) { +// return std::visit(GetNodes{}, sp.raw_variant); +// } + +// std::unordered_set get_nodes(SerialSplit const &serial) { +// return set_union( +// transform(serial.children, [](std::variant const +// child) { +// return std::visit(GetNodes{}, child); +// })); +// } + +// std::unordered_set get_nodes(ParallelSplit const ¶llel) { +// return set_union( +// transform(parallel.children, [](std::variant const +// child) { +// return std::visit(GetNodes{}, child); +// })); +// } + +// std::unordered_set get_nodes(Node const &node) { +// return {node}; +// } } // namespace FlexFlow diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 2b08e9fe23..12eacb2a30 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -1,36 +1,48 @@ #include "compiler/machine_mapping.h" #include "compiler/cost_estimate.h" -#include "graph_utils.h" -#include "pcg/parallel_computation_graph.h" +#include "compiler/graph_utils.h" +#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_specification.h" +#include "pcg/machine_view.dtg.h" +#include "pcg/machine_view.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "utils/containers.h" +#include "utils/containers/are_disjoint.h" +#include "utils/containers/as_vector.h" +#include "utils/containers/contains_key.h" +#include "utils/containers/get_only.h" +#include "utils/containers/keys.h" #include "utils/exception.h" -#include "utils/graph/serialparallel.h" +#include "utils/graph/graph_split.dtg.h" +#include "utils/graph/node/algorithms.h" +#include "utils/graph/open_dataflow_graph/algorithms.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_subgraph.h" +#include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" +#include "utils/graph/serial_parallel/serial_parallel_decomposition.h" +#include "utils/graph/serial_parallel/serial_parallel_splits.h" namespace FlexFlow { -MachineMapping MachineMapping::combine(MachineMapping const &s1, - MachineMapping const &s2) { +MachineMapping combine(MachineMapping const &s1, MachineMapping const &s2) { return MachineMapping{merge_maps(s1.machine_views, s2.machine_views)}; } -bool MachineMapping::nodes_are_disjoint(MachineMapping const &m1, - MachineMapping const &m2) { +bool nodes_are_disjoint(MachineMapping const &m1, MachineMapping const &m2) { return are_disjoint(keys(m1.machine_views), keys(m2.machine_views)); } OptimalCostResult OptimalCostResult::sequential_combine(OptimalCostResult const &s1, OptimalCostResult const &s2) { - return OptimalCostResult{ - s1.runtime + s2.runtime, - MachineMapping::combine(s1.machine_mapping, s2.machine_mapping)}; + return OptimalCostResult{s1.runtime + s2.runtime, + combine(s1.machine_mapping, s2.machine_mapping)}; } OptimalCostResult OptimalCostResult::parallel_combine(OptimalCostResult const &s1, OptimalCostResult const &s2) { - return OptimalCostResult{ - std::max(s1.runtime, s2.runtime), - MachineMapping::combine(s1.machine_mapping, s2.machine_mapping)}; + return OptimalCostResult{std::max(s1.runtime, s2.runtime), + combine(s1.machine_mapping, s2.machine_mapping)}; } OptimalCostResult OptimalCostResult::infinity() { @@ -71,42 +83,60 @@ std::vector> } // We may replace this by having unflattened AST -template std::pair - decompose(T const &t) { - if (t.children.size() == 2) { - return {widen(t.children[0]), - widen(t.children[1])}; + decompose(SerialSplit const &serial) { + if (serial.children.size() == 2) { + return {widen(serial.children[0]), + widen(serial.children[1])}; } - T decompn1 = t; + SerialSplit decompn1 = serial; decompn1.children.pop_back(); - return {decompn1, widen(t.children.back())}; + return {SerialParallelDecomposition(decompn1), + widen(serial.children.back())}; +} + +std::pair + decompose(ParallelSplit const ¶llel) { + if (parallel.children.size() == 2) { + std::vector children = + transform(as_vector(parallel.children), [&](auto const &child) { + return widen(child); + }); + return {children[0], children[1]}; + } + ParallelSplit decompn1 = parallel; + std::variant child = *parallel.children.begin(); + decompn1.children.erase(child); + return {SerialParallelDecomposition(decompn1), + widen(child)}; } GraphSplit get_graph_split(SerialParallelDecomposition const &pre_decomposition, SerialParallelDecomposition const &post_decomposition) { - return {get_nodes(pre_decomposition), get_nodes(post_decomposition)}; + return GraphSplit{get_nodes(pre_decomposition), + get_nodes(post_decomposition)}; } -float estimate_cost(SubParallelComputationGraphView const &g, +float estimate_cost(SubParallelComputationGraph const &g, CostEstimator const &estimator, MachineMapping const &device_mapping, - std::unordered_map const + std::unordered_map const &frontier_machine_views) { // TODO: Consider parallelism float cost = 0; - for (Node const &node : get_nodes(g)) { - std::unordered_set incoming_edges = - get_incoming_edges(g, node); - std::vector inputs = - transform(as_vector(incoming_edges), - [&](UpwardOpenMultiDiEdge const &input_edge) { - return g.at(input_edge).get_shape(); - }); - cost += estimator.estimate_cost( - g.at(node).attrs, inputs, device_mapping.machine_views.at(node)); - } + // for (Node const &node : get_nodes(g.raw_graph)) { + // std::vector incoming_edges = + // get_incoming_edges(g.raw_graph, node); + // std::vector inputs = + // transform(incoming_edges, + // [&](OpenDataflowEdge const &input_edge) { + // return g.raw_graph.at(input_edge).get_shape(); + // }); + // cost += estimator.estimate_cost( + // g.raw_graph.at(node).op_attrs, inputs, + // device_mapping.machine_views.at(node)); + // } return cost; } @@ -118,7 +148,7 @@ struct MachineMappingSearcher { MachineMappingSearcher( CostEstimator cost_estimator, std::function( - Operator const &, MachineSpecification const &)> const + ParallelLayerAttrs const &, MachineSpecification const &)> const &allowed_machine_views, OptimalCostCache &cached_subgraph_costs) : cost_estimator(cost_estimator), @@ -126,7 +156,7 @@ struct MachineMappingSearcher { cached_subgraph_costs(cached_subgraph_costs) {} CostEstimator cost_estimator; - std::function(Operator const &, + std::function(ParallelLayerAttrs const &, MachineSpecification const &)> allowed_machine_views; OptimalCostCache &cached_subgraph_costs; @@ -134,24 +164,27 @@ struct MachineMappingSearcher { struct OptimalCostFunctor { OptimalCostFunctor( MachineMappingSearcher *searcher, - SubParallelComputationGraphView const &g, + SubParallelComputationGraph const &g, MachineSpecification resource, std::unordered_map given_machine_views, - std::unordered_map frontier_machine_views) + std::unordered_map + frontier_machine_views) : searcher(searcher), g(g), resource(resource), given_machine_views(given_machine_views), frontier_machine_views(frontier_machine_views) {} MachineMappingSearcher *searcher; - SubParallelComputationGraphView const &g; + SubParallelComputationGraph const &g; MachineSpecification resource; std::unordered_map given_machine_views; - std::unordered_map frontier_machine_views; + std::unordered_map frontier_machine_views; template OptimalCostResult operator()(T const &t) { - OptimalCostState state{ - t, resource, given_machine_views, frontier_machine_views}; + OptimalCostState state{SerialParallelDecomposition{t}, + resource, + given_machine_views, + frontier_machine_views}; std::optional cached_result = searcher->cached_subgraph_costs.load(state); @@ -167,135 +200,137 @@ struct MachineMappingSearcher { }; OptimalCostResult - optimal_cost(SubParallelComputationGraphView const &g, + optimal_cost(SubParallelComputationGraph const &g, MachineSpecification resource, SerialParallelDecomposition const &sp_decomposition) { - return visit(OptimalCostFunctor(this, g, resource, {}, {}), - sp_decomposition); + return std::visit(OptimalCostFunctor(this, g, resource, {}, {}), + sp_decomposition.raw_variant); } OptimalCostResult optimal_cost( - Serial const &serial, - SubParallelComputationGraphView const &g, + SerialSplit const &serial, + SubParallelComputationGraph const &g, MachineSpecification const &resource, std::unordered_map const &given_machine_views, - std::unordered_map const + std::unordered_map const &frontier_machine_views) { - - auto decomposed = decompose(serial); - SerialParallelDecomposition pre_decompn = decomposed.first; - SerialParallelDecomposition post_decompn = decomposed.second; - - GraphSplit graph_split = get_graph_split(pre_decompn, post_decompn); - SubParallelComputationGraphView pre_graph = - get_subgraph(g, graph_split.first); - SubParallelComputationGraphView post_graph = - get_subgraph(g, graph_split.second); - - std::unordered_set post_graph_sources = - get_closed_sources(post_graph); - - assert(post_graph_sources.size() == 1); // assume perfect SP - - Node split_point = get_only(post_graph_sources); - OutputMultiDiEdge split_edge = get_only(get_open_outputs(pre_graph)); - - OptimalCostResult optimal_result = OptimalCostResult::infinity(); - - for (MachineView const &mv : - allowed_machine_views(g.at(split_point), resource)) { - std::unordered_map new_given_machine_views = - given_machine_views; - new_given_machine_views.emplace(split_point, mv); - std::unordered_map - new_frontier_machine_views = frontier_machine_views; - new_frontier_machine_views.emplace(split_edge, mv); - minimize_runtime(optimal_result, - OptimalCostResult::sequential_combine( - visit(OptimalCostFunctor(this, - pre_graph, - resource, - given_machine_views, - new_frontier_machine_views), - pre_decompn), - visit(OptimalCostFunctor(this, - post_graph, - resource, - new_given_machine_views, - frontier_machine_views), - post_decompn))); - } - - return optimal_result; + NOT_IMPLEMENTED(); + // OptimalCostResult optimal_result = OptimalCostResult::infinity(); + + // auto decomposed = decompose(serial); + // SerialParallelDecomposition pre_decompn = decomposed.first; + // SerialParallelDecomposition post_decompn = decomposed.second; + + // GraphSplit graph_split = get_graph_split(pre_decompn, post_decompn); + // SubParallelComputationGraph pre_graph = + // get_subgraph(g, graph_split.first); + // SubParallelComputationGraph post_graph = + // get_subgraph(g, graph_split.second); + + // std::unordered_set post_graph_sources = + // get_closed_sources(post_graph); + + // assert(post_graph_sources.size() == 1); // assume perfect SP + + // Node split_point = get_only(post_graph_sources); + // OutputMultiDiEdge split_edge = get_only(get_open_outputs(pre_graph)); + + // for (MachineView const &mv : + // allowed_machine_views(g.raw_graph.at(split_point), resource)) { + // std::unordered_map new_given_machine_views = + // given_machine_views; + // new_given_machine_views.emplace(split_point, mv); + // std::unordered_map + // new_frontier_machine_views = frontier_machine_views; + // new_frontier_machine_views.emplace(split_edge, mv); + // minimize_runtime( + // optimal_result, + // OptimalCostResult::sequential_combine( + // std::visit(OptimalCostFunctor(this, + // pre_graph, + // resource, + // given_machine_views, + // new_frontier_machine_views), + // pre_decompn.raw_variant), + // std::visit(OptimalCostFunctor(this, + // post_graph, + // resource, + // new_given_machine_views, + // frontier_machine_views), + // post_decompn.raw_variant))); + // } + + // return optimal_result; } OptimalCostResult optimal_cost( - Parallel const ¶llel, - SubParallelComputationGraphView const &g, + ParallelSplit const ¶llel, + SubParallelComputationGraph const &g, MachineSpecification const &resource, std::unordered_map const &given_machine_views, - std::unordered_map const + std::unordered_map const &frontier_machine_views) { - auto decomposed = decompose(parallel); - SerialParallelDecomposition decompn1 = decomposed.first; - SerialParallelDecomposition decompn2 = decomposed.second; - - GraphSplit graph_split = get_graph_split(decompn1, decompn2); - SubParallelComputationGraphView g1 = get_subgraph( - g, graph_split.first), - g2 = get_subgraph( - g, graph_split.second); - - OptimalCostResult optimal_result = OptimalCostResult::sequential_combine( - visit(OptimalCostFunctor(this, - g1, - resource, - given_machine_views, - frontier_machine_views), - decompn1), - visit(OptimalCostFunctor(this, - g2, - resource, - given_machine_views, - frontier_machine_views), - decompn2)); - - for (auto const &resource_split : get_resource_split(resource)) { - minimize_runtime(optimal_result, - OptimalCostResult::parallel_combine( - visit(OptimalCostFunctor(this, - g1, - resource_split.first, - given_machine_views, - frontier_machine_views), - decompn1), - visit(OptimalCostFunctor(this, - g2, - resource_split.second, - given_machine_views, - frontier_machine_views), - decompn2))); - } - return optimal_result; + NOT_IMPLEMENTED(); + // auto decomposed = decompose(parallel); + // SerialParallelDecomposition decompn1 = decomposed.first; + // SerialParallelDecomposition decompn2 = decomposed.second; + + // GraphSplit graph_split = get_graph_split(decompn1, decompn2); + // SubParallelComputationGraph g1 = get_subgraph(g, graph_split.first), + // g2 = get_subgraph(g, graph_split.second); + + // OptimalCostResult optimal_result = OptimalCostResult::sequential_combine( + // std::visit(OptimalCostFunctor(this, + // g1, + // resource, + // given_machine_views, + // frontier_machine_views), + // decompn1.raw_variant), + // std::visit(OptimalCostFunctor(this, + // g2, + // resource, + // given_machine_views, + // frontier_machine_views), + // decompn2.raw_variant)); + + // for (auto const &resource_split : get_resource_split(resource)) { + // minimize_runtime( + // optimal_result, + // OptimalCostResult::parallel_combine( + // std::visit(OptimalCostFunctor(this, + // g1, + // resource_split.first, + // given_machine_views, + // frontier_machine_views), + // decompn1.raw_variant), + // std::visit(OptimalCostFunctor(this, + // g2, + // resource_split.second, + // given_machine_views, + // frontier_machine_views), + // decompn2.raw_variant))); + // } + + // return optimal_result; } OptimalCostResult optimal_cost( Node const &node, - SubParallelComputationGraphView const &g, + SubParallelComputationGraph const &g, MachineSpecification const &resource, std::unordered_map const &given_machine_views, - std::unordered_map const + std::unordered_map const &frontier_machine_views) { if (contains_key(given_machine_views, node)) { - assert(contains(allowed_machine_views(g.at(node), resource), + assert(contains(allowed_machine_views(g.raw_graph.at(node), resource), given_machine_views.at(node))); MachineMapping mv_map{given_machine_views}; return {estimate_cost(g, cost_estimator, mv_map, frontier_machine_views), mv_map}; } else { OptimalCostResult optimal_result = OptimalCostResult::infinity(); - for (auto mv : allowed_machine_views(g.at(node), resource)) { + for (auto mv : allowed_machine_views(g.raw_graph.at(node), resource)) { MachineMapping mv_map{{{node, mv}}}; minimize_runtime( optimal_result, @@ -307,17 +342,17 @@ struct MachineMappingSearcher { } }; -OptimalCostResult - optimal_cost(ParallelComputationGraph const &g, - std::function( - Operator const &, MachineSpecification const &)> const - &allowed_machine_views, - CostEstimator const &cost_estimator, - MachineSpecification const &resources, - OptimalCostCache &cached_subgraph_costs) { +OptimalCostResult optimal_cost( + ParallelComputationGraph const &g, + std::function( + ParallelLayerAttrs const &, MachineSpecification const &)> const + &allowed_machine_views, + CostEstimator const &cost_estimator, + MachineSpecification const &resources, + OptimalCostCache &cached_subgraph_costs) { SerialParallelDecomposition sp_decomposition = get_serial_parallel_decomposition(g); - SubParallelComputationGraphView subpcg = pcg_to_subpcg(g); + SubParallelComputationGraph subpcg = pcg_to_subpcg(g); MachineMappingSearcher searcher( cost_estimator, allowed_machine_views, cached_subgraph_costs); return searcher.optimal_cost(subpcg, resources, sp_decomposition); diff --git a/lib/compiler/src/unity_algorithm.cc b/lib/compiler/src/unity_algorithm.cc index c9666851db..ba6ef28daa 100644 --- a/lib/compiler/src/unity_algorithm.cc +++ b/lib/compiler/src/unity_algorithm.cc @@ -1,8 +1,10 @@ #include "compiler/unity_algorithm.h" -#include "graph_utils.h" +#include "compiler/graph_utils.h" +#include "compiler/machine_mapping.h" +#include "pcg/machine_specification.dtg.h" #include "substitutions/substitution.h" #include "utils/deduplicated_priority_queue.h" - +#include "utils/graph/node/algorithms.h" namespace FlexFlow { bool StrategyRuntimeCmp::operator()(Strategy const &lhs, Strategy const &rhs) { @@ -26,63 +28,67 @@ std::unordered_set NOT_IMPLEMENTED(); } -Strategy - graph_optimize(ComputationGraph &cg, - CostEstimator const &cost_estimator, - MachineSpecification const &resources, - std::function( - Operator const &, MachineSpecification const &)> const - &allowed_machine_views, - OptimizerConfig const &opt_config) { - - ParallelComputationGraph pcg = cg_to_pcg(cg); +Strategy graph_optimize( + ComputationGraph &cg, + CostEstimator const &cost_estimator, + MachineSpecification const &resources, + std::function( + ParallelLayerAttrs const &, MachineSpecification const &)> const + &allowed_machine_views, + OptimizerConfig const &opt_config) { + NOT_IMPLEMENTED(); + // ParallelComputationGraph pcg = cg_to_pcg(cg); - std::unordered_set subs = get_all_applicable_substitutions(pcg); + // std::unordered_set subs = + // get_all_applicable_substitutions(pcg); - OptimalCostCache cached_subgraph_costs; - DeduplicatedPriorityQueue, StrategyRuntimeCmp> - candidates; + // OptimalCostCache cached_subgraph_costs; + // DeduplicatedPriorityQueue, + // StrategyRuntimeCmp> + // candidates; - OptimalCostResult initial_pcg_result = optimal_cost(pcg, - allowed_machine_views, - cost_estimator, - resources, - cached_subgraph_costs); - Strategy initial_result{ - pcg, initial_pcg_result.machine_mapping, initial_pcg_result.runtime}; + // OptimalCostResult initial_pcg_result = optimal_cost(pcg, + // allowed_machine_views, + // cost_estimator, + // resources, + // cached_subgraph_costs); + // Strategy initial_result{ + // pcg, initial_pcg_result.machine_mapping, initial_pcg_result.runtime}; - Strategy best_result = initial_result; - candidates.push(initial_result); + // Strategy best_result = initial_result; + // candidates.push(initial_result); - for (int iteration = 0; !candidates.empty() && iteration < opt_config.budget; - ++iteration) { - Strategy const ¤t_result = candidates.top(); - candidates.pop(); + // for (int iteration = 0; !candidates.empty() && iteration < + // opt_config.budget; + // ++iteration) { + // Strategy const ¤t_result = candidates.top(); + // candidates.pop(); - if (current_result.runtime < best_result.runtime) { - best_result = current_result; - } else if (current_result.runtime > - best_result.runtime * opt_config.alpha) { - continue; - } + // if (current_result.runtime < best_result.runtime) { + // best_result = current_result; + // } else if (current_result.runtime > + // best_result.runtime * opt_config.alpha) { + // continue; + // } - for (auto const &sub : subs) { - for (auto const &new_pcg : apply_substitution(current_result.pcg, sub)) { - OptimalCostResult c = optimal_cost(new_pcg, - allowed_machine_views, - cost_estimator, - resources, - cached_subgraph_costs); - Strategy new_result{new_pcg, c.machine_mapping, c.runtime}; - if (new_result.runtime <= opt_config.threshold && - get_nodes(new_pcg.value()).size() <= opt_config.max_num_ops) { - candidates.push(new_result); - } - } - } - } + // for (auto const &sub : subs) { + // for (auto const &new_pcg : apply_substitution(current_result.pcg, sub)) + // { + // OptimalCostResult c = optimal_cost(new_pcg, + // allowed_machine_views, + // cost_estimator, + // resources, + // cached_subgraph_costs); + // Strategy new_result{new_pcg, c.machine_mapping, c.runtime}; + // if (new_result.runtime <= opt_config.threshold && + // get_nodes(new_pcg.raw_graph).size() <= opt_config.max_num_ops) { + // candidates.push(new_result); + // } + // } + // } + // } - return best_result; + // return best_result; } } // namespace FlexFlow diff --git a/lib/compiler/test/src/test_cost_estimator.h b/lib/compiler/test/src/test_cost_estimator.h index 9a4ea56156..9417b863e4 100644 --- a/lib/compiler/test/src/test_cost_estimator.h +++ b/lib/compiler/test/src/test_cost_estimator.h @@ -8,6 +8,8 @@ namespace FlexFlow { struct TestCostEstimator : public ICostEstimator { float estimate_cost(PCGOperatorAttrs const &op, std::vector const &inputs, + std::vector const &weights, + std::vector const &outputs, MachineView const &mv) const override { return 0.1; } diff --git a/lib/compiler/test/src/test_labelled_open_graph.cc b/lib/compiler/test/src/test_labelled_open_graph.cc index ccad7b19ff..59fa0f1e5e 100644 --- a/lib/compiler/test/src/test_labelled_open_graph.cc +++ b/lib/compiler/test/src/test_labelled_open_graph.cc @@ -1,130 +1,132 @@ -#include "compiler/unity_algorithm.h" -#include "doctest/doctest.h" -// #include "rapidcheck.h" - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_subgraph(OpenMultiDiGraphView)") { - auto g = OpenMultiDiGraph::create(); - - Node n0 = g.add_node(); - Node n1 = g.add_node(); - Node n2 = g.add_node(); - Node n3 = g.add_node(); - Node n4 = g.add_node(); - - NodePort p0 = g.add_node_port(); - NodePort p1 = g.add_node_port(); - NodePort p2 = g.add_node_port(); - NodePort p3 = g.add_node_port(); - NodePort p4 = g.add_node_port(); - NodePort p5 = g.add_node_port(); - NodePort p6 = g.add_node_port(); - NodePort p7 = g.add_node_port(); - NodePort p8 = g.add_node_port(); - NodePort p9 = g.add_node_port(); - - MultiDiEdge e0{n1, p1, n0, p0}; - MultiDiEdge e1{n2, p2, n0, p0}; - MultiDiEdge e2{n3, p5, n1, p3}; - MultiDiEdge e3{n3, p6, n2, p4}; - MultiDiEdge e4{n4, p8, n3, p7}; - OutputMultiDiEdge e5{n4, p9, std::make_pair(p9.value(), p9.value())}; - - g.add_edge(e0); - g.add_edge(e1); - g.add_edge(e2); - g.add_edge(e3); - g.add_edge(e4); - g.add_edge(e5); - - std::unordered_set node_set0{n3, n4}; - - auto subgraph0 = get_subgraph(g, node_set0); - auto subgraph1 = get_subgraph(g, node_set0); - auto subgraph2 = - get_subgraph(g, node_set0); - auto subgraph3 = get_subgraph(g, node_set0); - - CHECK(bool(get_nodes(subgraph0) == node_set0)); - CHECK(bool(get_nodes(subgraph1) == node_set0)); - CHECK(bool(get_nodes(subgraph2) == node_set0)); - CHECK(bool(get_nodes(subgraph3) == node_set0)); - - std::unordered_set input_set{split_edge(e2).second, - split_edge(e3).second}; - std::unordered_set output_set{e5}; - - CHECK(bool(get_open_inputs(subgraph0) == input_set)); - CHECK(bool(get_open_inputs(subgraph1) == input_set)); - CHECK(bool(get_open_inputs(subgraph2).empty())); - CHECK(bool(get_open_inputs(subgraph3).empty())); - - CHECK(bool(get_open_outputs(subgraph0) == output_set)); - CHECK(bool(get_open_outputs(subgraph1).empty())); - CHECK(bool(get_open_outputs(subgraph2) == output_set)); - CHECK(bool(get_open_outputs(subgraph3).empty())); - - CHECK(bool(get_edges(subgraph0) == - std::unordered_set{ - split_edge(e2).second, split_edge(e3).second, e4, e5})); - CHECK(bool(get_edges(subgraph1) == - std::unordered_set{ - split_edge(e2).second, split_edge(e3).second, e4})); - CHECK(bool(get_edges(subgraph2) == - std::unordered_set{e4, e5})); - CHECK( - bool(get_edges(subgraph3) == std::unordered_set{e4})); - - CHECK(bool(get_closed_sources(subgraph2) == std::unordered_set{n3})); - } - - TEST_CASE("view OutputLabelledMultiDiGraph as open") { - OutputLabelledMultiDiGraph g = - OutputLabelledMultiDiGraph::create< - UnorderedOutputLabelledMultiDiGraph>(); - - Node n0 = g.add_node(0); - Node n1 = g.add_node(1); - - NodePort p0 = g.add_node_port(); - NodePort p1 = g.add_node_port(); - - MultiDiEdge e0{n1, p1, n0, p0}; - - g.add_edge(e0); - g.add_output(e0, 2); - - CHECK(bool(get_edges(g).size() == 1)); - - OutputLabelledOpenMultiDiGraphView open_graph = - view_output_labelled_as_output_labelled_open(g); - - CHECK(bool(open_graph.at(n0) == 0)); - CHECK(bool(open_graph.at(n1) == 1)); - CHECK(bool(open_graph.at(e0) == 2)); - - CHECK(get_edges(open_graph).size() == 1); - } - - TEST_CASE("OutputLabelledOpenMultiDiGraph") { - OutputLabelledOpenMultiDiGraph g = - OutputLabelledOpenMultiDiGraph::create< - UnorderedOutputLabelledOpenMultiDiGraph>(); - - Node n0 = g.add_node(0); - Node n1 = g.add_node(1); - - NodePort p0 = g.add_node_port(); - NodePort p1 = g.add_node_port(); - - MultiDiEdge e0{n1, p1, n0, p0}; - - g.add_edge(e0); - g.add_label(e0, 2); - - CHECK(bool(g.query_edges(OpenMultiDiEdgeQuery::all()).size() == 1)); - CHECK(bool(get_edges(g).size() == 1)); - } -} +// #include "compiler/unity_algorithm.h" +// #include "doctest/doctest.h" +// // #include "rapidcheck.h" + +// using namespace FlexFlow; + +// TEST_SUITE(FF_TEST_SUITE) { +// TEST_CASE("get_subgraph(OpenMultiDiGraphView)") { +// auto g = OpenMultiDiGraph::create(); + +// Node n0 = g.add_node(); +// Node n1 = g.add_node(); +// Node n2 = g.add_node(); +// Node n3 = g.add_node(); +// Node n4 = g.add_node(); + +// NodePort p0 = g.add_node_port(); +// NodePort p1 = g.add_node_port(); +// NodePort p2 = g.add_node_port(); +// NodePort p3 = g.add_node_port(); +// NodePort p4 = g.add_node_port(); +// NodePort p5 = g.add_node_port(); +// NodePort p6 = g.add_node_port(); +// NodePort p7 = g.add_node_port(); +// NodePort p8 = g.add_node_port(); +// NodePort p9 = g.add_node_port(); + +// MultiDiEdge e0{n1, p1, n0, p0}; +// MultiDiEdge e1{n2, p2, n0, p0}; +// MultiDiEdge e2{n3, p5, n1, p3}; +// MultiDiEdge e3{n3, p6, n2, p4}; +// MultiDiEdge e4{n4, p8, n3, p7}; +// OutputMultiDiEdge e5{n4, p9, std::make_pair(p9.value(), p9.value())}; + +// g.add_edge(e0); +// g.add_edge(e1); +// g.add_edge(e2); +// g.add_edge(e3); +// g.add_edge(e4); +// g.add_edge(e5); + +// std::unordered_set node_set0{n3, n4}; + +// auto subgraph0 = get_subgraph(g, node_set0); +// auto subgraph1 = get_subgraph(g, +// node_set0); auto subgraph2 = +// get_subgraph(g, node_set0); +// auto subgraph3 = get_subgraph(g, node_set0); + +// CHECK(bool(get_nodes(subgraph0) == node_set0)); +// CHECK(bool(get_nodes(subgraph1) == node_set0)); +// CHECK(bool(get_nodes(subgraph2) == node_set0)); +// CHECK(bool(get_nodes(subgraph3) == node_set0)); + +// std::unordered_set input_set{split_edge(e2).second, +// split_edge(e3).second}; +// std::unordered_set output_set{e5}; + +// CHECK(bool(get_open_inputs(subgraph0) == input_set)); +// CHECK(bool(get_open_inputs(subgraph1) == input_set)); +// CHECK(bool(get_open_inputs(subgraph2).empty())); +// CHECK(bool(get_open_inputs(subgraph3).empty())); + +// CHECK(bool(get_open_outputs(subgraph0) == output_set)); +// CHECK(bool(get_open_outputs(subgraph1).empty())); +// CHECK(bool(get_open_outputs(subgraph2) == output_set)); +// CHECK(bool(get_open_outputs(subgraph3).empty())); + +// CHECK(bool(get_edges(subgraph0) == +// std::unordered_set{ +// split_edge(e2).second, split_edge(e3).second, e4, e5})); +// CHECK(bool(get_edges(subgraph1) == +// std::unordered_set{ +// split_edge(e2).second, split_edge(e3).second, e4})); +// CHECK(bool(get_edges(subgraph2) == +// std::unordered_set{e4, e5})); +// CHECK( +// bool(get_edges(subgraph3) == +// std::unordered_set{e4})); + +// CHECK(bool(get_closed_sources(subgraph2) == +// std::unordered_set{n3})); +// } + +// TEST_CASE("view OutputLabelledMultiDiGraph as open") { +// OutputLabelledMultiDiGraph g = +// OutputLabelledMultiDiGraph::create< +// UnorderedOutputLabelledMultiDiGraph>(); + +// Node n0 = g.add_node(0); +// Node n1 = g.add_node(1); + +// NodePort p0 = g.add_node_port(); +// NodePort p1 = g.add_node_port(); + +// MultiDiEdge e0{n1, p1, n0, p0}; + +// g.add_edge(e0); +// g.add_output(e0, 2); + +// CHECK(bool(get_edges(g).size() == 1)); + +// OutputLabelledOpenMultiDiGraphView open_graph = +// view_output_labelled_as_output_labelled_open(g); + +// CHECK(bool(open_graph.at(n0) == 0)); +// CHECK(bool(open_graph.at(n1) == 1)); +// CHECK(bool(open_graph.at(e0) == 2)); + +// CHECK(get_edges(open_graph).size() == 1); +// } + +// TEST_CASE("OutputLabelledOpenMultiDiGraph") { +// OutputLabelledOpenMultiDiGraph g = +// OutputLabelledOpenMultiDiGraph::create< +// UnorderedOutputLabelledOpenMultiDiGraph>(); + +// Node n0 = g.add_node(0); +// Node n1 = g.add_node(1); + +// NodePort p0 = g.add_node_port(); +// NodePort p1 = g.add_node_port(); + +// MultiDiEdge e0{n1, p1, n0, p0}; + +// g.add_edge(e0); +// g.add_label(e0, 2); + +// CHECK(bool(g.query_edges(OpenMultiDiEdgeQuery::all()).size() == 1)); +// CHECK(bool(get_edges(g).size() == 1)); +// } +// } diff --git a/lib/compiler/test/src/test_open_graph.cc b/lib/compiler/test/src/test_open_graph.cc index db3630d316..e3426aa293 100644 --- a/lib/compiler/test/src/test_open_graph.cc +++ b/lib/compiler/test/src/test_open_graph.cc @@ -1,76 +1,81 @@ -#include "compiler/unity_algorithm.h" -#include "doctest/doctest.h" -#include "utils/graph/algorithms.h" - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_source_sink_open_graph") { - OpenMultiDiGraph g = OpenMultiDiGraph::create(); - - Node n0 = g.add_node(); - NodePort p0 = g.add_node_port(); - InputMultiDiEdge e0{ - n0, g.add_node_port(), std::make_pair(n0.value(), n0.value())}; - g.add_edge(e0); - - CHECK(bool(get_closed_sources(g) == std::unordered_set{})); - CHECK(bool(get_closed_sinks(g) == std::unordered_set{n0})); - - CHECK(bool(get_open_sources(g) == std::unordered_set{n0})); - CHECK(bool(get_open_sinks(g) == std::unordered_set{})); - } - - TEST_CASE("get_source_sink_open_graph:unconnected") { - OpenMultiDiGraph g = OpenMultiDiGraph::create(); - - Node n0 = g.add_node(); - Node n1 = g.add_node(); - - NodePort p0 = g.add_node_port(); - NodePort p1 = g.add_node_port(); - - InputMultiDiEdge e0{n0, p0, std::make_pair(p0.value(), p0.value())}; - OutputMultiDiEdge e1{n1, p1, std::make_pair(p1.value(), p1.value())}; - g.add_edge(e0); - g.add_edge(e1); - - /* - g: ->n0 - n1-> - */ - - CHECK(bool(get_closed_sources(g) == std::unordered_set{n1})); - CHECK(bool(get_closed_sinks(g) == std::unordered_set{n0})); - - CHECK(bool(get_open_sources(g) == std::unordered_set{n0})); - CHECK(bool(get_open_sinks(g) == std::unordered_set{n1})); - } - - TEST_CASE("get_cut") { - auto g = OpenMultiDiGraph::create(); - - std::vector ns = add_nodes(g, 5); - - MultiDiEdge e0{ns[1], g.add_node_port(), ns[0], g.add_node_port()}; - MultiDiEdge e1{ns[2], g.add_node_port(), ns[1], g.add_node_port()}; - MultiDiEdge e2{ns[3], g.add_node_port(), ns[1], g.add_node_port()}; - MultiDiEdge e3{ns[4], g.add_node_port(), ns[2], g.add_node_port()}; - MultiDiEdge e4{ns[4], g.add_node_port(), ns[3], g.add_node_port()}; - OutputMultiDiEdge e5{ - ns[4], g.add_node_port(), std::make_pair(ns[4].value(), ns[4].value())}; - - g.add_edge(e0); - g.add_edge(e1); - g.add_edge(e2); - g.add_edge(e3); - g.add_edge(e4); - g.add_edge(e5); - - GraphSplit gs0{{ns[0], ns[1]}, {ns[2], ns[3], ns[4]}}; - CHECK(bool(get_cut_set(g, gs0) == std::unordered_set{e1, e2})); - - GraphSplit gs1{{ns[0], ns[1], ns[2], ns[3]}, {ns[4]}}; - CHECK(bool(get_cut_set(g, gs1) == std::unordered_set{e3, e4})); - } -} +// #include "compiler/unity_algorithm.h" +// #include "doctest/doctest.h" +// #include "utils/graph/algorithms.h" + +// using namespace FlexFlow; + +// TEST_SUITE(FF_TEST_SUITE) { +// TEST_CASE("get_source_sink_open_graph") { +// OpenMultiDiGraph g = +// OpenMultiDiGraph::create(); + +// Node n0 = g.add_node(); +// NodePort p0 = g.add_node_port(); +// InputMultiDiEdge e0{ +// n0, g.add_node_port(), std::make_pair(n0.value(), n0.value())}; +// g.add_edge(e0); + +// CHECK(bool(get_closed_sources(g) == std::unordered_set{})); +// CHECK(bool(get_closed_sinks(g) == std::unordered_set{n0})); + +// CHECK(bool(get_open_sources(g) == std::unordered_set{n0})); +// CHECK(bool(get_open_sinks(g) == std::unordered_set{})); +// } + +// TEST_CASE("get_source_sink_open_graph:unconnected") { +// OpenMultiDiGraph g = +// OpenMultiDiGraph::create(); + +// Node n0 = g.add_node(); +// Node n1 = g.add_node(); + +// NodePort p0 = g.add_node_port(); +// NodePort p1 = g.add_node_port(); + +// InputMultiDiEdge e0{n0, p0, std::make_pair(p0.value(), p0.value())}; +// OutputMultiDiEdge e1{n1, p1, std::make_pair(p1.value(), p1.value())}; +// g.add_edge(e0); +// g.add_edge(e1); + +// /* +// g: ->n0 +// n1-> +// */ + +// CHECK(bool(get_closed_sources(g) == std::unordered_set{n1})); +// CHECK(bool(get_closed_sinks(g) == std::unordered_set{n0})); + +// CHECK(bool(get_open_sources(g) == std::unordered_set{n0})); +// CHECK(bool(get_open_sinks(g) == std::unordered_set{n1})); +// } + +// TEST_CASE("get_cut") { +// auto g = OpenMultiDiGraph::create(); + +// std::vector ns = add_nodes(g, 5); + +// MultiDiEdge e0{ns[1], g.add_node_port(), ns[0], g.add_node_port()}; +// MultiDiEdge e1{ns[2], g.add_node_port(), ns[1], g.add_node_port()}; +// MultiDiEdge e2{ns[3], g.add_node_port(), ns[1], g.add_node_port()}; +// MultiDiEdge e3{ns[4], g.add_node_port(), ns[2], g.add_node_port()}; +// MultiDiEdge e4{ns[4], g.add_node_port(), ns[3], g.add_node_port()}; +// OutputMultiDiEdge e5{ +// ns[4], g.add_node_port(), std::make_pair(ns[4].value(), +// ns[4].value())}; + +// g.add_edge(e0); +// g.add_edge(e1); +// g.add_edge(e2); +// g.add_edge(e3); +// g.add_edge(e4); +// g.add_edge(e5); + +// GraphSplit gs0{{ns[0], ns[1]}, {ns[2], ns[3], ns[4]}}; +// CHECK(bool(get_cut_set(g, gs0) == std::unordered_set{e1, +// e2})); + +// GraphSplit gs1{{ns[0], ns[1], ns[2], ns[3]}, {ns[4]}}; +// CHECK(bool(get_cut_set(g, gs1) == std::unordered_set{e3, +// e4})); +// } +// } diff --git a/lib/compiler/test/src/test_optimal_cost.cc b/lib/compiler/test/src/test_optimal_cost.cc index 82c731888f..133558f83a 100644 --- a/lib/compiler/test/src/test_optimal_cost.cc +++ b/lib/compiler/test/src/test_optimal_cost.cc @@ -1,68 +1,72 @@ -#include "compiler/unity_algorithm.h" -#include "doctest/doctest.h" -#include "test_cost_estimator.h" +// #include "compiler/unity_algorithm.h" +// #include "doctest/doctest.h" +// #include "test_cost_estimator.h" -using namespace FlexFlow; +// using namespace FlexFlow; -TEST_SUITE(FF_TEST_SUITE) { - // Rapidcheck infrastructures for graphs does not work for now - /* - Tests whether optimal_cost can give a valid result given random PCG, trivial - allowed machine views, trivial cost estimator and random machine - specification. - */ - // TEST_CASE("optimal_cost") { - // auto test_allowed_machine_views = [](Operator const &, - // MachineSpecification const &) { - // return std::unordered_set{make_1d_machine_view(0, 1, 1)}; - // }; - // RC_SUBCASE([](ParallelComputationGraph const &g, - // MachineSpecification const &machine_spec) { - // OptimalCostCache cached_subgraph_costs; - // OptimalCostResult result = optimal_cost(g, - // test_allowed_machine_views, - // TestCostEstimator{}, - // machine_spec, - // cached_subgraph_costs); - // RC_ASSERT(result.runtime > 0); - // RC_ASSERT(keys(result.machine_mapping.machine_views) == get_nodes(g)); - // }); - // } +// TEST_SUITE(FF_TEST_SUITE) { +// // Rapidcheck infrastructures for graphs does not work for now +// /* +// Tests whether optimal_cost can give a valid result given random PCG, +// trivial allowed machine views, trivial cost estimator and random machine +// specification. +// */ +// // TEST_CASE("optimal_cost") { +// // auto test_allowed_machine_views = [](Operator const &, +// // MachineSpecification const &) { +// // return std::unordered_set{make_1d_machine_view(0, 1, +// 1)}; +// // }; +// // RC_SUBCASE([](ParallelComputationGraph const &g, +// // MachineSpecification const &machine_spec) { +// // OptimalCostCache cached_subgraph_costs; +// // OptimalCostResult result = optimal_cost(g, +// // test_allowed_machine_views, +// // TestCostEstimator{}, +// // machine_spec, +// // cached_subgraph_costs); +// // RC_ASSERT(result.runtime > 0); +// // RC_ASSERT(keys(result.machine_mapping.machine_views) == +// get_nodes(g)); +// // }); +// // } - TEST_CASE("optimal_cost_0") { - auto pcg = - OutputLabelledMultiDiGraph::template create< - UnorderedOutputLabelledMultiDiGraph>(); +// TEST_CASE("optimal_cost_0") { +// auto pcg = +// OutputLabelledMultiDiGraph::template +// create< +// UnorderedOutputLabelledMultiDiGraph>(); - Node n0 = pcg.add_node(Operator{InputAttrs{}, "input"}); - Node n1 = pcg.add_node(Operator{ - LinearAttrs{1, false, DataType::FLOAT, Activation::RELU, std::nullopt}, - "linear"}); +// Node n0 = pcg.add_node(Operator{InputAttrs{}, "input"}); +// Node n1 = pcg.add_node(Operator{ +// LinearAttrs{1, false, DataType::FLOAT, Activation::RELU, +// std::nullopt}, "linear"}); - MultiDiEdge e{n1, pcg.add_node_port(), n0, pcg.add_node_port()}; - pcg.add_edge(e); - ParallelDim dim = {2, 1, false}; - ParallelTensorDims dims = {FFOrdered{dim}}; - pcg.add_output(e, ParallelTensor(dims, DataType::FLOAT, CreateGrad::YES)); +// MultiDiEdge e{n1, pcg.add_node_port(), n0, pcg.add_node_port()}; +// pcg.add_edge(e); +// ParallelDim dim = {2, 1, false}; +// ParallelTensorDims dims = {FFOrdered{dim}}; +// pcg.add_output(e, ParallelTensor(dims, DataType::FLOAT, +// CreateGrad::YES)); - auto test_allowed_machine_views = [](Operator const &, - MachineSpecification const &) { - return std::unordered_set{ - make_1d_machine_view(gpu_id_t(1), gpu_id_t(2))}; - }; +// auto test_allowed_machine_views = [](Operator const &, +// MachineSpecification const &) { +// return std::unordered_set{ +// make_1d_machine_view(gpu_id_t(1), gpu_id_t(2))}; +// }; - CostEstimator estimator = CostEstimator::create(); +// CostEstimator estimator = CostEstimator::create(); - MachineSpecification machine_spec{1, 1, 1, 1, 1}; +// MachineSpecification machine_spec{1, 1, 1, 1, 1}; - OptimalCostCache cached_results; +// OptimalCostCache cached_results; - OptimalCostResult result = optimal_cost(ParallelComputationGraph(pcg), - test_allowed_machine_views, - estimator, - machine_spec, - cached_results); +// OptimalCostResult result = optimal_cost(ParallelComputationGraph(pcg), +// test_allowed_machine_views, +// estimator, +// machine_spec, +// cached_results); - CHECK(bool(result.runtime > 0)); - } -} +// CHECK(bool(result.runtime > 0)); +// } +// } diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index cf84bf5048..f66723b0ff 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -1,8 +1,6 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H -#include "machine_specification_t.h" - namespace FlexFlow {} // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/model_compilation.h b/lib/pcg/include/pcg/model_compilation.h index 0ac1b89522..1ab66161ec 100644 --- a/lib/pcg/include/pcg/model_compilation.h +++ b/lib/pcg/include/pcg/model_compilation.h @@ -3,7 +3,7 @@ #include "pcg/computation_graph.h" #include "pcg/optimizer.h" -#include "pcg/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/tensor_mapping.h" namespace FlexFlow { diff --git a/lib/pcg/src/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc similarity index 100% rename from lib/pcg/src/strided_rectangle.cc rename to lib/pcg/src/pcg/strided_rectangle.cc diff --git a/lib/runtime/src/parallel_computation_graph.h b/lib/runtime/src/parallel_computation_graph.h index bd4776cab3..5ffd6f7cad 100644 --- a/lib/runtime/src/parallel_computation_graph.h +++ b/lib/runtime/src/parallel_computation_graph.h @@ -5,7 +5,7 @@ #include "op-attrs/operator_attrs.h" #include "pcg/operator_guid_t.h" #include "pcg/optimizer.h" -#include "pcg/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/parallel_tensor.h" #include "task_spec/op_task_invocation.h" #include "utils/graph.h"