diff --git a/doc/ref_other.rst b/doc/ref_other.rst index d41109b9d..55a72d5c2 100644 --- a/doc/ref_other.rst +++ b/doc/ref_other.rst @@ -29,6 +29,8 @@ Automatic Testing Checking Dependencies at the Statement-Instance Level ----------------------------------------------------- +See also :func:`~loopy.add_dependency` for how to add dependencies. + .. automodule:: loopy.schedule.checker Troubleshooting diff --git a/loopy/__init__.py b/loopy/__init__.py index ad245c014..b6d7940a1 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -80,7 +80,8 @@ from loopy.transform.instruction import ( find_instructions, map_instructions, - set_instruction_priority, add_dependency, + set_instruction_priority, + add_dependency, remove_instructions, replace_instruction_ids, tag_instructions, @@ -131,7 +132,8 @@ get_one_linearized_kernel, linearize) from loopy.schedule.checker import ( get_pairwise_statement_orderings, -) + find_unsatisfied_dependencies, + ) from loopy.statistics import (ToCountMap, ToCountPolynomialMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_map, get_mem_access_map, get_synchronization_map, gather_access_footprints, @@ -215,7 +217,8 @@ "rename_argument", "set_temporary_scope", "find_instructions", "map_instructions", - "set_instruction_priority", "add_dependency", + "set_instruction_priority", + "add_dependency", "remove_instructions", "replace_instruction_ids", "tag_instructions", @@ -272,6 +275,7 @@ "get_one_scheduled_kernel", "get_one_linearized_kernel", "linearize", "get_pairwise_statement_orderings", + "find_unsatisfied_dependencies", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 9fb9757a6..a29447b6b 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -75,7 +75,7 @@ class UseStreamingStoreTag(Tag): # {{{ instructions: base class class InstructionBase(ImmutableRecord, Taggable): - """A base class for all types of 
instruction that can occur in + r"""A base class for all types of instruction that can occur in a kernel. .. attribute:: id @@ -87,7 +87,7 @@ class InstructionBase(ImmutableRecord, Taggable): .. attribute:: depends_on - a :class:`frozenset` of :attr:`id` values of :class:`InstructionBase` + A :class:`frozenset` of :attr:`id` values of :class:`InstructionBase` instances that *must* be executed before this one. Note that :func:`loopy.preprocess_kernel` (usually invoked automatically) augments this by adding dependencies on any writes to temporaries read @@ -106,6 +106,40 @@ class InstructionBase(ImmutableRecord, Taggable): :func:`loopy.make_kernel`. Note, that this is not meant as a user-facing interface. + .. attribute:: dependencies + + A :class:`dict` mapping :attr:`id` values of :class:`InstructionBase` + instances, each referring to a dependee statement (i.e., a statement + with statement instances that must be executed before instances of this + statement), to lists (one list per key) of :class:`islpy.Map`\ s that + express dependency relationships by mapping each instance of the + dependee statement to all instances of this statement that must occur + later. + + The name of the first dimension in the `in_` and `out` spaces must be + :data:`loopy.schedule.checker.schedule.STATEMENT_VAR_NAME`, suffixed by + :data:`loopy.schedule.checker.schedule.BEFORE_MARK` for the `in_` + dimension. This dimension in the `in_` space must be assigned the value + 0, and in the `out` space it must be assigned 0 for self-dependencies + (dependencies describing instances of a statement that must happen + before other instances of the same statement) and 1 otherwise. + + In addition to the statement dimension, the `in_` space of a dependency + map must contain one dimension per iname in :attr:`within_inames` for + the dependee, and the `out` space must contain one dimension per iname + in :attr:`within_inames` for this statement.
The dimension names should + match the corresponding iname, with those in the `in_` space suffixed + by :data:`loopy.schedule.checker.schedule.BEFORE_MARK`. Reduction + inames are not considered (for now). Only dependencies involving + instances of statements within the domain on either end of the map are + expected to be represented. + + Creation of these maps may be facilitated with + :func:`loopy.schedule.checker.utils.make_dep_map`. + + This dict expresses the new statement-instance-level dependencies and + will eventually replace :attr:`depends_on`. + .. attribute:: depends_on_is_final A :class:`bool` determining whether :attr:`depends_on` constitutes @@ -212,6 +246,7 @@ class InstructionBase(ImmutableRecord, Taggable): pymbolic_set_fields = {"predicates"} def __init__(self, id, depends_on, depends_on_is_final, + dependencies, groups, conflicts_with_groups, no_sync_with, within_inames_is_final, within_inames, @@ -241,6 +276,9 @@ def __init__(self, id, depends_on, depends_on_is_final, if depends_on is None: depends_on = frozenset() + if dependencies is None: + dependencies = {} + if groups is None: groups = frozenset() @@ -297,6 +335,7 @@ def __init__(self, id, depends_on, depends_on_is_final, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, no_sync_with=no_sync_with, groups=groups, conflicts_with_groups=conflicts_with_groups, within_inames_is_final=within_inames_is_final, @@ -392,6 +431,8 @@ def get_str_options(self): if self.depends_on: result.append("dep="+":".join(self.depends_on)) + if self.dependencies: + result.append("dependencies="+":".join(self.dependencies.keys())) if self.no_sync_with: result.append("nosync="+":".join( "%s@%s" % entry for entry in self.no_sync_with)) @@ -461,6 +502,9 @@ def __setstate__(self, val): if self.id is not None: # pylint:disable=access-member-before-definition self.id = intern(self.id) self.depends_on = intern_frozenset_of_ids(self.depends_on) + self.dependencies = { + 
intern(dependee_id): deps + for dependee_id, deps in self.dependencies.items()} self.groups = intern_frozenset_of_ids(self.groups) self.conflicts_with_groups = ( intern_frozenset_of_ids(self.conflicts_with_groups)) @@ -887,6 +931,7 @@ def __init__(self, id=None, depends_on=None, depends_on_is_final=None, + dependencies=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -903,6 +948,7 @@ def __init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1038,6 +1084,7 @@ def __init__(self, id=None, depends_on=None, depends_on_is_final=None, + dependencies=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1051,6 +1098,7 @@ def __init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1331,6 +1379,7 @@ def __init__(self, iname_exprs, code, read_variables=frozenset(), assignees=tuple(), id=None, depends_on=None, depends_on_is_final=None, + dependencies=None, groups=None, conflicts_with_groups=None, no_sync_with=None, within_inames_is_final=None, within_inames=None, @@ -1350,6 +1399,7 @@ def __init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, within_inames_is_final=within_inames_is_final, @@ -1495,16 +1545,25 @@ class NoOpInstruction(_DataObliviousInstruction): ... 
nop """ - def __init__(self, id=None, depends_on=None, depends_on_is_final=None, - groups=None, conflicts_with_groups=None, + def __init__( + self, + id=None, + depends_on=None, + depends_on_is_final=None, + dependencies=None, + groups=None, + conflicts_with_groups=None, no_sync_with=None, - within_inames_is_final=None, within_inames=None, + within_inames_is_final=None, + within_inames=None, priority=None, - predicates=None, tags=None): + predicates=None, + tags=None): super().__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1554,12 +1613,21 @@ class BarrierInstruction(_DataObliviousInstruction): fields = _DataObliviousInstruction.fields | {"synchronization_kind", "mem_kind"} - def __init__(self, id, depends_on=None, depends_on_is_final=None, - groups=None, conflicts_with_groups=None, + def __init__( + self, + id, + depends_on=None, + depends_on_is_final=None, + dependencies=None, + groups=None, + conflicts_with_groups=None, no_sync_with=None, - within_inames_is_final=None, within_inames=None, + within_inames_is_final=None, + within_inames=None, priority=None, - predicates=None, tags=None, synchronization_kind="global", + predicates=None, + tags=None, + synchronization_kind="global", mem_kind="local"): if predicates: @@ -1569,6 +1637,7 @@ def __init__(self, id, depends_on=None, depends_on_is_final=None, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index b987255d4..aa571b0e6 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -1,7 +1,10 @@ """ .. autofunction:: get_pairwise_statement_orderings +.. autofunction:: find_unsatisfied_dependencies + .. 
automodule:: loopy.schedule.checker.schedule +.. automodule:: loopy.schedule.checker.utils """ @@ -158,4 +161,155 @@ def get_pairwise_statement_orderings( # }}} + +# {{{ find_unsatisfied_dependencies() + +def find_unsatisfied_dependencies( + knl, + lin_items=None, + stop_on_first_violation=True, + ): + """For each statement (:class:`loopy.InstructionBase`) found in a + preprocessed kernel, determine which dependencies, if any, have been + violated by the linearization described by `lin_items`, and return these + dependencies. + + :arg knl: A preprocessed (or linearized) :class:`loopy.LoopKernel` + containing the statements (:class:`loopy.InstructionBase`) whose + dependencies will be checked against the linearization items. + + :arg lin_items: A sequence of :class:`loopy.schedule.ScheduleItem` + (to be renamed to `loopy.schedule.LinearizationItem`) containing all + linearization items in `knl.linearization`. To allow usage of + this routine during linearization, a truncated (i.e. partial) + linearization may be passed through this argument. If not provided, + `knl.linearization` will be used. + + :arg stop_on_first_violation: A :class:`bool` determining whether to stop + checking dependencies once the first unsatisfied dependency is found. + + :returns: A list of unsatisfied dependencies, each represented as a + :func:`collections.namedtuple` containing the following: + + - `statement_pair`: The (before, after) pair of statement IDs involved + in the dependency. + - `dependency`: An :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that must occur + later. + - `statement_ordering`: A + :class:`~loopy.schedule.checker.schedule.StatementOrdering` + resulting from :func:`get_pairwise_statement_orderings` (defined above).
+ + """ + + # {{{ Handle lin_items=None and make sure kernel has been preprocessed + + from loopy.kernel import KernelState + if lin_items is None: + assert knl.state == KernelState.LINEARIZED + lin_items = knl.linearization + else: + # Note: kernels must always be preprocessed before scheduling + assert knl.state in [ + KernelState.PREPROCESSED, + KernelState.LINEARIZED] + + # }}} + + # {{{ Create map from before->after statement id pairs to dependency maps + + # For efficiency, all pairwise SIOs will be created + # in one pass, which first requires finding all pairs of statements that + # are connected by at least one dependency. + # We will also later need to collect all deps for each statement pair, + # so do this at the same time; create stmt_pairs_to_deps: + + # stmt_pairs_to_deps: + # {(stmt_id_before1, stmt_id_after1): [dep1, dep2, ...], + # (stmt_id_before2, stmt_id_after2): [dep1, dep2, ...], + # ...} + stmt_pairs_to_deps = {} + + for stmt_after in knl.instructions: + for before_id, dep_list in stmt_after.dependencies.items(): + stmt_pairs_to_deps.setdefault( + (before_id, stmt_after.id), []).extend(dep_list) + + # }}} + + # {{{ Get statement instance ordering for every before->after statement pair + + pworders = get_pairwise_statement_orderings( + knl, + lin_items, + stmt_pairs_to_deps.keys(), + ) + + # }}} + + # {{{ For each depender-dependee pair of statements, check all deps vs. 
SIO + + unsatisfied_deps = [] + + # Collect info about unsatisfied deps + from collections import namedtuple + UnsatisfiedDependencyInfo = namedtuple( + "UnsatisfiedDependencyInfo", + ["statement_pair", "dependency", "statement_ordering"]) + + for stmt_id_pair, dependencies in stmt_pairs_to_deps.items(): + + # Get the pairwise ordering info (includes SIOs) + pworder = pworders[stmt_id_pair] + + # Check each dep for this statement pair + for dependency in dependencies: + + # Align constraint map space to match SIO so we can + # check to see whether the constraint map is a subset of the SIO + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + aligned_dep_map = ensure_dim_names_match_and_align( + dependency, pworder.sio_intra_thread) + + # {{{ Assert that map spaces match + + assert aligned_dep_map.space == pworder.sio_intra_thread.space + assert aligned_dep_map.space == pworder.sio_intra_group.space + assert aligned_dep_map.space == pworder.sio_global.space + assert (aligned_dep_map.get_var_dict() == + pworder.sio_intra_thread.get_var_dict()) + assert (aligned_dep_map.get_var_dict() == + pworder.sio_intra_group.get_var_dict()) + assert (aligned_dep_map.get_var_dict() == + pworder.sio_global.get_var_dict()) + + # }}} + + # Check dependency + if not aligned_dep_map <= ( + pworder.sio_intra_thread | + pworder.sio_intra_group | + pworder.sio_global + ): + # FIXME This could be done by computing (via intersection) + # intra-thread, intra-group, and global parts of aligned_dep_map + # and demanding that each is a subset of the corresponding sio. + # Determine whether this would be more efficient, and if so, do + # it. 
+ + unsatisfied_deps.append(UnsatisfiedDependencyInfo( + stmt_id_pair, aligned_dep_map, pworder)) + + # Return, not break: break only leaves the inner loop + if stop_on_first_violation: + return unsatisfied_deps + + # }}} + + return unsatisfied_deps + +# }}} + # vim: foldmethod=marker diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 39b44c2ce..14f8f1134 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -745,6 +745,9 @@ def _pad_tuples_and_assign_integer_vals_to_map_template( bottom_to_top_map = _pad_tuples_and_assign_integer_vals_to_map_template( bottom_tuple, top_tuple) + bottom_to_top_map = _pad_tuples_and_assign_integer_vals_to_map_template( + key_lex_tuples[slex.BOTTOM], key_lex_tuples[slex.TOP]) + # Add constraint iname = iname' + 1 blex_var_for_iname = seq_iname_to_blex_var[iname] bottom_to_top_map = bottom_to_top_map.add_constraint( diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 0fc0971da..12d3480e1 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -1,3 +1,8 @@ +""" +.. autofunction:: make_dep_map +""" + + __copyright__ = "Copyright (C) 2019 James Stevens" __license__ = """ @@ -196,6 +201,132 @@ def append_mark_to_strings(strings, mark): return [s+mark for s in strings] +# {{{ make_dep_map + +def make_dep_map(s, self_dep=False, knl_with_domains=None): + """Given a string representation of a before->after mapping of statement + instances, create an :class:`islpy.Map` representing the dependency. Insert + a dimension into this map to represent the statement identifier for both + the 'before' and 'after' statements. If ``knl_with_domains`` is provided, + intersect the input and output map domains with the domains for the + inames found in the kernel. + + :arg s: A :class:`str` describing a before->after mapping of statement + instances using islpy map syntax.
The input and output spaces + of the map represented by this string should *not* include a dimension + for statement identifiers; these dimensions will be added. Inames in + the input space should be suffixed with + ``loopy.schedule.checker.schedule.BEFORE_MARK``. + + :arg self_dep: A :class:`bool` expressing whether the depender and + dependee are the same instruction. If so, the value for *both* the + input and output statement identifier dimensions will be set to 0. + If not, the value for the *output* statement identifier dimension will + be set to 1. + + :arg knl_with_domains: A :class:`loopy.LoopKernel` containing iname + domains that will be used to constrain the inames in the dependency map. + If provided, the domains for the inames found in the dependency will be + intersected with their domains expressed in the kernel. + + :returns: An :class:`islpy.Map` representing a dependency as a mapping + from each instance of the first statement to all instances of the + second statement that must occur later.
+ + """ + + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + STATEMENT_VAR_NAME, + ) + + # Pass the input string to isl.Map to initialize the map + map_init = isl.Map(s) + + # {{{ Islpy drops apostrophes, make sure this hasn't changed + # and manually add the mark if necessary + + if BEFORE_MARK == "'": + for dim_name in map_init.get_var_names(dim_type.in_): + assert BEFORE_MARK not in dim_name + + # Append BEFORE_MARK to in_ dims + map_marked = append_mark_to_isl_map_var_names( + map_init, dim_type.in_, BEFORE_MARK) + + # }}} + + # {{{ Insert input/output statement dims and set them to 0 or 1 + + map_with_stmts = insert_and_name_isl_dims( + map_marked, dim_type.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) + map_with_stmts = insert_and_name_isl_dims( + map_with_stmts, dim_type.out, [STATEMENT_VAR_NAME], 0) + + sid_after = 0 if self_dep else 1 + + map_with_stmts = map_with_stmts.add_constraint( + isl.Constraint.eq_from_names( + map_with_stmts.space, + {1: 0, STATEMENT_VAR_NAME+BEFORE_MARK: -1})) + + map_with_stmts = map_with_stmts.add_constraint( + isl.Constraint.eq_from_names( + map_with_stmts.space, + {1: sid_after, STATEMENT_VAR_NAME: -1})) + + # }}} + + # {{{ Intersect map domain and range with iname domains in knl + + if knl_with_domains is not None: + + if BEFORE_MARK != "'": + raise NotImplementedError( + "make_dep_map() does not yet handle a knl_with_domains argument " + "when BEFORE_MARK != \"'\"") + + # {{{ Get inames domain for input and output inames + + # Get the inames from map_init; islpy already dropped the apostrophes + inames_in = map_init.get_var_names(dim_type.in_) + inames_out = map_init.get_var_names(dim_type.out) + + # Get inames domain + inames_in_dom = knl_with_domains.get_inames_domain( + inames_in).project_out_except(inames_in, [dim_type.set]) + inames_out_dom = knl_with_domains.get_inames_domain( + inames_out).project_out_except(inames_out, [dim_type.set]) + + # Mark dependee inames + inames_in_dom_marked = 
append_mark_to_isl_map_var_names( + inames_in_dom, dim_type.set, BEFORE_MARK) + + # }}} + + # {{{ Align spaces for iname domains with dep map (which adds the stmt var) + + inames_in_dom_marked_aligned = isl.align_spaces( + inames_in_dom_marked, map_with_stmts.domain(), + obj_bigger_ok=True) # e.g., params might exist + inames_out_dom_aligned = isl.align_spaces( + inames_out_dom, map_with_stmts.range(), + obj_bigger_ok=True) # e.g., params might exist + + # }}} + + # Intersect iname domains with dependency map + map_with_stmts = map_with_stmts.intersect_range( + inames_out_dom_aligned + ).intersect_domain(inames_in_dom_marked_aligned) + + # }}} + + return map_with_stmts + +# }}} + + def sorted_union_of_names_in_isl_sets( isl_sets, set_dim=dim_type.set): diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index c7598c356..3c3c16274 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -98,11 +98,24 @@ def set_prio(insn): @for_each_kernel def add_dependency(kernel, insn_match, depends_on): - """Add the instruction dependency *dependency* to the instructions matched + """Add the dependency contained in *depends_on* to the instructions matched by *insn_match*. - *insn_match* and *depends_on* may be any instruction id match understood by - :func:`loopy.match.parse_match`. + :arg kernel: A :class:`loopy.LoopKernel`. + + :arg insn_match: An instruction id match understood by + :func:`loopy.match.parse_match` identifying the statement to which the + dependency will be added. + + :arg depends_on: A two-tuple containing an instruction id match understood by + :func:`loopy.match.parse_match` identifying the dependee statement(s), + and an :class:`islpy.Map` from each instance of the dependee + statement(s) to all instances of the depender statement(s) that must occur + later.
For backward compatibility, *depends_on* may also be an + instruction id match identifying dependee statement ids to be added to + `stmt.depends_on` for any stmt matching *insn_match*. + + :returns: The updated :class:`loopy.LoopKernel` with the new dependency. .. versionchanged:: 2016.3 @@ -110,28 +123,44 @@ def add_dependency(kernel, insn_match, depends_on): be not just ID but also match expression. """ + # Determine whether we received legacy or contemporary dependency + if isinstance(depends_on, tuple): + dep_map = depends_on[1] + depends_on = depends_on[0] + else: + dep_map = None + if isinstance(depends_on, str) and depends_on in kernel.id_to_insn: - added_deps = frozenset([depends_on]) + dependee_ids = frozenset([depends_on]) else: - added_deps = frozenset( - dep.id for dep in find_instructions_in_single_kernel(kernel, - depends_on)) + dependee_ids = frozenset( + dependee.id for dependee in find_instructions_in_single_kernel( + kernel, depends_on)) - if not added_deps: + if not dependee_ids: raise LoopyError("no instructions found matching '%s' " "(to add as dependencies)" % depends_on) matched = [False] - def add_dep(insn): - new_deps = insn.depends_on - matched[0] = True - if new_deps is None: - new_deps = added_deps - else: - new_deps = new_deps | added_deps - - return insn.copy(depends_on=new_deps) + if dep_map is None: + # Handle legacy dependencies + def add_dep(insn): + new_deps = insn.depends_on # Set of dependee ids + matched[0] = True + if new_deps is None: + new_deps = dependee_ids + else: + new_deps = new_deps | dependee_ids + return insn.copy(depends_on=new_deps) + else: + # Handle contemporary dependencies (copy; never mutate stmt's dict) + def add_dep(stmt): + new_deps_dict = {k: list(v) for k, v in stmt.dependencies.items()} + matched[0] = True + for dependee_id in dependee_ids: + new_deps_dict.setdefault(dependee_id, []).append(dep_map) + return stmt.copy(dependencies=new_deps_dict) result = map_instructions(kernel, insn_match, add_dep) diff --git
a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 4901b1fd8..4258a2c52 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -45,6 +45,7 @@ ) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, + make_dep_map, prettier_map_string, ) from loopy.schedule.checker import ( @@ -1775,6 +1776,323 @@ def test_blex_map_transitivity_with_duplicate_conc_inames(): # }}} +# {{{ Dependency tests + +# {{{ test_add_dependency_with_new_deps + +def test_add_dependency_with_new_deps(): + """Use add_dependency to add new deps to kernels and make sure that the + correct dep is being added to the correct instruction. Also make sure that + these deps can be successfully checked for violation. Also, while we're + here, test to make sure make_dep_map() produces the correct result.""" + + # Make kernel and use OLD deps to control linearization order for now + i_range_str = "0 <= i < pi" + i_range_str_p = "0 <= i' < pi" + assumptions_str = "pi >= 1" + knl = lp.make_kernel( + "{[i]: %s}" % (i_range_str), + """ + a[i] = 3.14 {id=stmt_a} + b[i] = a[i] {id=stmt_b, dep=stmt_a} + c[i] = b[i] {id=stmt_c, dep=stmt_b} + """, + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, {"a": np.float32, "b": np.float32, "c": np.float32}) + + for stmt in knl["loopy_kernel"].instructions: + assert not stmt.dependencies + + # Add a dependency to stmt_b + dep_b_on_a = make_dep_map( + "[pi] -> {{ [i'] -> [i] : i > i' " + "and {0} " + "}}".format(assumptions_str), + knl_with_domains=knl["loopy_kernel"]) + knl = lp.add_dependency(knl, "id:stmt_b", ("id:stmt_a", dep_b_on_a)) + + # Make sure knl instructions all have the expected deps + for stmt in knl["loopy_kernel"].instructions: + if stmt.id == "stmt_b": + assert stmt.dependencies == { + "stmt_a": [dep_b_on_a, ], + } + else: + assert not stmt.dependencies + + # {{{ Test make_dep_map while we're here + + dep_b_on_a_test = _isl_map_with_marked_dims( + "[pi] ->
{{ [{3}'=0, i'] -> [{3}=1, i] : i > i' " + "and {0} and {1} and {2} }}".format( + i_range_str, + i_range_str_p, + assumptions_str, + STATEMENT_VAR_NAME, + )) + _align_and_compare_maps([(dep_b_on_a, dep_b_on_a_test)]) + + # }}} + + # Add a second dependency to stmt_b + dep_b_on_a_2 = make_dep_map( + "[pi] -> {{ [i'] -> [i] : i = i' " + "and {0}" + "}}".format(assumptions_str), + knl_with_domains=knl["loopy_kernel"]) + knl = lp.add_dependency(knl, "id:stmt_b", ("id:stmt_a", dep_b_on_a_2)) + + # Make sure knl instructions all have the expected deps + for stmt in knl["loopy_kernel"].instructions: + if stmt.id == "stmt_b": + assert stmt.dependencies == { + "stmt_a": [dep_b_on_a, dep_b_on_a_2], + } + else: + assert not stmt.dependencies + + # {{{ Test make_dep_map while we're here + + dep_b_on_a_2_test = _isl_map_with_marked_dims( + "[pi] -> {{ [{3}'=0, i'] -> [{3}=1, i] : i = i' " + "and {0} and {1} and {2} }}".format( + i_range_str, + i_range_str_p, + assumptions_str, + STATEMENT_VAR_NAME, + )) + _align_and_compare_maps([(dep_b_on_a_2, dep_b_on_a_2_test)]) + + # }}} + + # Add dependencies to stmt_c + + dep_c_on_a = make_dep_map( + "[pi] -> {{ [i'] -> [i] : i >= i' " + "and {0} " + "}}".format(assumptions_str), + knl_with_domains=knl["loopy_kernel"]) + + dep_c_on_b = make_dep_map( + "[pi] -> {{ [i'] -> [i] : i >= i' " + "and {0} " + "}}".format(assumptions_str), + knl_with_domains=knl["loopy_kernel"]) + + knl = lp.add_dependency(knl, "id:stmt_c", ("id:stmt_a", dep_c_on_a)) + knl = lp.add_dependency(knl, "id:stmt_c", ("id:stmt_b", dep_c_on_b)) + + # Make sure knl instructions all have the expected deps + for stmt in knl["loopy_kernel"].instructions: + if stmt.id == "stmt_b": + assert stmt.dependencies == { + "stmt_a": [dep_b_on_a, dep_b_on_a_2], + } + elif stmt.id == "stmt_c": + assert stmt.dependencies == { + "stmt_a": [dep_c_on_a, ], + "stmt_b": [dep_c_on_b, ], + } + else: + assert not stmt.dependencies + + # {{{ Now make sure deps can be checked. 
These should be satisfied. + + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert not unsatisfied_deps + + # Make sure dep checking also works when only the linearized kernel is + # provided to find_unsatisfied_dependencies() + unsatisfied_deps = lp.find_unsatisfied_dependencies(lin_knl) + + assert not unsatisfied_deps + + # }}} + +# }}} + + +# {{{ test_make_dep_map + +def test_make_dep_map(): + """Make sure make_dep_map() produces the desired result. This is also + tested inside other test functions, but here we specifically test cases + where the statement inames don't match.""" + + # Make kernel and use OLD deps to control linearization order for now + i_range_str = "0 <= i < n" + i_range_str_p = "0 <= i' < n" + j_range_str = "0 <= j < n" + j_range_str_p = "0 <= j' < n" + k_range_str = "0 <= k < n" + knl = lp.make_kernel( + "{[i,j,k]: %s}" % (" and ".join([i_range_str, j_range_str, k_range_str])), + """ + a[i,j] = 3.14 {id=stmt_a} + b[k] = a[i,k] {id=stmt_b, dep=stmt_a} + """, + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes(knl, {"a,b": np.float32}) + + for stmt in knl["loopy_kernel"].instructions: + assert not stmt.dependencies + + # Add a dependency to stmt_b + dep_b_on_a = make_dep_map( + "[n] -> { [i',j'] -> [i,k] : i > i' and j' < k}", + knl_with_domains=knl["loopy_kernel"]) + + # Create expected dep + dep_b_on_a_test = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i', j'] -> [{0}=1, i, k] : i > i' and j' < k" + " and {1} }}".format( + STATEMENT_VAR_NAME, + " and ".join([ + i_range_str, + i_range_str_p, + j_range_str_p, + k_range_str, + ]) + )) + _align_and_compare_maps([(dep_b_on_a, dep_b_on_a_test)]) + +# }}} + + +# {{{ test_new_dependencies_finite_diff: + +def test_new_dependencies_finite_diff(): + """Test find_unsatisfied_dependencies() using several variants of a finite + difference kernel, some of which violate dependencies.""" + + # 
Define kernel + knl = lp.make_kernel( + "[nx,nt] -> {[x, t]: 0<=x { [x', t'] -> [x, t] : " + "((x = x' and t = t'+2) or " + " (x'-1 <= x <= x'+1 and t = t' + 1)) }", + self_dep=True, knl_with_domains=knl["loopy_kernel"]) + knl = lp.add_dependency(knl, "id:stmt", ("id:stmt", dep)) + + # {{{ Test make_dep_map while we're here + + dep_test = make_dep_map( + "[nx,nt] -> { [x', t'] -> [x, t] : " + "((x = x' and t = t'+2) or " + " (x'-1 <= x <= x'+1 and t = t' + 1)) and " + "0 <= x < nx and 0 <= t < nt and " + "0 <= x' < nx and 0 <= t' < nt }", + self_dep=True) + + _align_and_compare_maps([(dep, dep_test)]) + + # }}} + + ref_knl = knl + + # {{{ Test find_unsatisfied_dependencies with corrct loop nest order + + # Prioritize loops correctly + knl = lp.prioritize_loops(knl, "t,x") + + # Make sure deps are satisfied + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert not unsatisfied_deps + + # Make sure dep checking also works with just linearized kernel + unsatisfied_deps = lp.find_unsatisfied_dependencies(lin_knl) + + assert not unsatisfied_deps + + # }}} + + # {{{ Test find_unsatisfied_dependencies with incorrect loop nest order + + # Now prioritize loops incorrectly + knl = ref_knl + knl = lp.prioritize_loops(knl, "x,t") + + # Make sure unsatisfied deps are caught + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items, stop_on_first_violation=False) + + assert len(unsatisfied_deps) == 1 + + # }}} + + # {{{ Test find_unsatisfied_dependencies with parallel x and no barrier + + # Parallelize the x loop + knl = ref_knl + knl = lp.prioritize_loops(knl, "t,x") + knl = lp.tag_inames(knl, "x:l.0") + + # Make sure unsatisfied deps are caught + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + # Without a barrier, deps are not satisfied + # Make sure there is no barrier, and that 
unsatisfied deps are caught + from loopy.schedule import Barrier + for lin_item in lin_items: + assert not isinstance(lin_item, Barrier) + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items, stop_on_first_violation=False) + + assert len(unsatisfied_deps) == 1 + + # }}} + + # {{{ Test find_unsatisfied_dependencies with parallel x and included barrier + + # Insert a barrier to satisfy deps + knl = lp.make_kernel( + "[nx,nt] -> {[x, t]: 0<=x 1: exec(sys.argv[1])