From 91cf69458f6b9c590828d748cbe38165c4310b7e Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Tue, 18 Feb 2025 22:10:06 -0600 Subject: [PATCH 01/12] adjust reverse writer map construction --- loopy/kernel/dependency.py | 186 ++++++++++++++++++++++++++++++++++ loopy/kernel/instruction.py | 32 +----- loopy/target/c/compyte | 2 +- loopy/transform/dependency.py | 115 +++++++++++++++++++++ 4 files changed, 305 insertions(+), 30 deletions(-) create mode 100644 loopy/kernel/dependency.py create mode 100644 loopy/transform/dependency.py diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py new file mode 100644 index 000000000..cdc91014d --- /dev/null +++ b/loopy/kernel/dependency.py @@ -0,0 +1,186 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2025 Addison Alvey-Blanco" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +from typing import Mapping + +from immutabledict import immutabledict +import islpy as isl +from islpy import dim_type + +from loopy import HappensAfter, LoopKernel, for_each_kernel +from loopy.kernel.instruction import ( + InstructionBase, + VariableSpecificHappensAfter, +) +from loopy.transform.dependency import AccessMapFinder + + +@for_each_kernel +def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: + """ + Impose a sequential, top-down execution order to instructions in a program. + It is expected that this strict order will be relaxed with + :func:`reduce_strict_ordering_with_dependencies` using data dependencies. + """ + + new_insns = [knl.instructions[0].copy()] + for iafter, after_insn in enumerate(knl.instructions[1:], start=1): + before_insn = knl.instructions[iafter-1] + + domain_before = knl.get_inames_domain(before_insn.within_inames) + domain_after = knl.get_inames_domain(after_insn.within_inames) + + happens_after = isl.Map.from_domain_and_range(domain_before, + domain_after) + for idim in range(happens_after.dim(dim_type.out)): + happens_after = happens_after.set_dim_name( + dim_type.out, + idim, + happens_after.get_dim_name(dim_type.out, idim) + "'" + ) + + # NOTE: using this in place of what's in the fold breaks stuff bc sets + shared_inames = before_insn.within_inames & after_insn.within_inames + + # {{{ removes non-determinism from 'bad' ordering of inames + + shared_inames_order_before = [ + domain_before.get_dim_name(dim_type.out, idim) + for idim in range(domain_before.dim(dim_type.out)) + if domain_before.get_dim_name(dim_type.out, idim) + in shared_inames + ] + + shared_inames_order_after = [ + domain_after.get_dim_name(dim_type.out, idim) + for idim in range(domain_after.dim(dim_type.out)) + if domain_after.get_dim_name(dim_type.out, idim) + in shared_inames + ] + + assert shared_inames_order_after == shared_inames_order_before + shared_inames_order = shared_inames_order_after + + # }}} + + affs_in = isl.affs_from_space(happens_after.domain().space) + affs_out = isl.affs_from_space(happens_after.range().space) + + lex_map = isl.Map.empty(happens_after.space) + for iinnermost, innermost_iname in enumerate(shared_inames_order): + innermost_map = affs_in[innermost_iname].lt_map( + affs_out[innermost_iname + "'"] + ) + + for outer_iname in list(shared_inames_order)[:iinnermost]: + innermost_map = innermost_map & ( + affs_in[outer_iname].eq_map( + affs_out[outer_iname + "'"] + ) + ) + + lex_map = lex_map | innermost_map + + happens_after = happens_after & lex_map + new_happens_after = {before_insn.id: HappensAfter(happens_after)} + new_insns.append(after_insn.copy(happens_after=new_happens_after)) + + return knl.copy(instructions=new_insns) + + +@for_each_kernel +def reduce_strict_ordering(knl) -> LoopKernel: + def narrow_dependencies( + source: InstructionBase, + after_insn: InstructionBase, + access_mapper: AccessMapFinder, + happens_afters: dict, + dependency_map: isl.Map | None = None, # type: ignore + ) -> dict: + assert isinstance(source.id, str) + assert isinstance(after_insn.id, str) + + if dependency_map is not None and dependency_map.is_empty(): + return happens_afters + + new_happens_after = {} + for insn, happens_after in after_insn.happens_after.items(): + if dependency_map is None: + dependency_map = happens_after.instances_rel + else: + dependency_map = dependency_map.apply_range( + happens_after.instances_rel + ) + + common_vars = \ + wmap_r[insn] & access_mapper.get_accessed_variables(source.id) + for var in common_vars: + write_map = access_mapper.get_map(insn, var) + source_map = access_mapper.get_map(source.id, var) + assert write_map is not None + assert source_map is not None + + dependency_map &= write_map.apply_range(source_map.reverse()) + if dependency_map is not None and not dependency_map.is_empty(): + new_happens_after[insn] = VariableSpecificHappensAfter( + instances_rel=dependency_map, variable_name=var + ) + happens_afters.update(new_happens_after) + + happens_afters.update( + narrow_dependencies( + source, + knl.id_to_insn[insn], + access_mapper, + happens_afters, + dependency_map, + ) + ) + + return happens_afters + + access_mapper = AccessMapFinder(knl) + for insn in knl.instructions: + access_mapper(insn.expression, insn.id) + access_mapper(insn.assignee, insn.id) + + wmap_r = {} + for var, insns in knl.writer_map().items(): + for insn in insns: + wmap_r.setdefault(insn, set()) + wmap_r[insn].add(var) + + new_insns = [] + for insn in knl.instructions[::-1]: + new_insns.append( + insn.copy( + happens_after=narrow_dependencies( + insn, insn, access_mapper, {} + ) + ) + ) + + return knl.copy(instructions=new_insns[::-1]) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 604b581e3..ca84a6c18 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -99,36 +99,12 @@ class UseStreamingStoreTag(Tag): @dataclass(frozen=True) class HappensAfter: - """A class representing a "happens-after" relationship between two - statements found in a :class:`loopy.LoopKernel`. Used to validate that a - given kernel transformation respects the data dependencies in a given - program. + instances_rel: isl.Map | None # type: ignore - .. attribute:: variable_name - - The name of the variable responsible for the dependency. For - backward compatibility purposes, this may be *None*. In this case, the - dependency semantics revert to the deprecated, statement-level - dependencies of prior versions of :mod:`loopy`. - - .. attribute:: instances_rel - - An :class:`islpy.Map` representing the precise happens-after - relationship. The domain and range are sets of statement instances. The - instances in the domain are required to execute before the instances in - the range. - - Map dimensions are named according to the order of appearance of the - inames in a :mod:`loopy` program. The dimension names in the range are - appended with a prime to signify that the mapped instances are distinct. - - As a (deprecated) matter of backward compatibility, this may be *None*, - in which case the semantics revert to the (underspecified) - statement-level dependencies of prior versions of :mod:`loopy`. - """ +@dataclass(frozen=True) +class VariableSpecificHappensAfter(HappensAfter): variable_name: str | None - instances_rel: isl.Map | None # }}} @@ -335,14 +311,12 @@ def __init__(self, happens_after = constantdict({ after_id.strip(): HappensAfter( - variable_name=None, instances_rel=None) for after_id in happens_after.split(",") if after_id.strip()}) elif isinstance(happens_after, frozenset): happens_after = constantdict({ after_id: HappensAfter( - variable_name=None, instances_rel=None) for after_id in happens_after}) elif isinstance(happens_after, dict): diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index 955160ac2..fcb59401c 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit 955160ac2f504dabcd8641471a56146fa1afe35d +Subproject commit fcb59401cd61704037002b714519d0f7af2c4c59 diff --git a/loopy/transform/dependency.py b/loopy/transform/dependency.py new file mode 100644 index 000000000..339325a63 --- /dev/null +++ b/loopy/transform/dependency.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +""" +.. autoclass:: AccessMapFinder +""" +__copyright__ = "Copyright (C) 2022 Addison Alvey-Blanco" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + +from loopy.kernel import LoopKernel +from loopy.symbolic import ( + WalkMapper, + get_access_map, \ + UnableToDetermineAccessRangeError, +) +from loopy.typing import Expression + +import pymbolic.primitives as p +from typing import List, Dict +from pyrsistent import pmap, PMap + + +class AccessMapFinder(WalkMapper): + def __init__(self, knl: LoopKernel) -> None: + self.kernel = knl + self._access_maps: PMap[str, PMap[str, isl.Map]] = pmap({}) + from collections import defaultdict + self.bad_subscripts: Dict[str, List[Expression]] = defaultdict(list) + + super().__init__() + + def get_map(self, insn_id: str, variable_name: str) -> isl.Map | None: + """Retrieve an access map indexed by an instruction ID and variable + name. + """ + try: + return self._access_maps[insn_id][variable_name] + except KeyError: + return None + + def get_accessed_variables(self, insn_id: str) -> set[str] | None: + try: + return set(self._access_maps[insn_id].keys()) + except KeyError: + return None + + def map_subscript(self, expr, insn_id): + domain = self.kernel.get_inames_domain( + self.kernel.id_to_insn[insn_id].within_inames + ) + WalkMapper.map_subscript(self, expr, insn_id) + + assert isinstance(expr.aggregate, p.Variable) + + arg_name = expr.aggregate.name + subscript = expr.index_tuple + + try: + access_map = get_access_map( + domain, subscript, self.kernel.assumptions) + except UnableToDetermineAccessRangeError: + # may not have enough info to generate access map at current point + self.bad_subscripts[arg_name].append(expr) + return + + # analyze what we have in our access map dict before storing map + insn_to_args = self._access_maps.get(insn_id) + if insn_to_args is not None: + existing_relation = insn_to_args.get(arg_name) + + if existing_relation is not None: + access_map |= existing_relation + + self._access_maps = self._access_maps.set( + insn_id, self._access_maps[insn_id].set( + arg_name, access_map)) + + else: + self._access_maps = self._access_maps.set( + insn_id, pmap({arg_name: access_map})) + + def map_linear_subscript(self, expr, insn_id): + raise NotImplementedError("linear subscripts cannot be used with " + "precise dependency finding. Use " + "multidimensional accesses to take advantage " + "of this feature.") + + def map_reduction(self, expr, insn_id): + return WalkMapper.map_reduction(self, expr, insn_id) + + def map_type_cast(self, expr, insn_id): + return self.rec(expr.child, insn_id) + + def map_sub_array_ref(self, expr, insn_id): + raise NotImplementedError("Not yet implemented") From 7d3e749ff797479be34c17d8913e7f86fa9b1034 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Fri, 21 Feb 2025 08:19:36 -0600 Subject: [PATCH 02/12] add happensafter chasing --- loopy/kernel/dependency.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index cdc91014d..f9223fcc9 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -116,7 +116,6 @@ def reduce_strict_ordering(knl) -> LoopKernel: def narrow_dependencies( source: InstructionBase, after_insn: InstructionBase, - access_mapper: AccessMapFinder, happens_afters: dict, dependency_map: isl.Map | None = None, # type: ignore ) -> dict: @@ -154,7 +153,6 @@ def narrow_dependencies( narrow_dependencies( source, knl.id_to_insn[insn], - access_mapper, happens_afters, dependency_map, ) @@ -176,11 +174,7 @@ def narrow_dependencies( new_insns = [] for insn in knl.instructions[::-1]: new_insns.append( - insn.copy( - happens_after=narrow_dependencies( - insn, insn, access_mapper, {} - ) - ) + insn.copy(happens_after=narrow_dependencies(insn, insn, {})) ) return knl.copy(instructions=new_insns[::-1]) From d6cb99de60faf5a07061fcab2b81376ce510f219 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Fri, 21 Feb 2025 09:12:45 -0600 Subject: [PATCH 03/12] add some tests for dependencies --- test/test_dependencies.py | 98 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 test/test_dependencies.py diff --git a/test/test_dependencies.py b/test/test_dependencies.py new file mode 100644 index 000000000..df87e3b00 --- /dev/null +++ b/test/test_dependencies.py @@ -0,0 +1,98 @@ +import sys + +import numpy as np +import pytest + +import pyopencl as cl +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) + +import loopy as lp +from loopy.kernel.dependency import ( + add_lexicographic_happens_after, + reduce_strict_ordering, +) + + +def test_no_dependency(): + t_unit = lp.make_kernel( + "{ [i,j] : 0 <= i, j < n}", + """ + a[i,j] = 2*i {id=source} + b[i,j] = a[i,j] {id=sink} + """, + ) + + t_unit = add_lexicographic_happens_after(t_unit) + t_unit = reduce_strict_ordering(t_unit) + knl = t_unit.default_entrypoint + + assert len(knl.id_to_insn["sink"].happens_after) == 0 + + +@pytest.mark.parametrize("img_size", [(512, 512), (1920, 1080)]) +def test_3x3_blur(ctx_factory, img_size): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + hx, hy = img_size + img = np.random.default_rng(seed=42).random(size=(hx, hy)) + + knl = lp.make_kernel( + "{ [x, y]: 0 <= x < hx and 0 <= y < hy }", + """ + img_(i, j) := img[i+1, j+1] + blurx(i, j) := img_(i-1, j) + img_(i, j) + img_(i+1, j) + + out[x, y] = blurx(x, y-1) + blurx(x, y) + blurx(x, y+1) + """, + [ + lp.GlobalArg("out", + dtype=np.float64, + shape=(hx, hy), + is_output=True), + lp.GlobalArg("img", + dtype=np.float64, + shape=(hx, hy)) + ] + ) + + knl = lp.fix_parameters(knl, hx=hx-2, hy=hy-2) + + bsize = 4 + knl = lp.split_iname(knl, "x", bsize, inner_tag="vec", outer_tag="for") + knl = lp.split_iname(knl, "y", bsize, inner_tag="for", outer_tag="g.0") + knl = lp.precompute( + knl, + "blurx", + sweep_inames="x_inner, y_inner", + precompute_outer_inames="x_outer, y_outer", + precompute_inames="bx, by" + ) + + knl = lp.prioritize_loops(knl, "y_outer, x_outer, y_inner, x_inner") + knl = lp.expand_subst(knl) + + knl = add_lexicographic_happens_after(knl) + knl = reduce_strict_ordering(knl) + + _, out = knl(queue, img=img) + blurx = np.zeros_like(img) + out_np = np.zeros_like(img) + for x in range(hx-2): + blurx[x, :] = img[x, :] + img[x+1, :] + img[x+2, :] + for y in range(hy-2): + out_np[:, y] = blurx[:, y] + blurx[:, y+1] + blurx[:, y+2] + + import numpy.linalg as la + assert (la.norm(out[0] - out_np) / la.norm(out_np)) <= 1e-14 + + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + + main([__file__]) From 645d173c1c482750105de964abfa00c9e143943e Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Fri, 21 Feb 2025 09:18:02 -0600 Subject: [PATCH 04/12] fix version mismatch of compyte; fix other target conflicts --- loopy/target/c/compyte | 2 +- loopy/target/cuda.py | 2 +- loopy/target/opencl.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index fcb59401c..955160ac2 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit fcb59401cd61704037002b714519d0f7af2c4c59 +Subproject commit 955160ac2f504dabcd8641471a56146fa1afe35d diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 7a9b4c11a..50d2ac7fe 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -431,7 +431,7 @@ def wrap_global_constant(self, decl: Declarator) -> Declarator: def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: dtype = ary.dtype - vec_size = ary.vector_length() + vec_size = ary.vector_size(self.target) if vec_size > 1: dtype = self.target.vector_dtype(dtype, vec_size) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 07c5b49d0..3fe951c4e 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -751,7 +751,7 @@ def wrap_global_constant(self, decl: Declarator) -> Declarator: def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: dtype = ary.dtype - vec_size = ary.vector_length() + vec_size = ary.vector_size(self.target) if vec_size > 1: dtype = self.target.vector_dtype(dtype, vec_size) From bcf8219ee5678abc81777bb127d3fa8b1b7f1f3e Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Fri, 21 Feb 2025 09:19:15 -0600 Subject: [PATCH 05/12] revert target changes --- loopy/target/cuda.py | 2 +- loopy/target/opencl.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 50d2ac7fe..7a9b4c11a 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -431,7 +431,7 @@ def wrap_global_constant(self, decl: Declarator) -> Declarator: def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: dtype = ary.dtype - vec_size = ary.vector_size(self.target) + vec_size = ary.vector_length() if vec_size > 1: dtype = self.target.vector_dtype(dtype, vec_size) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 3fe951c4e..07c5b49d0 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -751,7 +751,7 @@ def wrap_global_constant(self, decl: Declarator) -> Declarator: def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: dtype = ary.dtype - vec_size = ary.vector_size(self.target) + vec_size = ary.vector_length() if vec_size > 1: dtype = self.target.vector_dtype(dtype, vec_size) From 8553244c7690abe2aa5c56605cdb6d71efbedd02 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Fri, 21 Feb 2025 09:58:44 -0600 Subject: [PATCH 06/12] address some ruff and mypy complaints --- loopy/kernel/dependency.py | 6 +---- loopy/transform/dependency.py | 43 +++++++++++++++++++---------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index f9223fcc9..d15322fe5 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -23,10 +23,6 @@ THE SOFTWARE. """ - -from typing import Mapping - -from immutabledict import immutabledict import islpy as isl from islpy import dim_type @@ -165,7 +161,7 @@ def narrow_dependencies( access_mapper(insn.expression, insn.id) access_mapper(insn.assignee, insn.id) - wmap_r = {} + wmap_r: dict[str, set[str]] = {} for var, insns in knl.writer_map().items(): for insn in insns: wmap_r.setdefault(insn, set()) diff --git a/loopy/transform/dependency.py b/loopy/transform/dependency.py index 339325a63..a60138a25 100644 --- a/loopy/transform/dependency.py +++ b/loopy/transform/dependency.py @@ -1,5 +1,6 @@ from __future__ import annotations + """ .. autoclass:: AccessMapFinder """ @@ -25,31 +26,31 @@ THE SOFTWARE. """ +from pyrsistent import PMap, pmap + import islpy as isl +import pymbolic.primitives as p from loopy.kernel import LoopKernel from loopy.symbolic import ( - WalkMapper, - get_access_map, \ - UnableToDetermineAccessRangeError, + UnableToDetermineAccessRangeError, + WalkMapper, + get_access_map, ) from loopy.typing import Expression -import pymbolic.primitives as p -from typing import List, Dict -from pyrsistent import pmap, PMap - class AccessMapFinder(WalkMapper): def __init__(self, knl: LoopKernel) -> None: self.kernel = knl - self._access_maps: PMap[str, PMap[str, isl.Map]] = pmap({}) + self._access_maps: PMap[str, PMap[str, isl.Map]] = pmap({}) # type: ignore from collections import defaultdict - self.bad_subscripts: Dict[str, List[Expression]] = defaultdict(list) + + self.bad_subscripts: dict[str, list[Expression]] = defaultdict(list) super().__init__() - def get_map(self, insn_id: str, variable_name: str) -> isl.Map | None: + def get_map(self, insn_id: str, variable_name: str) -> isl.Map | None: # type: ignore """Retrieve an access map indexed by an instruction ID and variable name. """ @@ -66,7 +67,7 @@ def get_accessed_variables(self, insn_id: str) -> set[str] | None: def map_subscript(self, expr, insn_id): domain = self.kernel.get_inames_domain( - self.kernel.id_to_insn[insn_id].within_inames + self.kernel.id_to_insn[insn_id].within_inames ) WalkMapper.map_subscript(self, expr, insn_id) @@ -76,8 +77,7 @@ def map_subscript(self, expr, insn_id): subscript = expr.index_tuple try: - access_map = get_access_map( - domain, subscript, self.kernel.assumptions) + access_map = get_access_map(domain, subscript, self.kernel.assumptions) except UnableToDetermineAccessRangeError: # may not have enough info to generate access map at current point self.bad_subscripts[arg_name].append(expr) @@ -92,18 +92,21 @@ def map_subscript(self, expr, insn_id): access_map |= existing_relation self._access_maps = self._access_maps.set( - insn_id, self._access_maps[insn_id].set( - arg_name, access_map)) + insn_id, self._access_maps[insn_id].set(arg_name, access_map) + ) else: self._access_maps = self._access_maps.set( - insn_id, pmap({arg_name: access_map})) + insn_id, pmap({arg_name: access_map}) + ) def map_linear_subscript(self, expr, insn_id): - raise NotImplementedError("linear subscripts cannot be used with " - "precise dependency finding. Use " - "multidimensional accesses to take advantage " - "of this feature.") + raise NotImplementedError( + "linear subscripts cannot be used with " + "precise dependency finding. Use " + "multidimensional accesses to take advantage " + "of this feature." + ) def map_reduction(self, expr, insn_id): return WalkMapper.map_reduction(self, expr, insn_id) From a88e4d685ca3b79d1db768f6f8d4c4f4ed5b142a Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Fri, 21 Feb 2025 10:36:18 -0600 Subject: [PATCH 07/12] add odd-even test --- loopy/kernel/dependency.py | 5 ++--- test/test_dependencies.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index d15322fe5..7ac7c1eba 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -58,7 +58,6 @@ def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: happens_after.get_dim_name(dim_type.out, idim) + "'" ) - # NOTE: using this in place of what's in the fold breaks stuff bc sets shared_inames = before_insn.within_inames & after_insn.within_inames # {{{ removes non-determinism from 'bad' ordering of inames @@ -121,7 +120,7 @@ def narrow_dependencies( if dependency_map is not None and dependency_map.is_empty(): return happens_afters - new_happens_after = {} + new_happens_after: dict[str, VariableSpecificHappensAfter] = {} for insn, happens_after in after_insn.happens_after.items(): if dependency_map is None: dependency_map = happens_after.instances_rel @@ -131,7 +130,7 @@ def narrow_dependencies( ) common_vars = \ - wmap_r[insn] & access_mapper.get_accessed_variables(source.id) + wmap_r[insn] & access_mapper.get_accessed_variables(source.id) # type: ignore for var in common_vars: write_map = access_mapper.get_map(insn, var) source_map = access_mapper.get_map(source.id, var) diff --git a/test/test_dependencies.py b/test/test_dependencies.py index df87e3b00..440c957fd 100644 --- a/test/test_dependencies.py +++ b/test/test_dependencies.py @@ -31,6 +31,43 @@ def test_no_dependency(): assert len(knl.id_to_insn["sink"].happens_after) == 0 +def test_odd_even_dependencies(): + t_unit = lp.make_kernel( + "{ [i] : 0 <= i < np }", + """ + u[i] = i {id=no_deps_0} + u[2*i+1] = i {id=src_odd_0} + u[2*i] = i {id=src_even_0} + u[i] = i {id=sink_0} + + u[i] = i {id=no_deps_1} + u[2*i+1] = i {id=src_odd_1} + u[2*i] = i {id=src_even_1} + u[i] = i {id=sink_1} + + u[i] = i {id=no_deps_2} + u[2*i+1] = i {id=src_odd_2} + u[2*i] = i {id=src_even_2} + u[i] = i {id=sink_2} + """ + ) + + t_unit = add_lexicographic_happens_after(t_unit) + t_unit = reduce_strict_ordering(t_unit) + + knl = t_unit.default_entrypoint + for i in range(3): + assert len(knl.id_to_insn[f"src_odd_{i}"].happens_after) == 0 + assert len(knl.id_to_insn[f"src_even_{i}"].happens_after) == 0 + assert len(knl.id_to_insn[f"no_deps_{i}"].happens_after) == 0 + assert len(knl.id_to_insn[f"sink_{i}"].happens_after) == 2 + for dep_id in knl.id_to_insn[f"sink_{i}"].happens_after.keys(): + assert ((dep_id == f"src_odd_{i}") or (dep_id == f"src_even_{i}")) + + for insn in knl.instructions: + print(f"{insn.id}: {insn.happens_after}") + + @pytest.mark.parametrize("img_size", [(512, 512), (1920, 1080)]) def test_3x3_blur(ctx_factory, img_size): ctx = ctx_factory() From 6c41a85399439e3a5c9dad1f69bfad4eaeca6468 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Tue, 11 Mar 2025 08:27:15 -0500 Subject: [PATCH 08/12] fix buggy dependency finding --- loopy/kernel/dependency.py | 54 ++++++++++++++++++++------------------ test/test_dependencies.py | 13 ++++----- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index 7ac7c1eba..a3946bf6f 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -26,6 +26,8 @@ import islpy as isl from islpy import dim_type +from typing import Mapping + from loopy import HappensAfter, LoopKernel, for_each_kernel from loopy.kernel.instruction import ( InstructionBase, @@ -49,8 +51,8 @@ def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: domain_before = knl.get_inames_domain(before_insn.within_inames) domain_after = knl.get_inames_domain(after_insn.within_inames) - happens_after = isl.Map.from_domain_and_range(domain_before, - domain_after) + happens_after = isl.Map.from_domain_and_range(domain_after, + domain_before) for idim in range(happens_after.dim(dim_type.out)): happens_after = happens_after.set_dim_name( dim_type.out, @@ -60,7 +62,7 @@ def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: shared_inames = before_insn.within_inames & after_insn.within_inames - # {{{ removes non-determinism from 'bad' ordering of inames + # {{{ use whatever iname ordering exists at this point shared_inames_order_before = [ domain_before.get_dim_name(dim_type.out, idim) @@ -86,7 +88,7 @@ def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: lex_map = isl.Map.empty(happens_after.space) for iinnermost, innermost_iname in enumerate(shared_inames_order): - innermost_map = affs_in[innermost_iname].lt_map( + innermost_map = affs_in[innermost_iname].gt_map( affs_out[innermost_iname + "'"] ) @@ -109,35 +111,37 @@ def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: @for_each_kernel def reduce_strict_ordering(knl) -> LoopKernel: def narrow_dependencies( - source: InstructionBase, - after_insn: InstructionBase, - happens_afters: dict, - dependency_map: isl.Map | None = None, # type: ignore - ) -> dict: - assert isinstance(source.id, str) - assert isinstance(after_insn.id, str) - - if dependency_map is not None and dependency_map.is_empty(): + after: InstructionBase, + before: InstructionBase, + happens_afters: Mapping, + remaining: isl.Map | None = None, # type: ignore + ) -> Mapping: + assert isinstance(after.id, str) + assert isinstance(before.id, str) + + if remaining is not None and remaining.is_empty(): return happens_afters new_happens_after: dict[str, VariableSpecificHappensAfter] = {} - for insn, happens_after in after_insn.happens_after.items(): - if dependency_map is None: - dependency_map = happens_after.instances_rel + for insn, happens_after in before.happens_after.items(): + if remaining is None: + remaining = happens_after.instances_rel else: - dependency_map = dependency_map.apply_range( - happens_after.instances_rel - ) + assert happens_after.instances_rel is not None + if remaining.space != happens_after.instances_rel.space: + remaining = remaining.apply_range(happens_after.instances_rel) - common_vars = \ - wmap_r[insn] & access_mapper.get_accessed_variables(source.id) # type: ignore + source_vars = access_mapper.get_accessed_variables(after.id) + common_vars = wmap_r[insn] & source_vars # type: ignore for var in common_vars: write_map = access_mapper.get_map(insn, var) - source_map = access_mapper.get_map(source.id, var) + source_map = access_mapper.get_map(after.id, var) assert write_map is not None assert source_map is not None - dependency_map &= write_map.apply_range(source_map.reverse()) + source_to_writer = source_map.apply_range(write_map.reverse()) + dependency_map = source_to_writer & remaining + remaining = remaining - dependency_map if dependency_map is not None and not dependency_map.is_empty(): new_happens_after[insn] = VariableSpecificHappensAfter( instances_rel=dependency_map, variable_name=var @@ -146,10 +150,10 @@ def narrow_dependencies( happens_afters.update( narrow_dependencies( - source, + after, knl.id_to_insn[insn], happens_afters, - dependency_map, + remaining, ) ) diff --git a/test/test_dependencies.py b/test/test_dependencies.py index 440c957fd..aba898758 100644 --- a/test/test_dependencies.py +++ b/test/test_dependencies.py @@ -28,6 +28,7 @@ def test_no_dependency(): t_unit = reduce_strict_ordering(t_unit) knl = t_unit.default_entrypoint + print(knl.id_to_insn["sink"].happens_after) assert len(knl.id_to_insn["sink"].happens_after) == 0 @@ -35,7 +36,6 @@ def test_odd_even_dependencies(): t_unit = lp.make_kernel( "{ [i] : 0 <= i < np }", """ - u[i] = i {id=no_deps_0} u[2*i+1] = i {id=src_odd_0} u[2*i] = i {id=src_even_0} u[i] = i {id=sink_0} @@ -45,7 +45,6 @@ def test_odd_even_dependencies(): u[2*i] = i {id=src_even_1} u[i] = i {id=sink_1} - u[i] = i {id=no_deps_2} u[2*i+1] = i {id=src_odd_2} u[2*i] = i {id=src_even_2} u[i] = i {id=sink_2} @@ -56,17 +55,19 @@ def test_odd_even_dependencies(): t_unit = reduce_strict_ordering(t_unit) knl = t_unit.default_entrypoint + for insn in knl.instructions: + print(f"{insn.id}:") + for after, happens_after in insn.happens_after.items(): + print(f"\t{after}: {happens_after.instances_rel}") + + for i in range(3): assert len(knl.id_to_insn[f"src_odd_{i}"].happens_after) == 0 assert len(knl.id_to_insn[f"src_even_{i}"].happens_after) == 0 - assert len(knl.id_to_insn[f"no_deps_{i}"].happens_after) == 0 assert len(knl.id_to_insn[f"sink_{i}"].happens_after) == 2 for dep_id in knl.id_to_insn[f"sink_{i}"].happens_after.keys(): assert ((dep_id == f"src_odd_{i}") or (dep_id == f"src_even_{i}")) - for insn in knl.instructions: - print(f"{insn.id}: {insn.happens_after}") - @pytest.mark.parametrize("img_size", [(512, 512), (1920, 1080)]) def test_3x3_blur(ctx_factory, img_size): From d48de86c2b2fa9d3738b01860da640eb2ca20cb2 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Thu, 13 Mar 2025 22:01:13 -0500 Subject: [PATCH 09/12] add self dependence checking --- loopy/kernel/dependency.py | 148 +++++++++++++++++++++------------- loopy/transform/dependency.py | 3 +- test/test_dependencies.py | 30 ++++--- 3 files changed, 111 insertions(+), 70 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index a3946bf6f..2e395c344 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -36,6 +36,63 @@ from loopy.transform.dependency import AccessMapFinder +def _add_lexicographic_happens_after_inner(knl, after_insn, before_insn): + domain_before = knl.get_inames_domain(before_insn.within_inames) + domain_after = knl.get_inames_domain(after_insn.within_inames) + + happens_after = isl.Map.from_domain_and_range(domain_after, + domain_before) + for idim in range(happens_after.dim(dim_type.out)): + happens_after = happens_after.set_dim_name( + dim_type.out, + idim, + happens_after.get_dim_name(dim_type.out, idim) + "'" + ) + + shared_inames = before_insn.within_inames & after_insn.within_inames + + # {{{ use whatever iname ordering exists at this point + + shared_inames_order_before = [ + domain_before.get_dim_name(dim_type.out, idim) + for idim in range(domain_before.dim(dim_type.out)) + if domain_before.get_dim_name(dim_type.out, idim) + in shared_inames + ] + + shared_inames_order_after = [ + domain_after.get_dim_name(dim_type.out, idim) + for idim in range(domain_after.dim(dim_type.out)) + if domain_after.get_dim_name(dim_type.out, idim) + in shared_inames + ] + + assert shared_inames_order_after == shared_inames_order_before + shared_inames_order = shared_inames_order_after + + # }}} + + affs_in = isl.affs_from_space(happens_after.domain().space) + affs_out = isl.affs_from_space(happens_after.range().space) + + lex_map = isl.Map.empty(happens_after.space) + for iinnermost, innermost_iname in enumerate(shared_inames_order): + innermost_map = affs_in[innermost_iname].gt_map( + affs_out[innermost_iname + "'"] + ) + + for outer_iname in list(shared_inames_order)[:iinnermost]: + innermost_map = innermost_map & ( + affs_in[outer_iname].eq_map( + affs_out[outer_iname + "'"] + ) + ) + + lex_map = lex_map | innermost_map + + return happens_after & lex_map + + @for_each_kernel def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: """ @@ -44,65 +101,39 @@ def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: :func:`reduce_strict_ordering_with_dependencies` using data dependencies. """ - new_insns = [knl.instructions[0].copy()] - for iafter, after_insn in enumerate(knl.instructions[1:], start=1): - before_insn = knl.instructions[iafter-1] - - domain_before = knl.get_inames_domain(before_insn.within_inames) - domain_after = knl.get_inames_domain(after_insn.within_inames) - - happens_after = isl.Map.from_domain_and_range(domain_after, - domain_before) - for idim in range(happens_after.dim(dim_type.out)): - happens_after = happens_after.set_dim_name( - dim_type.out, - idim, - happens_after.get_dim_name(dim_type.out, idim) + "'" - ) - - shared_inames = before_insn.within_inames & after_insn.within_inames - - # {{{ use whatever iname ordering exists at this point - - shared_inames_order_before = [ - domain_before.get_dim_name(dim_type.out, idim) - for idim in range(domain_before.dim(dim_type.out)) - if domain_before.get_dim_name(dim_type.out, idim) - in shared_inames - ] + rmap = knl.reader_map() + wmap_r: dict[str, set[str]] = {} + for var, insns in knl.writer_map().items(): + for insn in insns: + wmap_r.setdefault(insn, set()) + wmap_r[insn].add(var) - shared_inames_order_after = [ - domain_after.get_dim_name(dim_type.out, idim) - for idim in range(domain_after.dim(dim_type.out)) - if domain_after.get_dim_name(dim_type.out, idim) - in shared_inames - ] + new_insns = [] + for iafter, after_insn in enumerate(knl.instructions): + assert after_insn.id is not None - assert shared_inames_order_after == shared_inames_order_before - shared_inames_order = shared_inames_order_after + new_happens_after = {} - # }}} + # check for self dependencies + for var in wmap_r[after_insn.id]: + if rmap.get(var) and after_insn.id in rmap[var]: + self_happens_after = _add_lexicographic_happens_after_inner( + knl, after_insn, after_insn + ) + new_happens_after[after_insn.id] = HappensAfter( + self_happens_after + ) - affs_in = isl.affs_from_space(happens_after.domain().space) - affs_out = isl.affs_from_space(happens_after.range().space) + # add happens after relation with previous instruction + if iafter != 0: + before_insn = knl.instructions[iafter - 1] - lex_map = isl.Map.empty(happens_after.space) - for iinnermost, innermost_iname in enumerate(shared_inames_order): - innermost_map = affs_in[innermost_iname].gt_map( - affs_out[innermost_iname + "'"] + happens_after = _add_lexicographic_happens_after_inner( + knl, after_insn, before_insn ) - for outer_iname in list(shared_inames_order)[:iinnermost]: - innermost_map = innermost_map & ( - affs_in[outer_iname].eq_map( - affs_out[outer_iname + "'"] - ) - ) - - lex_map = lex_map | innermost_map + new_happens_after[before_insn.id] = HappensAfter(happens_after) - happens_after = happens_after & lex_map - new_happens_after = {before_insn.id: HappensAfter(happens_after)} new_insns.append(after_insn.copy(happens_after=new_happens_after)) return knl.copy(instructions=new_insns) @@ -148,14 +179,15 @@ def narrow_dependencies( ) happens_afters.update(new_happens_after) - happens_afters.update( - narrow_dependencies( - after, - knl.id_to_insn[insn], - happens_afters, - remaining, + if insn != after.id: + happens_afters.update( + narrow_dependencies( + after, + knl.id_to_insn[insn], + happens_afters, + remaining, + ) ) - ) return happens_afters diff --git a/loopy/transform/dependency.py b/loopy/transform/dependency.py index a60138a25..4ae3f77a8 100644 --- a/loopy/transform/dependency.py +++ b/loopy/transform/dependency.py @@ -51,7 +51,8 @@ def __init__(self, knl: LoopKernel) -> None: super().__init__() def get_map(self, insn_id: str, variable_name: str) -> isl.Map | None: # type: ignore - """Retrieve an access map indexed by an instruction ID and variable + """ + Retrieve an access map indexed by an instruction ID and variable name. """ try: diff --git a/test/test_dependencies.py b/test/test_dependencies.py index aba898758..5649eef3d 100644 --- a/test/test_dependencies.py +++ b/test/test_dependencies.py @@ -19,8 +19,8 @@ def test_no_dependency(): t_unit = lp.make_kernel( "{ [i,j] : 0 <= i, j < n}", """ - a[i,j] = 2*i {id=source} - b[i,j] = a[i,j] {id=sink} + a[i,j] = 2*i {id=S} + b[i,j] = a[i,j] {id=T} """, ) @@ -28,8 +28,8 @@ def test_no_dependency(): t_unit = reduce_strict_ordering(t_unit) knl = t_unit.default_entrypoint - print(knl.id_to_insn["sink"].happens_after) - assert len(knl.id_to_insn["sink"].happens_after) == 0 + assert len(knl.id_to_insn["S"].happens_after) == 0 + assert len(knl.id_to_insn["T"].happens_after) == 0 def test_odd_even_dependencies(): @@ -40,7 +40,6 @@ def test_odd_even_dependencies(): u[2*i] = i {id=src_even_0} u[i] = i {id=sink_0} - u[i] = i {id=no_deps_1} u[2*i+1] = i {id=src_odd_1} u[2*i] = i {id=src_even_1} u[i] = i {id=sink_1} @@ -55,12 +54,6 @@ def test_odd_even_dependencies(): t_unit = reduce_strict_ordering(t_unit) knl = t_unit.default_entrypoint - for insn in knl.instructions: - print(f"{insn.id}:") - for after, happens_after in insn.happens_after.items(): - print(f"\t{after}: {happens_after.instances_rel}") - - for i in range(3): assert len(knl.id_to_insn[f"src_odd_{i}"].happens_after) == 0 assert len(knl.id_to_insn[f"src_even_{i}"].happens_after) == 0 @@ -127,6 +120,21 @@ def test_3x3_blur(ctx_factory, img_size): assert (la.norm(out[0] - out_np) / la.norm(out_np)) <= 1e-14 +def test_self_dependence(): + t_unit = lp.make_kernel( + "[nt, nx] -> { [t, x]: 0 <= t < nt and 0 <= x < nx }", + """ + u[t+2,x+1] = 2*u[t+1,x+1] {id=self} + """ + ) + + t_unit = add_lexicographic_happens_after(t_unit) + t_unit = reduce_strict_ordering(t_unit) + + knl = t_unit.default_entrypoint + assert "self" in knl.instructions[0].happens_after.keys() + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) From ef87637a1c7e27db696755040fc33ff5501e9402 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Thu, 13 Mar 2025 22:37:00 -0500 Subject: [PATCH 10/12] get rid of in-place updates --- loopy/kernel/dependency.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index 2e395c344..fb65beab1 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -23,6 +23,7 @@ THE SOFTWARE. """ +from constantdict import constantdict import islpy as isl from islpy import dim_type @@ -51,8 +52,6 @@ def _add_lexicographic_happens_after_inner(knl, after_insn, before_insn): shared_inames = before_insn.within_inames & after_insn.within_inames - # {{{ use whatever iname ordering exists at this point - shared_inames_order_before = [ domain_before.get_dim_name(dim_type.out, idim) for idim in range(domain_before.dim(dim_type.out)) @@ -70,8 +69,6 @@ def _add_lexicographic_happens_after_inner(knl, after_insn, before_insn): assert shared_inames_order_after == shared_inames_order_before shared_inames_order = shared_inames_order_after - # }}} - affs_in = isl.affs_from_space(happens_after.domain().space) affs_out = isl.affs_from_space(happens_after.range().space) @@ -124,14 +121,11 @@ def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: self_happens_after ) - # add happens after relation with previous instruction if iafter != 0: before_insn = knl.instructions[iafter - 1] - happens_after = _add_lexicographic_happens_after_inner( knl, after_insn, before_insn ) - new_happens_after[before_insn.id] = HappensAfter(happens_after) new_insns.append(after_insn.copy(happens_after=new_happens_after)) @@ -144,16 +138,17 @@ def reduce_strict_ordering(knl) -> LoopKernel: def narrow_dependencies( after: InstructionBase, before: InstructionBase, - happens_afters: Mapping, + happens_afters: Mapping[str, VariableSpecificHappensAfter] = {}, remaining: isl.Map | None = None, # type: ignore - ) -> Mapping: + ) -> Mapping[str, VariableSpecificHappensAfter]: + # FIXME: can we get rid of all the "assert x is not None" stuff? + assert isinstance(after.id, str) assert isinstance(before.id, str) if remaining is not None and remaining.is_empty(): return happens_afters - new_happens_after: dict[str, VariableSpecificHappensAfter] = {} for insn, happens_after in before.happens_after.items(): if remaining is None: remaining = happens_after.instances_rel @@ -167,6 +162,7 @@ def narrow_dependencies( for var in common_vars: write_map = access_mapper.get_map(insn, var) source_map = access_mapper.get_map(after.id, var) + assert write_map is not None assert source_map is not None @@ -174,19 +170,21 @@ def narrow_dependencies( dependency_map = source_to_writer & remaining remaining = remaining - dependency_map if dependency_map is not None and not dependency_map.is_empty(): - new_happens_after[insn] = VariableSpecificHappensAfter( - instances_rel=dependency_map, variable_name=var + happens_after_obj = VariableSpecificHappensAfter( + dependency_map, var ) - happens_afters.update(new_happens_after) + + happens_afters = constantdict( + dict(happens_afters) | {insn: happens_after_obj}) if insn != after.id: - happens_afters.update( - narrow_dependencies( + happens_afters = constantdict( + dict(happens_afters) | dict(narrow_dependencies( after, knl.id_to_insn[insn], happens_afters, remaining, - ) + )) ) return happens_afters @@ -205,7 +203,7 @@ def narrow_dependencies( new_insns = [] for insn in knl.instructions[::-1]: new_insns.append( - insn.copy(happens_after=narrow_dependencies(insn, insn, {})) + insn.copy(happens_after=narrow_dependencies(insn, insn)) ) return knl.copy(instructions=new_insns[::-1]) From a22f4728795a4aede7cd7f1b6616ae9dc4411116 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Fri, 14 Mar 2025 09:35:32 -0500 Subject: [PATCH 11/12] use ruff to fix ruff complaints --- loopy/kernel/dependency.py | 5 +++-- loopy/transform/dependency.py | 12 +++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index fb65beab1..e0aa919cc 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -23,12 +23,13 @@ THE SOFTWARE. """ +from typing import Mapping + from constantdict import constantdict + import islpy as isl from islpy import dim_type -from typing import Mapping - from loopy import HappensAfter, LoopKernel, for_each_kernel from loopy.kernel.instruction import ( InstructionBase, diff --git a/loopy/transform/dependency.py b/loopy/transform/dependency.py index 4ae3f77a8..39604f505 100644 --- a/loopy/transform/dependency.py +++ b/loopy/transform/dependency.py @@ -26,18 +26,24 @@ THE SOFTWARE. """ +from typing import TYPE_CHECKING + from pyrsistent import PMap, pmap -import islpy as isl import pymbolic.primitives as p -from loopy.kernel import LoopKernel from loopy.symbolic import ( UnableToDetermineAccessRangeError, WalkMapper, get_access_map, ) -from loopy.typing import Expression + + +if TYPE_CHECKING: + import islpy as isl + + from loopy.kernel import LoopKernel + from loopy.typing import Expression class AccessMapFinder(WalkMapper): From 929b33e8f472c4b935a7b22cd55556ff1c2ddc66 Mon Sep 17 00:00:00 2001 From: Addison Alvey-Blanco Date: Mon, 24 Mar 2025 19:09:15 -0500 Subject: [PATCH 12/12] whittle away domain of dependee instead of happens after --- loopy/kernel/dependency.py | 36 ++++++++++++++++++++++++------------ test/test_dependencies.py | 37 +++++++++++++++---------------------- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py index e0aa919cc..5512c62a6 100644 --- a/loopy/kernel/dependency.py +++ b/loopy/kernel/dependency.py @@ -68,7 +68,7 @@ def _add_lexicographic_happens_after_inner(knl, after_insn, before_insn): ] assert shared_inames_order_after == shared_inames_order_before - shared_inames_order = shared_inames_order_after + shared_inames_order = list(shared_inames_order_after) affs_in = isl.affs_from_space(happens_after.domain().space) affs_out = isl.affs_from_space(happens_after.range().space) @@ -79,13 +79,20 @@ def _add_lexicographic_happens_after_inner(knl, after_insn, before_insn): affs_out[innermost_iname + "'"] ) - for outer_iname in list(shared_inames_order)[:iinnermost]: + for outer_iname in shared_inames_order[:iinnermost]: innermost_map = innermost_map & ( affs_in[outer_iname].eq_map( affs_out[outer_iname + "'"] ) ) + if before_insn != after_insn: + innermost_map = innermost_map | ( + affs_in[shared_inames_order[iinnermost]].eq_map( + affs_out[shared_inames_order[iinnermost] + "'"] + ) + ) + lex_map = lex_map | innermost_map return happens_after & lex_map @@ -139,24 +146,25 @@ def reduce_strict_ordering(knl) -> LoopKernel: def narrow_dependencies( after: InstructionBase, before: InstructionBase, + remaining_instances: isl.Set, # type: ignore happens_afters: Mapping[str, VariableSpecificHappensAfter] = {}, - remaining: isl.Map | None = None, # type: ignore + happens_after_map: isl.Map | None = None, # type: ignore ) -> Mapping[str, VariableSpecificHappensAfter]: # FIXME: can we get rid of all the "assert x is not None" stuff? assert isinstance(after.id, str) assert isinstance(before.id, str) - if remaining is not None and remaining.is_empty(): + if remaining_instances.is_empty(): return happens_afters for insn, happens_after in before.happens_after.items(): - if remaining is None: - remaining = happens_after.instances_rel + if happens_after_map is None: + happens_after_map = happens_after.instances_rel else: assert happens_after.instances_rel is not None - if remaining.space != happens_after.instances_rel.space: - remaining = remaining.apply_range(happens_after.instances_rel) + happens_after_map = happens_after_map.apply_range( + happens_after.instances_rel) source_vars = access_mapper.get_accessed_variables(after.id) common_vars = wmap_r[insn] & source_vars # type: ignore @@ -168,8 +176,8 @@ def narrow_dependencies( assert source_map is not None source_to_writer = source_map.apply_range(write_map.reverse()) - dependency_map = source_to_writer & remaining - remaining = remaining - dependency_map + dependency_map = source_to_writer & happens_after_map + remaining_instances = remaining_instances - dependency_map.domain() if dependency_map is not None and not dependency_map.is_empty(): happens_after_obj = VariableSpecificHappensAfter( dependency_map, var @@ -183,8 +191,9 @@ def narrow_dependencies( dict(happens_afters) | dict(narrow_dependencies( after, knl.id_to_insn[insn], + remaining_instances, happens_afters, - remaining, + happens_after_map, )) ) @@ -204,7 +213,10 @@ def narrow_dependencies( new_insns = [] for insn in knl.instructions[::-1]: new_insns.append( - insn.copy(happens_after=narrow_dependencies(insn, insn)) + insn.copy(happens_after=narrow_dependencies( + after=insn, + before=insn, + remaining_instances=knl.get_inames_domain(insn.within_inames))) ) return knl.copy(instructions=new_insns[::-1]) diff --git a/test/test_dependencies.py b/test/test_dependencies.py index 5649eef3d..a9117515e 100644 --- a/test/test_dependencies.py +++ b/test/test_dependencies.py @@ -19,8 +19,8 @@ def test_no_dependency(): t_unit = lp.make_kernel( "{ [i,j] : 0 <= i, j < n}", """ - a[i,j] = 2*i {id=S} - b[i,j] = a[i,j] {id=T} + a[i,j] = 2*i {id=S} + b[i,j] = a[i+1,j+1] {id=T} """, ) @@ -36,17 +36,9 @@ def test_odd_even_dependencies(): t_unit = lp.make_kernel( "{ [i] : 0 <= i < np }", """ - u[2*i+1] = i {id=src_odd_0} - u[2*i] = i {id=src_even_0} - u[i] = i {id=sink_0} - - u[2*i+1] = i {id=src_odd_1} - u[2*i] = i {id=src_even_1} - u[i] = i {id=sink_1} - - u[2*i+1] = i {id=src_odd_2} - u[2*i] = i {id=src_even_2} - u[i] = i {id=sink_2} + u[2*i+1] = i {id=S} + u[2*i] = i {id=T} + u[i] = i {id=V} """ ) @@ -54,12 +46,12 @@ def test_odd_even_dependencies(): t_unit = reduce_strict_ordering(t_unit) knl = t_unit.default_entrypoint - for i in range(3): - assert len(knl.id_to_insn[f"src_odd_{i}"].happens_after) == 0 - assert len(knl.id_to_insn[f"src_even_{i}"].happens_after) == 0 - assert len(knl.id_to_insn[f"sink_{i}"].happens_after) == 2 - for dep_id in knl.id_to_insn[f"sink_{i}"].happens_after.keys(): - assert ((dep_id == f"src_odd_{i}") or (dep_id == f"src_even_{i}")) + assert "S" in knl.id_to_insn["V"].happens_after + assert "T" in knl.id_to_insn["V"].happens_after + for insn in knl.instructions: + print(f"{insn.id}:") + for insn_after, instances_rel in insn.happens_after.items(): + print(f" {insn_after}: {instances_rel}") @pytest.mark.parametrize("img_size", [(512, 512), (1920, 1080)]) @@ -91,6 +83,9 @@ def test_3x3_blur(ctx_factory, img_size): knl = lp.fix_parameters(knl, hx=hx-2, hy=hy-2) + knl = add_lexicographic_happens_after(knl) + knl = reduce_strict_ordering(knl) + bsize = 4 knl = lp.split_iname(knl, "x", bsize, inner_tag="vec", outer_tag="for") knl = lp.split_iname(knl, "y", bsize, inner_tag="for", outer_tag="g.0") @@ -105,9 +100,6 @@ def test_3x3_blur(ctx_factory, img_size): knl = lp.prioritize_loops(knl, "y_outer, x_outer, y_inner, x_inner") knl = lp.expand_subst(knl) - knl = add_lexicographic_happens_after(knl) - knl = reduce_strict_ordering(knl) - _, out = knl(queue, img=img) blurx = np.zeros_like(img) out_np = np.zeros_like(img) @@ -133,6 +125,7 @@ def test_self_dependence(): knl = t_unit.default_entrypoint assert "self" in knl.instructions[0].happens_after.keys() + print(knl.id_to_insn["self"].happens_after["self"].instances_rel) if __name__ == "__main__":