From ac5f97417907980e07cbc807cdd06326d61834bd Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 4 Apr 2025 15:01:57 -0500 Subject: [PATCH 01/14] Fix ruff 0.11.4 error --- arraycontext/impl/pyopencl/taggable_cl_array.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arraycontext/impl/pyopencl/taggable_cl_array.py b/arraycontext/impl/pyopencl/taggable_cl_array.py index 39f92586..4a6e1861 100644 --- a/arraycontext/impl/pyopencl/taggable_cl_array.py +++ b/arraycontext/impl/pyopencl/taggable_cl_array.py @@ -119,9 +119,9 @@ def with_tagged_axis(self, iaxis: int, """ Returns a copy of *self* with *iaxis*-th axis tagged with *tags*. """ - new_axes = (self.axes[:iaxis] - + (self.axes[iaxis].tagged(tags),) - + self.axes[iaxis+1:]) + new_axes = ((*self.axes[:iaxis], + self.axes[iaxis].tagged(tags), + *self.axes[iaxis + 1:])) return type(self)(None, tags=self.tags, axes=new_axes, **_unwrap_cl_array(self)) From 676d4bcfd4bcedbfd39ce10e6735f2ab38ef1de2 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 4 Apr 2025 12:51:36 -0700 Subject: [PATCH 02/14] PytatoPyOpenCLArrayContext: add support for kernel profiling --- arraycontext/impl/pytato/__init__.py | 120 ++++++++++++++++++++++++++- arraycontext/impl/pytato/compile.py | 12 ++- 2 files changed, 127 insertions(+), 5 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 52b97403..81e2678d 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -54,10 +54,13 @@ import abc import sys from collections.abc import Callable +from dataclasses import dataclass from typing import TYPE_CHECKING, Any import numpy as np +import pyopencl as cl +import pytools from pytools import memoize_method from pytools.tag import Tag, ToTagSetConvertible, normalize_tags @@ -74,7 +77,6 @@ if TYPE_CHECKING: import loopy as lp - import pyopencl as cl import pytato if getattr(sys, "_BUILDING_SPHINX_DOCS", False): @@ -235,6 +237,24 @@ def get_target(self): # {{{ PytatoPyOpenCLArrayContext + +@dataclass +class ProfileEvent: + """Holds a profile event that has not been collected by the profiler yet.""" + + cl_event: cl._cl.Event + translation_unit: Any + # args_tuple: tuple + + +@dataclass +class MultiCallKernelProfile: + """Class to hold the results of multiple kernel executions.""" + + num_calls: int + time: int + + class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): """ An :class:`ArrayContext` that uses :mod:`pytato` data types to represent @@ -259,7 +279,7 @@ def __init__( self, queue: cl.CommandQueue, allocator=None, *, use_memory_pool: bool | None = None, compile_trace_callback: Callable[[Any, str, Any], None] | None = None, - + profile_kernels: bool = False, # do not use: only for testing _force_svm_arg_limit: int | None = None, ) -> None: @@ -271,9 +291,26 @@ def __init__( representation. This interface should be considered unstable. """ + import pyopencl as cl + if allocator is not None and use_memory_pool is not None: raise TypeError("may not specify both allocator and use_memory_pool") + self.profile_kernels = profile_kernels + + if profile_kernels: + if not queue.properties & cl.command_queue_properties.PROFILING_ENABLE: + raise RuntimeError("Profiling was not enabled in the command queue. " + "Please create the queue with " + "cl.command_queue_properties.PROFILING_ENABLE.") + + # List of ProfileEvents that haven't been transferred to profiled + # results yet + self.profile_events: list[ProfileEvent] = [] + + # Dict of kernel name -> list of kernel execution times + self.profile_results: dict[str, list[int]] = {} + self.using_svm = None if allocator is None: @@ -322,6 +359,79 @@ def __init__( self._force_svm_arg_limit = _force_svm_arg_limit + def _wait_and_transfer_profile_events(self) -> None: + # First, wait for completion of all events + if self.profile_events: + cl.wait_for_events([p_event.cl_event for p_event in self.profile_events]) + + # Then, collect all events and store them + for t in self.profile_events: + name = t.translation_unit.program.entrypoint + + time = t.cl_event.profile.end - t.cl_event.profile.start + + self.profile_results.setdefault(name, []).append(time) + + self.profile_events = [] + + def get_profiling_data_for_kernel(self, kernel_name: str) \ + -> MultiCallKernelProfile: + """Return profiling data for kernel `kernel_name`.""" + self._wait_and_transfer_profile_events() + + time = 0 + num_calls = 0 + + if kernel_name in self.profile_results: + knl_results = self.profile_results[kernel_name] + + num_calls = len(knl_results) + + for r in knl_results: + time += r + + return MultiCallKernelProfile(num_calls, time) + + def reset_profiling_data_for_kernel(self, kernel_name: str) -> None: + """Reset profiling data for kernel `kernel_name`.""" + self.profile_results.pop(kernel_name, None) + + def tabulate_profiling_data(self) -> pytools.Table: + """Return a :class:`pytools.Table` with the profiling results.""" + self._wait_and_transfer_profile_events() + + tbl = pytools.Table() + + # Table header + tbl.add_row(("Function", "# Calls", "Time_sum [s]", "Time_avg [s]")) + + # Precision of results + g = ".4g" + + total_calls = 0 + total_time = 0.0 + + for knl in self.profile_results: + r = self.get_profiling_data_for_kernel(knl) + + total_calls += r.num_calls + + t_sum = r.time + t_avg = r.time / r.num_calls + if t_sum is not None: + total_time += t_sum + + time_sum = f"{t_sum:{g}}" + time_avg = f"{t_avg:{g}}" + + tbl.add_row((knl, r.num_calls, time_sum, + time_avg,)) + + tbl.add_row(("", "", "", "")) + tbl.add_row(("Total", total_calls, f"{total_time:{g}}", "--")) + + return tbl + @property def _frozen_array_types(self) -> tuple[type, ...]: import pyopencl.array as cla @@ -546,9 +656,13 @@ def _to_frozen(key: tuple[Any, ...], ary) -> TaggableCLArray: self._dag_transform_cache[normalized_expr]) assert len(pt_prg.bound_arguments) == 0 - _evt, out_dict = pt_prg(self.queue, + evt, out_dict = pt_prg(self.queue, allocator=self.allocator, **bound_arguments) + + if self.profile_kernels: + self.profile_events.append(ProfileEvent(evt, pt_prg)) + assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 key_to_frozen_subary = { diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index b1611c7d..03c0b469 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -636,10 +636,14 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: input_kwargs_for_loopy = _args_to_device_buffers( self.actx, self.input_id_to_name_in_program, arg_id_to_arg, fn_name) - _evt, out_dict = self.pytato_program(queue=self.actx.queue, + evt, out_dict = self.pytato_program(queue=self.actx.queue, allocator=self.actx.allocator, **input_kwargs_for_loopy) + if self.actx.profile_kernels: + from arraycontext.impl.pytato import ProfileEvent + self.actx.profile_events.append(ProfileEvent(evt, self.pytato_program)) + def to_output_template(keys, _): name_in_program = self.output_id_to_name_in_program[keys] return self.actx.thaw(to_tagged_cl_array( @@ -675,10 +679,14 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: input_kwargs_for_loopy = _args_to_device_buffers( self.actx, self.input_id_to_name_in_program, arg_id_to_arg, fn_name) - _evt, out_dict = self.pytato_program(queue=self.actx.queue, + evt, out_dict = self.pytato_program(queue=self.actx.queue, allocator=self.actx.allocator, **input_kwargs_for_loopy) + if self.actx.profile_kernels: + from arraycontext.impl.pytato import ProfileEvent + self.actx.profile_events.append(ProfileEvent(evt, self.pytato_program)) + return self.actx.thaw(to_tagged_cl_array(out_dict[self.output_name], axes=get_cl_axes_from_pt_axes( self.output_axes), From 040f7395782f51080ad78a4d8a5ae8cf3957821d Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 4 Apr 2025 15:27:03 -0500 Subject: [PATCH 03/14] add a simple test --- arraycontext/impl/pytato/__init__.py | 2 +- test/test_pytato_arraycontext.py | 40 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 81e2678d..93746eff 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -403,7 +403,7 @@ def tabulate_profiling_data(self) -> pytools.Table: tbl = pytools.Table() # Table header - tbl.add_row(("Function", "# Calls", "Time_sum [s]", "Time_avg [s]")) + tbl.add_row(("Function", "# Calls", "Time_sum [ns]", "Time_avg [ns]")) # Precision of results g = ".4g" diff --git a/test/test_pytato_arraycontext.py b/test/test_pytato_arraycontext.py index deee7405..5597e3e1 100644 --- a/test/test_pytato_arraycontext.py +++ b/test/test_pytato_arraycontext.py @@ -274,6 +274,46 @@ def twice(x, y, a): assert isinstance(ep.arg_dict["_actx_in_2"], lp.ArrayArg) +def test_profiling_actx(): + import pyopencl as cl + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + actx = PytatoPyOpenCLArrayContext(queue, profile_kernels=True) + + def twice(x): + return 2 * x + + # {{{ Compiled test + + f = actx.compile(twice) + + for _ in range(10): + assert actx.to_numpy(f(99)) == 198 + + actx._wait_and_transfer_profile_events() + assert len(actx.profile_results) == 1 + assert len(actx.profile_results["twice"]) == 10 + + print(actx.tabulate_profiling_data()) + + # }}} + + # {{{ Uncompiled/frozen test + + for _ in range(10): + assert actx.to_numpy(twice(99)) == 198 + + actx._wait_and_transfer_profile_events() + assert len(actx.profile_results) == 1 + assert len(actx.profile_results["twice"]) == 10 + + print(actx.tabulate_profiling_data()) + + # }}} + + if __name__ == "__main__": import sys if len(sys.argv) > 1: From b0c00b4d4d3af8dcbaab0870b431c44c9d4bb568 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 4 Apr 2025 15:29:23 -0500 Subject: [PATCH 04/14] remove global pyopencl import --- arraycontext/impl/pytato/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 93746eff..1fa0b9c7 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -59,7 +59,6 @@ import numpy as np -import pyopencl as cl import pytools from pytools import memoize_method from pytools.tag import Tag, ToTagSetConvertible, normalize_tags @@ -360,6 +359,7 @@ def __init__( self._force_svm_arg_limit = _force_svm_arg_limit def _wait_and_transfer_profile_events(self) -> None: + import pyopencl as cl # First, wait for completion of all events if self.profile_events: cl.wait_for_events([p_event.cl_event for p_event in self.profile_events]) From 8ade2598beb1bf86c9117c27b48e576b1f0e7f9a Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 4 Apr 2025 15:34:39 -0500 Subject: [PATCH 05/14] replace argument indicator Co-authored-by: Alexandru Fikl --- arraycontext/impl/pytato/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 1fa0b9c7..a879fd1a 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -376,7 +376,7 @@ def _wait_and_transfer_profile_events(self) -> None: def get_profiling_data_for_kernel(self, kernel_name: str) \ -> MultiCallKernelProfile: - """Return profiling data for kernel `kernel_name`.""" + """Return profiling data for kernel *kernel_name*.""" self._wait_and_transfer_profile_events() time = 0 @@ -393,7 +393,7 @@ def get_profiling_data_for_kernel(self, kernel_name: str) \ return MultiCallKernelProfile(num_calls, time) def reset_profiling_data_for_kernel(self, kernel_name: str) -> None: - """Reset profiling data for kernel `kernel_name`.""" + """Reset profiling data for kernel *kernel_name*.""" self.profile_results.pop(kernel_name, None) def tabulate_profiling_data(self) -> pytools.Table: From 55a0c2c5cdd91c5813e41ef4bb6a4a63af03a24d Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 4 Apr 2025 15:42:30 -0500 Subject: [PATCH 06/14] add add_profiling_event --- arraycontext/impl/pytato/__init__.py | 8 ++++++-- arraycontext/impl/pytato/compile.py | 8 ++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index a879fd1a..d567c107 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -374,6 +374,11 @@ def _wait_and_transfer_profile_events(self) -> None: self.profile_events = [] + def add_profiling_event(self, evt: cl._cl.Event, translation_unit: Any) -> None: + """Add a profiling event to the list of profiling events.""" + if self.profile_kernels: + self.profile_events.append(ProfileEvent(evt, translation_unit)) + def get_profiling_data_for_kernel(self, kernel_name: str) \ -> MultiCallKernelProfile: """Return profiling data for kernel *kernel_name*.""" @@ -660,8 +665,7 @@ def _to_frozen(key: tuple[Any, ...], ary) -> TaggableCLArray: allocator=self.allocator, **bound_arguments) - if self.profile_kernels: - self.profile_events.append(ProfileEvent(evt, pt_prg)) + self.add_profiling_event(evt, pt_prg) assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 03c0b469..7d46aa15 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -640,9 +640,7 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: allocator=self.actx.allocator, **input_kwargs_for_loopy) - if self.actx.profile_kernels: - from arraycontext.impl.pytato import ProfileEvent - self.actx.profile_events.append(ProfileEvent(evt, self.pytato_program)) + self.actx.add_profiling_event(evt, self.pytato_program) def to_output_template(keys, _): name_in_program = self.output_id_to_name_in_program[keys] @@ -683,9 +681,7 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: allocator=self.actx.allocator, **input_kwargs_for_loopy) - if self.actx.profile_kernels: - from arraycontext.impl.pytato import ProfileEvent - self.actx.profile_events.append(ProfileEvent(evt, self.pytato_program)) + self.actx.add_profiling_event(evt, self.pytato_program) return self.actx.thaw(to_tagged_cl_array(out_dict[self.output_name], axes=get_cl_axes_from_pt_axes( From 7216ccf34e16cb390170d5251848f90eb2787406 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 7 Apr 2025 16:39:25 -0500 Subject: [PATCH 07/14] outline multi-kernel per t_unit implementation --- arraycontext/impl/pytato/__init__.py | 27 +++++++++++++++++---------- arraycontext/impl/pytato/compile.py | 14 ++++++++++++-- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index d567c107..2a01644c 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -241,9 +241,9 @@ def get_target(self): class ProfileEvent: """Holds a profile event that has not been collected by the profiler yet.""" - cl_event: cl._cl.Event - translation_unit: Any - # args_tuple: tuple + start_cl_event: cl._cl.Event + stop_cl_event: cl._cl.Event + t_unit_name: str @dataclass @@ -362,22 +362,24 @@ def _wait_and_transfer_profile_events(self) -> None: import pyopencl as cl # First, wait for completion of all events if self.profile_events: - cl.wait_for_events([p_event.cl_event for p_event in self.profile_events]) + cl.wait_for_events([p_event.stop_cl_event + for p_event in self.profile_events]) # Then, collect all events and store them for t in self.profile_events: - name = t.translation_unit.program.entrypoint + name = t.t_unit_name - time = t.cl_event.profile.end - t.cl_event.profile.start + time = t.start_cl_event.profile.end - t.stop_cl_event.profile.start self.profile_results.setdefault(name, []).append(time) self.profile_events = [] - def add_profiling_event(self, evt: cl._cl.Event, translation_unit: Any) -> None: - """Add a profiling event to the list of profiling events.""" + def add_profiling_events(self, start: cl._cl.Event, stop: cl._cl.Event, + t_unit_name: str) -> None: + """Add profiling events to the list of profiling events.""" if self.profile_kernels: - self.profile_events.append(ProfileEvent(evt, translation_unit)) + self.profile_events.append(ProfileEvent(start, stop, t_unit_name)) def get_profiling_data_for_kernel(self, kernel_name: str) \ -> MultiCallKernelProfile: @@ -661,11 +663,16 @@ def _to_frozen(key: tuple[Any, ...], ary) -> TaggableCLArray: self._dag_transform_cache[normalized_expr]) assert len(pt_prg.bound_arguments) == 0 + + if self.profile_kernels: + start_evt = cl.enqueue_marker(self.queue) + evt, out_dict = pt_prg(self.queue, allocator=self.allocator, **bound_arguments) - self.add_profiling_event(evt, pt_prg) + if self.profile_kernels: + self.add_profiling_events(start_evt, evt, pt_prg.program.entrypoint) assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 7d46aa15..8f0d9c9f 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -636,11 +636,16 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: input_kwargs_for_loopy = _args_to_device_buffers( self.actx, self.input_id_to_name_in_program, arg_id_to_arg, fn_name) + if self.actx.profile_kernels: + import pyopencl as cl + start_evt = cl.enqueue_marker(self.actx.queue) + evt, out_dict = self.pytato_program(queue=self.actx.queue, allocator=self.actx.allocator, **input_kwargs_for_loopy) - self.actx.add_profiling_event(evt, self.pytato_program) + if self.actx.profile_kernels: + self.actx.add_profiling_events(start_evt, evt, fn_name) def to_output_template(keys, _): name_in_program = self.output_id_to_name_in_program[keys] @@ -677,11 +682,16 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: input_kwargs_for_loopy = _args_to_device_buffers( self.actx, self.input_id_to_name_in_program, arg_id_to_arg, fn_name) + if self.actx.profile_kernels: + import pyopencl as cl + start_evt = cl.enqueue_marker(self.actx.queue) + evt, out_dict = self.pytato_program(queue=self.actx.queue, allocator=self.actx.allocator, **input_kwargs_for_loopy) - self.actx.add_profiling_event(evt, self.pytato_program) + if self.actx.profile_kernels: + self.actx.add_profiling_events(start_evt, evt, fn_name) return self.actx.thaw(to_tagged_cl_array(out_dict[self.output_name], axes=get_cl_axes_from_pt_axes( From 55537cce96fd4fda0a881a63f78a98c05d40b572 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 7 Apr 2025 17:58:28 -0500 Subject: [PATCH 08/14] small fixes --- arraycontext/impl/pytato/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 2a01644c..bf03e03a 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -369,7 +369,7 @@ def _wait_and_transfer_profile_events(self) -> None: for t in self.profile_events: name = t.t_unit_name - time = t.start_cl_event.profile.end - t.stop_cl_event.profile.start + time = t.stop_cl_event.profile.end - t.start_cl_event.profile.end self.profile_results.setdefault(name, []).append(time) @@ -410,7 +410,7 @@ def tabulate_profiling_data(self) -> pytools.Table: tbl = pytools.Table() # Table header - tbl.add_row(("Function", "# Calls", "Time_sum [ns]", "Time_avg [ns]")) + tbl.add_row(("Kernel", "# Calls", "Time_sum [ns]", "Time_avg [ns]")) # Precision of results g = ".4g" @@ -437,6 +437,8 @@ def tabulate_profiling_data(self) -> pytools.Table: tbl.add_row(("", "", "", "")) tbl.add_row(("Total", total_calls, f"{total_time:{g}}", "--")) + self.profile_results = {} + return tbl @property @@ -665,6 +667,7 @@ def _to_frozen(key: tuple[Any, ...], ary) -> TaggableCLArray: assert len(pt_prg.bound_arguments) == 0 if self.profile_kernels: + import pyopencl as cl start_evt = cl.enqueue_marker(self.queue) evt, out_dict = pt_prg(self.queue, From 1123c16c25fd9b6c114fd1c2031cef963fedcf4b Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 8 Apr 2025 11:34:50 -0500 Subject: [PATCH 09/14] refactor tabulate_profiling_data --- arraycontext/impl/pytato/__init__.py | 45 ++++++++-------------------- arraycontext/impl/pytato/utils.py | 36 ++++++++++++++++++++++ test/test_pytato_arraycontext.py | 20 ++++++++++--- 3 files changed, 64 insertions(+), 37 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index bf03e03a..4dd8d0da 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -59,7 +59,7 @@ import numpy as np -import pytools +import pytools as pytools from pytools import memoize_method from pytools.tag import Tag, ToTagSetConvertible, normalize_tags @@ -358,6 +358,8 @@ def __init__( self._force_svm_arg_limit = _force_svm_arg_limit + # {{{ Profiling functionality + def _wait_and_transfer_profile_events(self) -> None: import pyopencl as cl # First, wait for completion of all events @@ -403,43 +405,20 @@ def reset_profiling_data_for_kernel(self, kernel_name: str) -> None: """Reset profiling data for kernel *kernel_name*.""" self.profile_results.pop(kernel_name, None) - def tabulate_profiling_data(self) -> pytools.Table: - """Return a :class:`pytools.Table` with the profiling results.""" + def get_and_reset_profiling_data(self) -> dict[str, MultiCallKernelProfile]: + """Return and reset profiling data.""" self._wait_and_transfer_profile_events() - tbl = pytools.Table() - - # Table header - tbl.add_row(("Kernel", "# Calls", "Time_sum [ns]", "Time_avg [ns]")) - - # Precision of results - g = ".4g" - - total_calls = 0 - total_time = 0.0 - - for knl in self.profile_results: - r = self.get_profiling_data_for_kernel(knl) - - total_calls += r.num_calls - - t_sum = r.time - t_avg = r.time / r.num_calls - if t_sum is not None: - total_time += t_sum - - time_sum = f"{t_sum:{g}}" - time_avg = f"{t_avg:{g}}" - - tbl.add_row((knl, r.num_calls, time_sum, - time_avg,)) - - tbl.add_row(("", "", "", "")) - tbl.add_row(("Total", total_calls, f"{total_time:{g}}", "--")) + result = { + kernel_name: MultiCallKernelProfile(len(times), sum(times)) + for kernel_name, times in self.profile_results.items() + } self.profile_results = {} - return tbl + return result + + # }}} @property def _frozen_array_types(self) -> tuple[type, ...]: diff --git a/arraycontext/impl/pytato/utils.py b/arraycontext/impl/pytato/utils.py index 2457e297..fc09e52f 100644 --- a/arraycontext/impl/pytato/utils.py +++ b/arraycontext/impl/pytato/utils.py @@ -35,6 +35,7 @@ from collections.abc import Mapping from typing import TYPE_CHECKING, Any, cast +import pytools from pytato.array import ( AbstractResultWithNamedArrays, Array, @@ -51,6 +52,7 @@ from arraycontext import ArrayContext from arraycontext.impl.pyopencl.taggable_cl_array import Axis as ClAxis +from arraycontext.impl.pytato import MultiCallKernelProfile if TYPE_CHECKING: @@ -221,4 +223,38 @@ def transfer_to_numpy(expr: ArrayOrNames, actx: ArrayContext) -> ArrayOrNames: # }}} + +def tabulate_profiling_data(profile_results: dict[str, MultiCallKernelProfile])\ + -> pytools.Table: + """Return a :class:`pytools.Table` with the profiling results.""" + tbl = pytools.Table() + + # Table header + tbl.add_row(("Kernel", "# Calls", "Time_sum [ns]", "Time_avg [ns]")) + + # Precision of results + g = ".4g" + + total_calls = 0 + total_time = 0.0 + + for kernel_name, mckp in profile_results.items(): + total_calls += mckp.num_calls + + t_sum = mckp.time + t_avg = mckp.time / mckp.num_calls + if t_sum is not None: + total_time += t_sum + + time_sum = f"{t_sum:{g}}" + time_avg = f"{t_avg:{g}}" + + tbl.add_row((kernel_name, mckp.num_calls, time_sum, + time_avg,)) + + tbl.add_row(("", "", "", "")) + tbl.add_row(("Total", total_calls, f"{total_time:{g}}", "--")) + + return tbl + # vim: foldmethod=marker diff --git a/test/test_pytato_arraycontext.py b/test/test_pytato_arraycontext.py index 5597e3e1..222fe6be 100644 --- a/test/test_pytato_arraycontext.py +++ b/test/test_pytato_arraycontext.py @@ -26,6 +26,7 @@ import logging +import numpy as np import pytest from pytools.tag import Tag @@ -289,27 +290,38 @@ def twice(x): f = actx.compile(twice) + assert len(actx.profile_events) == 0 + for _ in range(10): assert actx.to_numpy(f(99)) == 198 + assert len(actx.profile_events) == 10 actx._wait_and_transfer_profile_events() + assert len(actx.profile_events) == 0 assert len(actx.profile_results) == 1 assert len(actx.profile_results["twice"]) == 10 - print(actx.tabulate_profiling_data()) + from arraycontext.impl.pytato.utils import tabulate_profiling_data + + print(tabulate_profiling_data(actx.get_and_reset_profiling_data())) + assert len(actx.profile_results) == 0 # }}} # {{{ Uncompiled/frozen test + assert len(actx.profile_events) == 0 + for _ in range(10): - assert actx.to_numpy(twice(99)) == 198 + assert np.all(actx.to_numpy(twice(actx.from_numpy(np.array([99, 99])))) == 198) + assert len(actx.profile_events) == 10 actx._wait_and_transfer_profile_events() + assert len(actx.profile_events) == 0 assert len(actx.profile_results) == 1 - assert len(actx.profile_results["twice"]) == 10 + assert len(actx.profile_results["frozen_result"]) == 10 - print(actx.tabulate_profiling_data()) + print(tabulate_profiling_data(actx.get_and_reset_profiling_data())) # }}} From 03dfa33cc6a6e6e29438b273f4d167fb5acf215c Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 8 Apr 2025 13:27:59 -0500 Subject: [PATCH 10/14] add test to disable --- arraycontext/impl/pytato/__init__.py | 1 - test/test_pytato_arraycontext.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 4dd8d0da..c019599c 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -59,7 +59,6 @@ import numpy as np -import pytools as pytools from pytools import memoize_method from pytools.tag import Tag, ToTagSetConvertible, normalize_tags diff --git a/test/test_pytato_arraycontext.py b/test/test_pytato_arraycontext.py index 222fe6be..d665e137 100644 --- a/test/test_pytato_arraycontext.py +++ b/test/test_pytato_arraycontext.py @@ -323,6 +323,22 @@ def twice(x): print(tabulate_profiling_data(actx.get_and_reset_profiling_data())) + assert len(actx.profile_results) == 0 + + # }}} + + # {{{ test disabling profiling + + actx.profile_kernels = False + + assert len(actx.profile_events) == 0 + + for _ in range(10): + assert actx.to_numpy(f(99)) == 198 + + assert len(actx.profile_events) == 0 + assert len(actx.profile_results) == 0 + # }}} From 94d9d1694e0af08cf3aef665182f3ce5188d7313 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 9 Apr 2025 17:20:29 -0500 Subject: [PATCH 11/14] rename to private fields/method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Andreas Klöckner --- arraycontext/impl/pytato/__init__.py | 24 +++++++++++----------- arraycontext/impl/pytato/compile.py | 4 ++-- test/test_pytato_arraycontext.py | 30 ++++++++++++++-------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index c019599c..2b53c79b 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -304,10 +304,10 @@ def __init__( # List of ProfileEvents that haven't been transferred to profiled # results yet - self.profile_events: list[ProfileEvent] = [] + self._profile_events: list[ProfileEvent] = [] # Dict of kernel name -> list of kernel execution times - self.profile_results: dict[str, list[int]] = {} + self._profile_results: dict[str, list[int]] = {} self.using_svm = None @@ -362,25 +362,25 @@ def __init__( def _wait_and_transfer_profile_events(self) -> None: import pyopencl as cl # First, wait for completion of all events - if self.profile_events: + if self._profile_events: cl.wait_for_events([p_event.stop_cl_event - for p_event in self.profile_events]) + for p_event in self._profile_events]) # Then, collect all events and store them - for t in self.profile_events: + for t in self._profile_events: name = t.t_unit_name time = t.stop_cl_event.profile.end - t.start_cl_event.profile.end - self.profile_results.setdefault(name, []).append(time) + self._profile_results.setdefault(name, []).append(time) - self.profile_events = [] + self._profile_events = [] - def add_profiling_events(self, start: cl._cl.Event, stop: cl._cl.Event, + def _add_profiling_events(self, start: cl._cl.Event, stop: cl._cl.Event, t_unit_name: str) -> None: """Add profiling events to the list of profiling events.""" if self.profile_kernels: - self.profile_events.append(ProfileEvent(start, stop, t_unit_name)) + self._profile_events.append(ProfileEvent(start, stop, t_unit_name)) def get_profiling_data_for_kernel(self, kernel_name: str) \ -> MultiCallKernelProfile: @@ -410,10 +410,10 @@ def get_and_reset_profiling_data(self) -> dict[str, MultiCallKernelProfile]: result = { kernel_name: MultiCallKernelProfile(len(times), sum(times)) - for kernel_name, times in self.profile_results.items() + for kernel_name, times in self._profile_results.items() } - self.profile_results = {} + self._profile_results = {} return result @@ -653,7 +653,7 @@ def _to_frozen(key: tuple[Any, ...], ary) -> TaggableCLArray: **bound_arguments) if self.profile_kernels: - self.add_profiling_events(start_evt, evt, pt_prg.program.entrypoint) + self._add_profiling_events(start_evt, evt, pt_prg.program.entrypoint) assert len(set(out_dict) & set(key_to_frozen_subary)) == 0 diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 8f0d9c9f..5a4340e3 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -645,7 +645,7 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: **input_kwargs_for_loopy) if self.actx.profile_kernels: - self.actx.add_profiling_events(start_evt, evt, fn_name) + self.actx._add_profiling_events(start_evt, evt, fn_name) def to_output_template(keys, _): name_in_program = self.output_id_to_name_in_program[keys] @@ -691,7 +691,7 @@ def __call__(self, arg_id_to_arg) -> ArrayContainer: **input_kwargs_for_loopy) if self.actx.profile_kernels: - self.actx.add_profiling_events(start_evt, evt, fn_name) + self.actx._add_profiling_events(start_evt, evt, fn_name) return self.actx.thaw(to_tagged_cl_array(out_dict[self.output_name], axes=get_cl_axes_from_pt_axes( diff --git a/test/test_pytato_arraycontext.py b/test/test_pytato_arraycontext.py index d665e137..7802df02 100644 --- a/test/test_pytato_arraycontext.py +++ b/test/test_pytato_arraycontext.py @@ -290,40 +290,40 @@ def twice(x): f = actx.compile(twice) - assert len(actx.profile_events) == 0 + assert len(actx._profile_events) == 0 for _ in range(10): assert actx.to_numpy(f(99)) == 198 - assert len(actx.profile_events) == 10 + assert len(actx._profile_events) == 10 actx._wait_and_transfer_profile_events() - assert len(actx.profile_events) == 0 - assert len(actx.profile_results) == 1 - assert len(actx.profile_results["twice"]) == 10 + assert len(actx._profile_events) == 0 + assert len(actx._profile_results) == 1 + assert len(actx._profile_results["twice"]) == 10 from arraycontext.impl.pytato.utils import tabulate_profiling_data print(tabulate_profiling_data(actx.get_and_reset_profiling_data())) - assert len(actx.profile_results) == 0 + assert len(actx._profile_results) == 0 # }}} # {{{ Uncompiled/frozen test - assert len(actx.profile_events) == 0 + assert len(actx._profile_events) == 0 for _ in range(10): assert np.all(actx.to_numpy(twice(actx.from_numpy(np.array([99, 99])))) == 198) - assert len(actx.profile_events) == 10 + assert len(actx._profile_events) == 10 actx._wait_and_transfer_profile_events() - assert len(actx.profile_events) == 0 - assert len(actx.profile_results) == 1 - assert len(actx.profile_results["frozen_result"]) == 10 + assert len(actx._profile_events) == 0 + assert len(actx._profile_results) == 1 + assert len(actx._profile_results["frozen_result"]) == 10 print(tabulate_profiling_data(actx.get_and_reset_profiling_data())) - assert len(actx.profile_results) == 0 + assert len(actx._profile_results) == 0 # }}} @@ -331,13 +331,13 @@ def twice(x): actx.profile_kernels = False - assert len(actx.profile_events) == 0 + assert len(actx._profile_events) == 0 for _ in range(10): assert actx.to_numpy(f(99)) == 198 - assert len(actx.profile_events) == 0 - assert len(actx.profile_results) == 0 + assert len(actx._profile_events) == 0 + assert len(actx._profile_results) == 0 # }}} From 6bdb85b3a91a1e530567df5c388c8a56e00dc27e Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 9 Apr 2025 18:58:33 -0500 Subject: [PATCH 12/14] refactor to simplify API --- arraycontext/impl/pytato/__init__.py | 49 +++------------------------- arraycontext/impl/pytato/utils.py | 31 +++++++++++++----- test/test_pytato_arraycontext.py | 4 +-- 3 files changed, 28 insertions(+), 56 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 2b53c79b..8e13553b 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -245,14 +245,6 @@ class ProfileEvent: t_unit_name: str -@dataclass -class MultiCallKernelProfile: - """Class to hold the results of multiple kernel executions.""" - - num_calls: int - time: int - - class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): """ An :class:`ArrayContext` that uses :mod:`pytato` data types to represent @@ -289,14 +281,13 @@ def __init__( representation. This interface should be considered unstable. """ - import pyopencl as cl - if allocator is not None and use_memory_pool is not None: raise TypeError("may not specify both allocator and use_memory_pool") self.profile_kernels = profile_kernels if profile_kernels: + import pyopencl as cl if not queue.properties & cl.command_queue_properties.PROFILING_ENABLE: raise RuntimeError("Profiling was not enabled in the command queue. " "Please create the queue with " @@ -379,44 +370,12 @@ def _wait_and_transfer_profile_events(self) -> None: def _add_profiling_events(self, start: cl._cl.Event, stop: cl._cl.Event, t_unit_name: str) -> None: """Add profiling events to the list of profiling events.""" - if self.profile_kernels: - self._profile_events.append(ProfileEvent(start, stop, t_unit_name)) - - def get_profiling_data_for_kernel(self, kernel_name: str) \ - -> MultiCallKernelProfile: - """Return profiling data for kernel *kernel_name*.""" - self._wait_and_transfer_profile_events() - - time = 0 - num_calls = 0 - - if kernel_name in self.profile_results: - knl_results = self.profile_results[kernel_name] - - num_calls = len(knl_results) - - for r in knl_results: - time += r - - return MultiCallKernelProfile(num_calls, time) - - def reset_profiling_data_for_kernel(self, kernel_name: str) -> None: - """Reset profiling data for kernel *kernel_name*.""" - self.profile_results.pop(kernel_name, None) - - def get_and_reset_profiling_data(self) -> dict[str, MultiCallKernelProfile]: - """Return and reset profiling data.""" - self._wait_and_transfer_profile_events() - - result = { - kernel_name: MultiCallKernelProfile(len(times), sum(times)) - for kernel_name, times in self._profile_results.items() - } + self._profile_events.append(ProfileEvent(start, stop, t_unit_name)) + def _reset_profiling_data(self) -> None: + """Reset profiling data.""" self._profile_results = {} - return result - # }}} @property diff --git a/arraycontext/impl/pytato/utils.py b/arraycontext/impl/pytato/utils.py index fc09e52f..5ff71938 100644 --- a/arraycontext/impl/pytato/utils.py +++ b/arraycontext/impl/pytato/utils.py @@ -4,6 +4,12 @@ __doc__ = """ .. autofunction:: transfer_from_numpy .. autofunction:: transfer_to_numpy + + +Profiling-related functions: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: tabulate_profiling_data """ @@ -52,7 +58,7 @@ from arraycontext import ArrayContext from arraycontext.impl.pyopencl.taggable_cl_array import Axis as ClAxis -from arraycontext.impl.pytato import MultiCallKernelProfile +from arraycontext.impl.pytato import PytatoPyOpenCLArrayContext if TYPE_CHECKING: @@ -224,9 +230,12 @@ def transfer_to_numpy(expr: ArrayOrNames, actx: ArrayContext) -> ArrayOrNames: # }}} -def tabulate_profiling_data(profile_results: dict[str, MultiCallKernelProfile])\ - -> pytools.Table: +# {{{ Profiling + +def tabulate_profiling_data(actx: PytatoPyOpenCLArrayContext) -> pytools.Table: """Return a :class:`pytools.Table` with the profiling results.""" + actx._wait_and_transfer_profile_events() + tbl = pytools.Table() # Table header @@ -238,23 +247,27 @@ def tabulate_profiling_data(profile_results: dict[str, MultiCallKernelProfile])\ total_calls = 0 total_time = 0.0 - for kernel_name, mckp in profile_results.items(): - total_calls += mckp.num_calls + for kernel_name, times in actx._profile_results.items(): + num_calls = len(times) + total_calls += num_calls - t_sum = mckp.time - t_avg = mckp.time / mckp.num_calls + t_sum = sum(times) + t_avg = t_sum / num_calls if t_sum is not None: total_time += t_sum time_sum = f"{t_sum:{g}}" time_avg = f"{t_avg:{g}}" - tbl.add_row((kernel_name, mckp.num_calls, time_sum, - time_avg,)) + tbl.add_row((kernel_name, num_calls, time_sum, time_avg)) tbl.add_row(("", "", "", "")) tbl.add_row(("Total", total_calls, f"{total_time:{g}}", "--")) + actx._reset_profiling_data() + return tbl +# }}} + # vim: foldmethod=marker diff --git a/test/test_pytato_arraycontext.py b/test/test_pytato_arraycontext.py index 7802df02..b5b37051 100644 --- a/test/test_pytato_arraycontext.py +++ b/test/test_pytato_arraycontext.py @@ -303,7 +303,7 @@ def twice(x): from arraycontext.impl.pytato.utils import tabulate_profiling_data - print(tabulate_profiling_data(actx.get_and_reset_profiling_data())) + print(tabulate_profiling_data(actx)) assert len(actx._profile_results) == 0 # }}} @@ -321,7 +321,7 @@ def twice(x): assert len(actx._profile_results) == 1 assert len(actx._profile_results["frozen_result"]) == 10 - print(tabulate_profiling_data(actx.get_and_reset_profiling_data())) + print(tabulate_profiling_data(actx)) assert len(actx._profile_results) == 0 From fedb8360e70373fb6af307b0a01409b3613d6711 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 9 Apr 2025 19:05:38 -0500 Subject: [PATCH 13/14] bit more doc --- arraycontext/impl/pytato/__init__.py | 2 ++ arraycontext/impl/pytato/utils.py | 11 ++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 8e13553b..5c949d2f 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -351,6 +351,8 @@ def __init__( # {{{ Profiling functionality def _wait_and_transfer_profile_events(self) -> None: + """Wait for all profiling events to finish and transfer the results + to *self._profile_results*.""" import pyopencl as cl # First, wait for completion of all events if self._profile_events: diff --git a/arraycontext/impl/pytato/utils.py b/arraycontext/impl/pytato/utils.py index 5ff71938..3f33b448 100644 --- a/arraycontext/impl/pytato/utils.py +++ b/arraycontext/impl/pytato/utils.py @@ -6,8 +6,8 @@ .. autofunction:: transfer_to_numpy -Profiling-related functions: -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Profiling-related functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autofunction:: tabulate_profiling_data """ @@ -242,7 +242,7 @@ def tabulate_profiling_data(actx: PytatoPyOpenCLArrayContext) -> pytools.Table: tbl.add_row(("Kernel", "# Calls", "Time_sum [ns]", "Time_avg [ns]")) # Precision of results - g = ".4g" + g = ".5g" total_calls = 0 total_time = 0.0 @@ -256,10 +256,7 @@ def tabulate_profiling_data(actx: PytatoPyOpenCLArrayContext) -> pytools.Table: if t_sum is not None: total_time += t_sum - time_sum = f"{t_sum:{g}}" - time_avg = f"{t_avg:{g}}" - - tbl.add_row((kernel_name, num_calls, time_sum, time_avg)) + tbl.add_row((kernel_name, num_calls, f"{t_sum:{g}}", f"{t_avg:{g}}")) tbl.add_row(("", "", "", "")) tbl.add_row(("Total", total_calls, f"{total_time:{g}}", "--")) From 6ae30d635e8b2d7ed7a90558044788eadce892e6 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 11 Apr 2025 08:46:10 -0500 Subject: [PATCH 14/14] factor out profile enable/disable --- arraycontext/impl/pytato/__init__.py | 37 ++++++++++++++++------------ test/test_pytato_arraycontext.py | 28 ++++++++++++++++++++- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 5c949d2f..b6223e6a 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -284,22 +284,6 @@ def __init__( if allocator is not None and use_memory_pool is not None: raise TypeError("may not specify both allocator and use_memory_pool") - self.profile_kernels = profile_kernels - - if profile_kernels: - import pyopencl as cl - if not queue.properties & cl.command_queue_properties.PROFILING_ENABLE: - raise RuntimeError("Profiling was not enabled in the command queue. " - "Please create the queue with " - "cl.command_queue_properties.PROFILING_ENABLE.") - - # List of ProfileEvents that haven't been transferred to profiled - # results yet - self._profile_events: list[ProfileEvent] = [] - - # Dict of kernel name -> list of kernel execution times - self._profile_results: dict[str, list[int]] = {} - self.using_svm = None if allocator is None: @@ -348,8 +332,29 @@ def __init__( self._force_svm_arg_limit = _force_svm_arg_limit + self._enable_profiling(profile_kernels) + # {{{ Profiling functionality + def _enable_profiling(self, enable: bool) -> None: + # List of ProfileEvents that haven't been transferred to profiled + # results yet + self._profile_events: list[ProfileEvent] = [] + + # Dict of kernel name -> list of kernel execution times + self._profile_results: dict[str, list[int]] = {} + + if enable: + import pyopencl as cl + if not self.queue.properties & cl.command_queue_properties.PROFILING_ENABLE: + raise RuntimeError("Profiling was not enabled in the command queue. " + "Please create the queue with " + "cl.command_queue_properties.PROFILING_ENABLE.") + self.profile_kernels = True + + else: + self.profile_kernels = False + def _wait_and_transfer_profile_events(self) -> None: """Wait for all profiling events to finish and transfer the results to *self._profile_results*.""" diff --git a/test/test_pytato_arraycontext.py b/test/test_pytato_arraycontext.py index b5b37051..188aa9de 100644 --- a/test/test_pytato_arraycontext.py +++ b/test/test_pytato_arraycontext.py @@ -329,7 +329,7 @@ def twice(x): # {{{ test disabling profiling - actx.profile_kernels = False + actx._enable_profiling(False) assert len(actx._profile_events) == 0 @@ -341,6 +341,32 @@ def twice(x): # }}} + # {{{ test enabling profiling + + actx._enable_profiling(True) + + assert len(actx._profile_events) == 0 + + for _ in range(10): + assert actx.to_numpy(f(99)) == 198 + + assert len(actx._profile_events) == 10 + actx._wait_and_transfer_profile_events() + assert len(actx._profile_events) == 0 + assert len(actx._profile_results) == 1 + + # }}} + + queue2 = cl.CommandQueue(cl_ctx) + + with pytest.raises(RuntimeError): + PytatoPyOpenCLArrayContext(queue2, profile_kernels=True) + + actx2 = PytatoPyOpenCLArrayContext(queue2) + + with pytest.raises(RuntimeError): + actx2._enable_profiling(True) + if __name__ == "__main__": import sys