Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyop2/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ class Configuration(dict):
("PYOP2_LDFLAGS", str, ""),
"simd_width":
("PYOP2_SIMD_WIDTH", int, 1),
"extra_info":
("PYOP2_EXTRA_INFO", bool, False),
"vectorization_strategy":
("PYOP2_VECT_STRATEGY", str, "cross-element"),
"alignment":
Expand Down
20 changes: 20 additions & 0 deletions pyop2/parloop.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ def _compute(self, part):

:arg part: The :class:`SetPartition` to compute over.
"""
if configuration["extra_info"]:
nbytes = self.comm.allreduce(self.nbytes)
if self.comm.Get_rank() == 0:
print("{0}_BYTES= {1}".format(self.global_kernel.name, nbytes))
with self._compute_event():
PETSc.Log.logFlops(part.size*self.num_flops)
self.global_kernel(self.comm, part.offset, part.offset+part.size, *self.arglist)
Expand All @@ -195,6 +199,22 @@ def _compute(self, part):
def num_flops(self):
return self.global_kernel.num_flops(self.iterset)

@cached_property
def nbytes(self):
    """Tally the ``nbytes`` of the data and maps used by this parloop.

    For every entry of ``self.arguments`` the ``nbytes`` of its ``data``
    is added.  For every map of that argument which is not ``None``,
    ``values.nbytes`` is added once for each item of the map's
    ``_kernel_args_`` that has not been encountered earlier in the
    traversal, so a map shared between arguments is not counted again.

    :returns: The accumulated total (presumably a size in bytes, given
        the ``nbytes`` attribute name -- confirm against the data types).
    """
    total = 0
    # Kernel arguments whose owning map has already contributed.
    counted = set()
    for argument in self.arguments:
        total += argument.data.nbytes
        for mapping in argument.maps:
            if mapping is not None:
                for kernel_arg in mapping._kernel_args_:
                    if kernel_arg not in counted:
                        total += mapping.values.nbytes
                        counted.add(kernel_arg)
    return total

@mpi.collective
def compute(self):
# Parloop.compute is an alias for Parloop.__call__
Expand Down