Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion config/cscs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
],
'environs': [
'builtin',
'PrgEnv-gnu'
'PrgEnv-gnu',
'PrgEnv-fujitsu'
],
'descr': 'Fujitsu A64FX CPUs',
'max_jobs': 100,
Expand Down Expand Up @@ -857,6 +858,18 @@
'cxx': 'mpicxx',
'ftn': 'mpif90'
},
{
'name': 'PrgEnv-fujitsu',
'target_systems': [
'ault'
],
'modules': [
'a64fxsdk'
],
'cc': 'mpifccpx',
'cxx': 'mpiFCCpx',
'ftn': 'mpifrtpx'
},
{
'name': 'builtin',
'target_systems': [
Expand Down
129 changes: 55 additions & 74 deletions cscs-checks/microbenchmarks/cpu/dgemm/dgemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,105 +4,86 @@
# SPDX-License-Identifier: BSD-3-Clause

import reframe as rfm
import reframe.utility.sanity as sn

from hpctestlib.microbenchmarks.cpu.dgemm import Dgemm


@rfm.simple_test
class DGEMMTest(rfm.RegressionTest):
def __init__(self):
self.descr = 'DGEMM performance test'
self.sourcepath = 'dgemm.c'
self.sanity_patterns = self.eval_sanity()
class dgemm_check(Dgemm):
'''CSCS DGEMM check.

The matrix dimensions are set in the base class.
Every node reports its performance in Gflops/s. To do so, this class
overrides the performance patterns and references from the base test.
This is done in the ``set_perf_patterns`` pre-performance hook.
'''

valid_systems = [
'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn',
'eiger:mc', 'pilatus:mc', 'ault:a64fx'
]
num_tasks = 0
sys_reference = variable(
dict, value={
'daint:gpu': (300.0, -0.15, None, 'Gflop/s'),
'daint:mc': (1040.0, -0.15, None, 'Gflop/s'),
'dom:gpu': (300.0, -0.15, None, 'Gflop/s'),
'dom:mc': (1040.0, -0.15, None, 'Gflop/s'),
'eiger:mc': (3200.0, -0.15, None, 'Gflop/s'),
'pilatus:mc': (3200.0, -0.15, None, 'Gflop/s'),
'ault:a64fx': (1930.0, -0.15, None, 'Gflop/s'),
'*': (None, None, None, 'Gflop/s'),
},
)
tags = {'benchmark', 'diagnostic', 'craype'}

# the perf patterns are automaticaly generated inside sanity
self.perf_patterns = {}
self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn',
'eiger:mc', 'pilatus:mc']
@run_after('init')
def set_valid_prog_environs(self):
if self.current_system.name in ['daint', 'dom']:
self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel']
elif self.current_system.name in ['arolla', 'tsa']:
self.valid_prog_environs = ['PrgEnv-gnu-nompi']
elif self.current_system.name in ['eiger', 'pilatus']:
self.valid_prog_environs = ['PrgEnv-gnu']
else:
self.valid_prog_environs = []
elif self.current_system.name in ['ault']:
self.valid_prog_environs = ['PrgEnv-fujitsu']

self.num_tasks = 0
self.use_multithreading = False
self.executable_opts = ['6144', '12288', '3072']
self.build_system = 'SingleSource'
self.build_system.cflags = ['-O3']
self.sys_reference = {
'daint:gpu': (300.0, -0.15, None, 'Gflop/s'),
'daint:mc': (1040.0, -0.15, None, 'Gflop/s'),
'dom:gpu': (300.0, -0.15, None, 'Gflop/s'),
'dom:mc': (1040.0, -0.15, None, 'Gflop/s'),
'eiger:mc': (3200.0, -0.15, None, 'Gflop/s'),
'pilatus:mc': (3200.0, -0.15, None, 'Gflop/s'),
}
self.maintainers = ['AJ', 'VH']
self.tags = {'benchmark', 'diagnostic', 'craype'}
@run_after('setup')
def skip_incompatible_combinations(self):
    '''Skip PrgEnv-fujitsu runs outside ault's a64fx partition.

    The Fujitsu programming environment is only available on the A64FX
    nodes of ault, so any other partition paired with it is skipped
    during the setup stage.
    '''
    if self.current_environ.name.startswith('PrgEnv-fujitsu'):
        # NOTE: the original `('ault:a64fx')` had no trailing comma, so
        # it was a plain string and `not in` was a substring test; use a
        # real one-element tuple for proper membership semantics.
        self.skip_if(
            self.current_partition.fullname not in ('ault:a64fx',)
        )

@run_after('setup')
def set_num_cpus_per_task(self):
    '''Pin one task CPU per physical core of the current partition.

    Skips the test when the partition exposes no processor topology
    information, since the core count cannot be derived in that case.
    '''
    partition = self.current_partition
    topology = partition.processor
    if not topology.info:
        pname = partition.fullname
        self.skip(f'no topology information found for partition {pname!r}')

    # num_cpus counts hardware threads; divide by threads-per-core to
    # land on the number of physical cores.
    self.num_cpus_per_task = topology.num_cpus // topology.num_cpus_per_core

@run_before('compile')
def setflags(self):
def set_flags(self):
if self.current_environ.name.startswith('PrgEnv-gnu'):
self.build_system.cflags += ['-fopenmp']
elif self.current_environ.name.startswith('PrgEnv-intel'):
self.build_system.cppflags = [
'-DMKL_ILP64', '-I${MKLROOT}/include'
]
self.build_system.cflags = ['-qopenmp']
self.build_system.cflags += ['-qopenmp']
self.build_system.ldflags = [
'-mkl', '-static-intel', '-liomp5', '-lpthread', '-lm', '-ldl'
]
elif self.current_environ.name.startswith('PrgEnv-fujitsu'):
self.build_system.cflags += ['-fopenmp', '-Nlibomp', '-mt']
self.build_system.ldflags += ['-SSL2BLAMP', '-mt']

if self.current_partition.fullname in ['arolla:cn', 'arolla:pn',
'tsa:cn', 'tsa:pn']:
self.build_system.cflags += ['-I$EBROOTOPENBLAS/include']
self.build_system.ldflags = ['-L$EBROOTOPENBLAS/lib', '-lopenblas',
'-lpthread', '-lgfortran']

@run_before('run')
def set_tasks(self):
if self.current_partition.fullname in ['daint:gpu', 'dom:gpu']:
self.num_cpus_per_task = 12
elif self.current_partition.fullname in ['daint:mc', 'dom:mc']:
self.num_cpus_per_task = 36
elif self.current_partition.fullname in ['arolla:cn', 'tsa:cn']:
self.num_cpus_per_task = 16
elif self.current_partition.fullname in ['arolla:pn', 'tsa:pn']:
self.num_cpus_per_task = 40
elif self.current_partition.fullname in ['eiger:mc', 'pilatus:mc']:
self.num_cpus_per_task = 128

if self.num_cpus_per_task:
self.variables = {
'OMP_NUM_THREADS': str(self.num_cpus_per_task),
'OMP_BIND': 'cores',
'OMP_PROC_BIND': 'spread',
'OMP_SCHEDULE': 'static'
}

@sn.sanity_function
def eval_sanity(self):
all_tested_nodes = sn.evaluate(sn.extractall(
r'(?P<hostname>\S+):\s+Time for \d+ DGEMM operations',
self.stdout, 'hostname'))
num_tested_nodes = len(all_tested_nodes)
failure_msg = ('Requested %s node(s), but found %s node(s)' %
(self.job.num_tasks, num_tested_nodes))
sn.evaluate(sn.assert_eq(num_tested_nodes, self.job.num_tasks,
msg=failure_msg))

for hostname in all_tested_nodes:
partition_name = self.current_partition.fullname
ref_name = '%s:%s' % (partition_name, hostname)
self.reference[ref_name] = self.sys_reference.get(
partition_name, (0.0, None, None, 'Gflop/s')
)
self.perf_patterns[hostname] = sn.extractsingle(
r'%s:\s+Avg\. performance\s+:\s+(?P<gflops>\S+)'
r'\sGflop/s' % hostname, self.stdout, 'gflops', float)

return True
121 changes: 53 additions & 68 deletions cscs-checks/microbenchmarks/cpu/latency/latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,61 +4,73 @@
# SPDX-License-Identifier: BSD-3-Clause

import reframe as rfm
import reframe.utility.sanity as sn

from hpctestlib.microbenchmarks.cpu.latency import CpuLatency


@rfm.simple_test
class CPULatencyTest(rfm.RegressionTest):
sourcepath = 'latency.cpp'
build_system = 'SingleSource'
valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'ault:intel', 'ault:amdvega', 'tave:compute']
valid_prog_environs = ['PrgEnv-gnu']
class cpu_latency_check(CpuLatency):
'''CPU latency check.

This check performs a latency test for each provided buffer size.
'''
# Set required variables
buffer_sizes = ['16000', '128000', '8000000', '500000000']
num_tasks = 0
num_tasks_per_node = 1
executable_opts = ['16000', '128000', '8000000', '500000000']

valid_systems = [
'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'ault:intel', 'ault:amdvega', 'tave:compute', 'ault:a64fx'
]
valid_prog_environs = ['PrgEnv-gnu']
reference = {
'dom:mc': {
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyMem': (76.6, -0.01, 0.05, 'ns')
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyL4': (76.6, -0.01, 0.05, 'ns')
},
'dom:gpu': {
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyMem': (71.7, -0.01, 0.05, 'ns')
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyL4': (71.7, -0.01, 0.05, 'ns')
},
'daint:mc': {
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyMem': (76.6, -0.01, 0.05, 'ns')
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyL4': (76.6, -0.01, 0.05, 'ns')
},
'daint:gpu': {
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyMem': (71.7, -0.01, 0.05, 'ns')
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyL4': (71.7, -0.01, 0.05, 'ns')
},
'ault:intel': {
'latencyL1': (1.08, -0.01, 0.26, 'ns'),
'latencyL2': (3.8, -0.01, 0.26, 'ns'),
'latencyL3': (21.5, -0.01, 0.05, 'ns'),
'latencyMem': (86.5, -0.01, 0.05, 'ns')
'latencyL1': (1.08, -0.01, 0.26, 'ns'),
'latencyL2': (3.8, -0.01, 0.26, 'ns'),
'latencyL3': (21.5, -0.01, 0.05, 'ns'),
'latencyL4': (86.5, -0.01, 0.05, 'ns')
},
'ault:amdvega': {
'latencyL1': (1.32, -0.01, 0.26, 'ns'),
'latencyL2': (4.02, -0.01, 0.26, 'ns'),
'latencyL3': (14.4, -0.01, 0.26, 'ns'),
'latencyMem': (90.0, -0.01, 0.05, 'ns')
'latencyL1': (1.32, -0.01, 0.26, 'ns'),
'latencyL2': (4.02, -0.01, 0.26, 'ns'),
'latencyL3': (14.4, -0.01, 0.26, 'ns'),
'latencyL4': (90.0, -0.01, 0.05, 'ns')
},
'tave:compute': {
'latencyL1': (2.86, -0.01, 0.05, 'ns'),
'latencyL2': (12.15, -0.01, 0.05, 'ns'),
'latencyL3': (137, -0.01, 0.05, 'ns'),
'latencyMem': (150, -0.05, 0.05, 'ns')
'latencyL1': (2.86, -0.01, 0.05, 'ns'),
'latencyL2': (12.15, -0.01, 0.05, 'ns'),
'latencyL3': (137, -0.01, 0.05, 'ns'),
'latencyL4': (150, -0.05, 0.05, 'ns')
},
'ault:a64fx': {
'latencyL1': (2.78, None, 0.05, 'ns'),
'latencyL2': (14.3, None, 0.05, 'ns'),
'latencyL3': (32.1, None, 0.05, 'ns'),
'latencyL4': (146, None, 0.05, 'ns')
},
}
maintainers = ['SK']
Expand All @@ -71,34 +83,7 @@ def set_modules(self):
if self.current_system.name in {'tave'}:
self.modules = ['craype-hugepages512M']

@run_before('compile')
def set_flags(self):
self.build_system.cxxflags = ['-O3']

@sanity_function
def assert_success(self):
return sn.assert_eq(
sn.count(sn.findall(r'latency', self.stdout)),
self.num_tasks * len(self.executable_opts)
)

def lat_pattern(self, index):
return sn.extractsingle(
r'latency \(ns\) for input size %s: (?P<bw>\S+) clocks' %
self.executable_opts[index], self.stdout, 'bw', float)

@performance_function('ns')
def latencyL1(self):
return self.lat_pattern(0)

@performance_function('ns')
def latencyL2(self):
return self.lat_pattern(1)

@performance_function('ns')
def latencyL3(self):
return self.lat_pattern(2)

@performance_function('ns')
def latencyMem(self):
return self.lat_pattern(3)
@run_after('init')
def set_valid_environs(self):
    '''On ault, restrict the check to the Fujitsu programming environment.'''
    if self.current_system.name == 'ault':
        self.valid_prog_environs = ['PrgEnv-fujitsu']
Loading