Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion config/cscs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
],
'environs': [
'builtin',
'PrgEnv-gnu'
'PrgEnv-gnu',
'PrgEnv-fujitsu'
],
'descr': 'Fujitsu A64FX CPUs',
'max_jobs': 100,
Expand Down Expand Up @@ -857,6 +858,18 @@
'cxx': 'mpicxx',
'ftn': 'mpif90'
},
{
'name': 'PrgEnv-fujitsu',
'target_systems': [
'ault'
],
'modules': [
'a64fxsdk'
],
'cc': 'mpifccpx',
'cxx': 'mpiFCCpx',
'ftn': 'mpifrtpx'
},
{
'name': 'builtin',
'target_systems': [
Expand Down
129 changes: 55 additions & 74 deletions cscs-checks/microbenchmarks/cpu/dgemm/dgemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,105 +4,86 @@
# SPDX-License-Identifier: BSD-3-Clause

import reframe as rfm
import reframe.utility.sanity as sn

from hpctestlib.microbenchmarks.cpu.dgemm import Dgemm


@rfm.simple_test
class DGEMMTest(rfm.RegressionTest):
def __init__(self):
self.descr = 'DGEMM performance test'
self.sourcepath = 'dgemm.c'
self.sanity_patterns = self.eval_sanity()
class dgemm_check(Dgemm):
'''CSCS DGEMM check.

The matrix dimensions are set in the base class.
Every node reports its performance in Gflops/s. To do so, this class
overrides the performance patterns and references from the base test.
This is done in the ``set_perf_patterns`` pre-performance hook.
'''

valid_systems = [
'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn',
'eiger:mc', 'pilatus:mc', 'ault:a64fx'
]
num_tasks = 0
sys_reference = variable(
dict, value={
'daint:gpu': (300.0, -0.15, None, 'Gflop/s'),
'daint:mc': (1040.0, -0.15, None, 'Gflop/s'),
'dom:gpu': (300.0, -0.15, None, 'Gflop/s'),
'dom:mc': (1040.0, -0.15, None, 'Gflop/s'),
'eiger:mc': (3200.0, -0.15, None, 'Gflop/s'),
'pilatus:mc': (3200.0, -0.15, None, 'Gflop/s'),
'ault:a64fx': (1930.0, -0.15, None, 'Gflop/s'),
'*': (None, None, None, 'Gflop/s'),
},
)
tags = {'benchmark', 'diagnostic', 'craype'}

# the perf patterns are automaticaly generated inside sanity
self.perf_patterns = {}
self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn',
'eiger:mc', 'pilatus:mc']
@run_after('init')
def set_valid_prog_environs(self):
if self.current_system.name in ['daint', 'dom']:
self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel']
elif self.current_system.name in ['arolla', 'tsa']:
self.valid_prog_environs = ['PrgEnv-gnu-nompi']
elif self.current_system.name in ['eiger', 'pilatus']:
self.valid_prog_environs = ['PrgEnv-gnu']
else:
self.valid_prog_environs = []
elif self.current_system.name in ['ault']:
self.valid_prog_environs = ['PrgEnv-fujitsu']

self.num_tasks = 0
self.use_multithreading = False
self.executable_opts = ['6144', '12288', '3072']
self.build_system = 'SingleSource'
self.build_system.cflags = ['-O3']
self.sys_reference = {
'daint:gpu': (300.0, -0.15, None, 'Gflop/s'),
'daint:mc': (1040.0, -0.15, None, 'Gflop/s'),
'dom:gpu': (300.0, -0.15, None, 'Gflop/s'),
'dom:mc': (1040.0, -0.15, None, 'Gflop/s'),
'eiger:mc': (3200.0, -0.15, None, 'Gflop/s'),
'pilatus:mc': (3200.0, -0.15, None, 'Gflop/s'),
}
self.maintainers = ['AJ', 'VH']
self.tags = {'benchmark', 'diagnostic', 'craype'}
@run_after('setup')
def skip_incompatible_combinations(self):
    '''Skip PrgEnv-fujitsu runs outside ault's a64fx partition.

    The Fujitsu programming environment is only available on the A64FX
    nodes of ault, so any other partition paired with it is skipped
    during the setup stage.
    '''
    if self.current_environ.name.startswith('PrgEnv-fujitsu'):
        # NOTE: the original `('ault:a64fx')` had no trailing comma, so
        # it was a plain string and `not in` was a substring test; use a
        # real one-element tuple for proper membership semantics.
        self.skip_if(
            self.current_partition.fullname not in ('ault:a64fx',)
        )

@run_after('setup')
def set_num_cpus_per_task(self):
    '''Pin one task CPU per physical core of the current partition.

    Skips the test when the partition exposes no processor topology
    information, since the core count cannot be derived in that case.
    '''
    partition = self.current_partition
    topology = partition.processor
    if not topology.info:
        pname = partition.fullname
        self.skip(f'no topology information found for partition {pname!r}')

    # num_cpus counts hardware threads; divide by threads-per-core to
    # land on the number of physical cores.
    self.num_cpus_per_task = topology.num_cpus // topology.num_cpus_per_core

@run_before('compile')
def setflags(self):
def set_flags(self):
if self.current_environ.name.startswith('PrgEnv-gnu'):
self.build_system.cflags += ['-fopenmp']
elif self.current_environ.name.startswith('PrgEnv-intel'):
self.build_system.cppflags = [
'-DMKL_ILP64', '-I${MKLROOT}/include'
]
self.build_system.cflags = ['-qopenmp']
self.build_system.cflags += ['-qopenmp']
self.build_system.ldflags = [
'-mkl', '-static-intel', '-liomp5', '-lpthread', '-lm', '-ldl'
]
elif self.current_environ.name.startswith('PrgEnv-fujitsu'):
self.build_system.cflags += ['-fopenmp', '-Nlibomp', '-mt']
self.build_system.ldflags += ['-SSL2BLAMP', '-mt']

if self.current_partition.fullname in ['arolla:cn', 'arolla:pn',
'tsa:cn', 'tsa:pn']:
self.build_system.cflags += ['-I$EBROOTOPENBLAS/include']
self.build_system.ldflags = ['-L$EBROOTOPENBLAS/lib', '-lopenblas',
'-lpthread', '-lgfortran']

@run_before('run')
def set_tasks(self):
if self.current_partition.fullname in ['daint:gpu', 'dom:gpu']:
self.num_cpus_per_task = 12
elif self.current_partition.fullname in ['daint:mc', 'dom:mc']:
self.num_cpus_per_task = 36
elif self.current_partition.fullname in ['arolla:cn', 'tsa:cn']:
self.num_cpus_per_task = 16
elif self.current_partition.fullname in ['arolla:pn', 'tsa:pn']:
self.num_cpus_per_task = 40
elif self.current_partition.fullname in ['eiger:mc', 'pilatus:mc']:
self.num_cpus_per_task = 128

if self.num_cpus_per_task:
self.variables = {
'OMP_NUM_THREADS': str(self.num_cpus_per_task),
'OMP_BIND': 'cores',
'OMP_PROC_BIND': 'spread',
'OMP_SCHEDULE': 'static'
}

@sn.sanity_function
def eval_sanity(self):
all_tested_nodes = sn.evaluate(sn.extractall(
r'(?P<hostname>\S+):\s+Time for \d+ DGEMM operations',
self.stdout, 'hostname'))
num_tested_nodes = len(all_tested_nodes)
failure_msg = ('Requested %s node(s), but found %s node(s)' %
(self.job.num_tasks, num_tested_nodes))
sn.evaluate(sn.assert_eq(num_tested_nodes, self.job.num_tasks,
msg=failure_msg))

for hostname in all_tested_nodes:
partition_name = self.current_partition.fullname
ref_name = '%s:%s' % (partition_name, hostname)
self.reference[ref_name] = self.sys_reference.get(
partition_name, (0.0, None, None, 'Gflop/s')
)
self.perf_patterns[hostname] = sn.extractsingle(
r'%s:\s+Avg\. performance\s+:\s+(?P<gflops>\S+)'
r'\sGflop/s' % hostname, self.stdout, 'gflops', float)

return True
121 changes: 53 additions & 68 deletions cscs-checks/microbenchmarks/cpu/latency/latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,61 +4,73 @@
# SPDX-License-Identifier: BSD-3-Clause

import reframe as rfm
import reframe.utility.sanity as sn

from hpctestlib.microbenchmarks.cpu.latency import CpuLatency


@rfm.simple_test
class CPULatencyTest(rfm.RegressionTest):
sourcepath = 'latency.cpp'
build_system = 'SingleSource'
valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'ault:intel', 'ault:amdvega', 'tave:compute']
valid_prog_environs = ['PrgEnv-gnu']
class cpu_latency_check(CpuLatency):
'''CPU latency check.

This check performs a latency test for each provided buffer size.
'''
# Set required variables
buffer_sizes = ['16000', '128000', '8000000', '500000000']
num_tasks = 0
num_tasks_per_node = 1
executable_opts = ['16000', '128000', '8000000', '500000000']

valid_systems = [
'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
'ault:intel', 'ault:amdvega', 'tave:compute', 'ault:a64fx'
]
valid_prog_environs = ['PrgEnv-gnu']
reference = {
'dom:mc': {
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyMem': (76.6, -0.01, 0.05, 'ns')
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyL4': (76.6, -0.01, 0.05, 'ns')
},
'dom:gpu': {
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyMem': (71.7, -0.01, 0.05, 'ns')
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyL4': (71.7, -0.01, 0.05, 'ns')
},
'daint:mc': {
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyMem': (76.6, -0.01, 0.05, 'ns')
'latencyL1': (1.21, -0.01, 0.26, 'ns'),
'latencyL2': (3.65, -0.01, 0.26, 'ns'),
'latencyL3': (18.83, -0.01, 0.05, 'ns'),
'latencyL4': (76.6, -0.01, 0.05, 'ns')
},
'daint:gpu': {
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyMem': (71.7, -0.01, 0.05, 'ns')
'latencyL1': (1.14, -0.01, 0.26, 'ns'),
'latencyL2': (3.44, -0.01, 0.26, 'ns'),
'latencyL3': (15.65, -0.01, 0.05, 'ns'),
'latencyL4': (71.7, -0.01, 0.05, 'ns')
},
'ault:intel': {
'latencyL1': (1.08, -0.01, 0.26, 'ns'),
'latencyL2': (3.8, -0.01, 0.26, 'ns'),
'latencyL3': (21.5, -0.01, 0.05, 'ns'),
'latencyMem': (86.5, -0.01, 0.05, 'ns')
'latencyL1': (1.08, -0.01, 0.26, 'ns'),
'latencyL2': (3.8, -0.01, 0.26, 'ns'),
'latencyL3': (21.5, -0.01, 0.05, 'ns'),
'latencyL4': (86.5, -0.01, 0.05, 'ns')
},
'ault:amdvega': {
'latencyL1': (1.32, -0.01, 0.26, 'ns'),
'latencyL2': (4.02, -0.01, 0.26, 'ns'),
'latencyL3': (14.4, -0.01, 0.26, 'ns'),
'latencyMem': (90.0, -0.01, 0.05, 'ns')
'latencyL1': (1.32, -0.01, 0.26, 'ns'),
'latencyL2': (4.02, -0.01, 0.26, 'ns'),
'latencyL3': (14.4, -0.01, 0.26, 'ns'),
'latencyL4': (90.0, -0.01, 0.05, 'ns')
},
'tave:compute': {
'latencyL1': (2.86, -0.01, 0.05, 'ns'),
'latencyL2': (12.15, -0.01, 0.05, 'ns'),
'latencyL3': (137, -0.01, 0.05, 'ns'),
'latencyMem': (150, -0.05, 0.05, 'ns')
'latencyL1': (2.86, -0.01, 0.05, 'ns'),
'latencyL2': (12.15, -0.01, 0.05, 'ns'),
'latencyL3': (137, -0.01, 0.05, 'ns'),
'latencyL4': (150, -0.05, 0.05, 'ns')
},
'ault:a64fx': {
'latencyL1': (2.78, None, 0.05, 'ns'),
'latencyL2': (14.3, None, 0.05, 'ns'),
'latencyL3': (32.1, None, 0.05, 'ns'),
'latencyL4': (146, None, 0.05, 'ns')
},
}
maintainers = ['SK']
Expand All @@ -71,34 +83,7 @@ def set_modules(self):
if self.current_system.name in {'tave'}:
self.modules = ['craype-hugepages512M']

@run_before('compile')
def set_flags(self):
self.build_system.cxxflags = ['-O3']

@sanity_function
def assert_success(self):
return sn.assert_eq(
sn.count(sn.findall(r'latency', self.stdout)),
self.num_tasks * len(self.executable_opts)
)

def lat_pattern(self, index):
return sn.extractsingle(
r'latency \(ns\) for input size %s: (?P<bw>\S+) clocks' %
self.executable_opts[index], self.stdout, 'bw', float)

@performance_function('ns')
def latencyL1(self):
return self.lat_pattern(0)

@performance_function('ns')
def latencyL2(self):
return self.lat_pattern(1)

@performance_function('ns')
def latencyL3(self):
return self.lat_pattern(2)

@performance_function('ns')
def latencyMem(self):
return self.lat_pattern(3)
@run_after('init')
def set_valid_environs(self):
    '''On ault, restrict the check to the Fujitsu programming environment.'''
    if self.current_system.name == 'ault':
        self.valid_prog_environs = ['PrgEnv-fujitsu']
Loading