# File: tests/reframe/config/settings.py (new file in this diff)
#
# ReFrame site configuration for an example system with one CPU and one GPU
# Slurm partition.  The 'devices'/'processor' entries are what the EESSI
# hooks (eessi_utils) read to decide task counts and CPU/GPU test skipping.
site_configuration = {
    'systems': [
        {
            'name': 'example_system',
            'descr': 'This is just an example system',
            'modules_system': 'tmod',
            # Hostname patterns used to auto-detect the current system
            'hostnames': ['login', 'int'],
            'partitions': [
                {
                    'name': 'cpu',
                    'scheduler': 'slurm',
                    'launcher': 'srun',
                    'access': ['-p cpu'],
                    'environs': ['builtin'],
                    'processor': {
                        'num_cpus': 24,
                    },
                    'descr': 'normal CPU partition'
                },
                {
                    'name': 'gpu',
                    'descr': 'GPU partition',
                    'scheduler': 'slurm',
                    'access': ['-p gpu'],
                    'environs': ['builtin'],
                    'max_jobs': 100,
                    'launcher': 'srun',
                    'processor': {
                        'num_cpus': 16,
                    },
                    # A device of type 'gpu' marks this partition as GPU-based
                    # for eessi_utils.utils.is_gpu_present()/get_num_gpus()
                    'devices': [
                        {
                            'type': 'gpu',
                            'num_devices': 2,
                        },
                    ],
                },
            ]
        },
    ],
    'environments': [
        {
            # Minimal built-in environment; only a C compiler is defined
            'name': 'builtin',
            'cc': 'cc',
            'cxx': '',
            'ftn': '',
        },
    ],
    'logging': [
        {
            'level': 'debug',
            'handlers': [
                # Console output: info-level, message text only
                {
                    'type': 'stream',
                    'name': 'stdout',
                    'level': 'info',
                    'format': '%(message)s'
                },
                # Debug log file, overwritten on every run ('append': False)
                {
                    'type': 'file',
                    'name': 'reframe.log',
                    'level': 'debug',
                    'format': '[%(asctime)s] %(levelname)s: %(check_info)s: %(message)s',  # noqa: E501
                    'append': False
                }
            ],
            # Performance results: one pipe-separated record per perf variable,
            # filed under <system>/<partition>/
            'handlers_perflog': [
                {
                    'type': 'filelog',
                    'prefix': '%(check_system)s/%(check_partition)s',
                    'level': 'info',
                    'format': (
                        '%(check_job_completion_time)s|reframe %(version)s|'
                        '%(check_info)s|jobid=%(check_jobid)s|'
                        '%(check_perf_var)s=%(check_perf_value)s|'
                        'ref=%(check_perf_ref)s '
                        '(l=%(check_perf_lower_thres)s, '
                        'u=%(check_perf_upper_thres)s)|'
                        '%(check_perf_unit)s'
                    ),
                    'append': True
                }
            ]
        }
    ],
}
# (next file in diff: tests/reframe/eessi-checks/applications/gromacs.py)
# === File: tests/reframe/eessi-checks/applications/gromacs.py ===
import re
import reframe as rfm
from reframe.utility import find_modules

from testlib.applications.gromacs import Gromacs
import eessi_utils.hooks as hooks


@rfm.required_version('>=3.6.2')
@rfm.simple_test
class Gromacs_EESSI(Gromacs):
    '''EESSI GROMACS check.

    Runs GROMACS using every module with 'GROMACS' in its name that is found
    in the module environment.  On GPU nodes, only modules whose name also
    contains 'cuda' are run; on CPU nodes, only modules whose name does NOT
    contain 'cuda' are run.  Whether a node is CPU- or GPU-based is determined
    by whether a device named 'gpu' is specified for the current partition in
    the ReFrame settings file.  The number of tasks, tasks per node and CPUs
    per task are derived from the partition's CPU and GPU counts in the
    ReFrame config file (see eessi_utils.hooks).
    '''

    modules = required  # Make sure that our apply_module_info hook sets a value
    # (tag, nsteps, num_nodes) per scale variant; nsteps must be large enough
    # for GROMACS to get past its load-balancing phase at that task count.
    scale = parameter([
        ('singlenode', 10000, 1),
        ('small', 40000, 4),
        ('large', 100000, 10),
    ])
    module_info = parameter(find_modules('GROMACS',
                                         environ_mapping={r'.*': 'builtin'}))

    @run_after('init')
    def apply_module_info(self):
        '''Unpack the (system, environ, module) tuple generated by
        find_modules into the corresponding test attributes.'''
        self.s, self.e, self.m = self.module_info
        self.valid_systems = [self.s]
        self.modules = [self.m]
        self.valid_prog_environs = [self.e]

    @run_after('init')
    def set_test_scale(self):
        '''Unpack the scale parameter and tag the test with its variant name.'''
        scale_variant, self.nsteps, self.num_nodes = self.scale
        self.tags.add(scale_variant)

    # Skip testing GPU-based modules on CPU-based nodes
    @run_after('setup')
    def skip_gpu_test_on_cpu_nodes(self):
        hooks.skip_gpu_test_on_cpu_nodes(self)

    # Skip testing CPU-based modules on GPU-based nodes
    # (though these would run fine, one is usually not interested in them)
    @run_after('setup')
    def skip_cpu_test_on_gpu_nodes(self):
        hooks.skip_cpu_test_on_gpu_nodes(self)

    # Assign num_tasks, num_tasks_per_node and num_cpus_per_task automatically
    # based on the current partition's num_cpus and GPU count
    @run_after('setup')
    def set_num_tasks(self):
        hooks.auto_assign_num_tasks_MPI(test=self, num_nodes=self.num_nodes)


# === File: tests/reframe/eessi_utils/hooks.py ===
import reframe as rfm

import eessi_utils.utils as utils


def skip_cpu_test_on_gpu_nodes(test: rfm.RegressionTest) -> None:
    '''Skip *test* if GPUs are present in its partition but no CUDA is
    required by its module; the reason is recorded in the skip message.'''
    test.skip_if(
        utils.is_gpu_present(test) and not utils.is_cuda_required(test),
        msg='GPU is present on this partition, skipping CPU-based test')


def skip_gpu_test_on_cpu_nodes(test: rfm.RegressionTest) -> None:
    '''Skip *test* if its module requires CUDA but no GPU is present in its
    partition; the reason is recorded in the skip message.'''
    test.skip_if(
        utils.is_cuda_required(test) and not utils.is_gpu_present(test),
        msg='Test requires CUDA, but no GPU is present in this partition. '
            'Skipping test...')


def auto_assign_num_tasks_MPI(test: rfm.RegressionTest,
                              num_nodes: int) -> None:
    '''Set num_tasks, num_tasks_per_node and num_cpus_per_task in place.

    For GPU tests, one task per GPU is set, with num_cpus_per_task the ratio
    of CPU cores to GPUs.  For CPU tests, one task per CPU core is set, with
    num_cpus_per_task = 1.  The total task count follows from *num_nodes*.
    The resulting behaviour is (usually) sensible for pure MPI tests.

    NOTE: mutates *test*; it does not return anything (the original
    ``-> rfm.RegressionTest`` annotation was wrong).
    '''
    if utils.is_cuda_required(test):
        test.num_tasks_per_node = utils.get_num_gpus(test)
        # Spread the partition's cores evenly over the GPUs (integer division)
        test.num_cpus_per_task = (test.current_partition.processor.num_cpus
                                  // test.num_tasks_per_node)
    else:
        test.num_tasks_per_node = test.current_partition.processor.num_cpus
        test.num_cpus_per_task = 1
    test.num_tasks = num_nodes * test.num_tasks_per_node


# === File: tests/reframe/eessi_utils/utils.py ===
import re

import reframe as rfm


# Device type (as declared in the ReFrame config) that marks a GPU
gpu_dev_name = 'gpu'


def _get_gpu_list(test: rfm.RegressionTest) -> list:
    '''Device counts of all 'gpu'-type devices in the current partition.'''
    return [dev.num_devices
            for dev in test.current_partition.devices
            if dev.device_type == gpu_dev_name]


def get_num_gpus(test: rfm.RegressionTest) -> int:
    '''Return the number of GPUs for the current partition.

    Raises ValueError unless exactly one device with type 'gpu' is declared
    for the partition: with zero or multiple such devices, the GPU count for
    the test would be ambiguous.
    '''
    gpu_list = _get_gpu_list(test)
    if len(gpu_list) != 1:
        raise ValueError(
            f"Expected exactly one device with type '{gpu_dev_name}' for "
            f"partition '{test.current_partition.name}', found "
            f"{len(gpu_list)}. Cannot determine number of GPUs available "
            f"for the test. Please check the definition of partition "
            f"'{test.current_partition.name}' in your ReFrame config file.")
    return gpu_list[0]


def is_gpu_present(test: rfm.RegressionTest) -> bool:
    '''Check whether any 'gpu'-type device is present in the current partition.'''
    return len(_get_gpu_list(test)) >= 1


def is_cuda_required(test: rfm.RegressionTest) -> bool:
    '''Check whether CUDA seems to be required by the current module, i.e.
    whether any module name contains 'cuda' (case-insensitive).'''
    return any(re.search('(?i)cuda', module) for module in test.modules)


# === File: tests/reframe/testlib/applications/gromacs/__init__.py ===
import os
import reframe as rfm
import reframe.utility.sanity as sn


# Cannot currently set a required version on a library test
# @rfm.required_version('>=3.6.2')
class Gromacs(rfm.RunOnlyRegressionTest, pin_prefix=True):
    '''GROMACS benchmark based on PRACE Benchmark Suite GROMACS case A.

    Derived tests must specify the variables ``num_tasks``,
    ``num_tasks_per_node``, ``num_cpus_per_task``, ``nsteps`` and
    ``modules``.  Note that a sufficiently large ``nsteps`` is needed for
    GROMACS to pass the load-balancing phase.  As a rough estimate: 10000
    steps are generally ok for 24 tasks, 100000 steps for 240 tasks, etc.
    '''

    num_tasks = required
    num_tasks_per_node = required
    num_cpus_per_task = required
    nsteps = variable(int)

    descr = 'GROMACS Prace Benchmark Suite case A'
    use_multithreading = False
    executable = 'gmx_mpi'
    output_file = 'md.log'
    energy_reference = -1509290.0  # expected final total energy for this input
    reference = {
        '*': {
            'perf': (None, None, None, 'ns/day')
        }
    }
    maintainers = ['casparvl']

    @run_before('run')
    def set_executable_opts(self):
        '''Set the executable opts, with the configured nsteps.'''
        self.executable_opts = [
            'mdrun', '-s ion_channel.tpr', '-maxh 0.50',
            '-resethway', '-noconfout', f'-nsteps {self.nsteps}',
        ]

    @run_before('run')
    def set_omp_num_threads(self):
        '''Match the OpenMP thread count to the CPUs allocated per task.'''
        self.variables = {
            'OMP_NUM_THREADS': f'{self.num_cpus_per_task}',
        }

    @run_before('performance')
    def set_perf_patterns(self):
        '''Set the perf patterns to report (ns/day from the GROMACS log).
        The named group <perf> was garbled to "(?P\\S+)" in the original,
        which is an invalid pattern.'''
        self.perf_patterns = {
            'perf': sn.extractsingle(r'Performance:\s+(?P<perf>\S+)',
                                     self.output_file, 'perf', float)
        }

    @sn.sanity_function
    def get_energy(self):
        '''Extract the final Total Energy value from the energy table in
        md.log (fourth column of the row following the header line).'''
        return sn.extractsingle(
            r'\s+Coul\. recip\.\s+Potential\s+Kinetic En\.\s+Total Energy\s+Conserved En.\n'
            r'(\s+\S+){3}\s+(?P<energy>\S+)(\s+\S+){1}\n',
            self.output_file, 'energy', float, item=-1)

    @run_before('sanity')
    def set_sanity_patterns(self):
        '''Require a finished mdrun and a final energy within 0.1% of the
        reference value.'''
        self.sanity_patterns = sn.all([
            sn.assert_found('Finished mdrun', self.output_file,
                            msg='Run seems to not have finished successfully'),
            sn.assert_reference(self.get_energy(), self.energy_reference,
                                -0.001, 0.001,
                                msg='Final energy reference not within '
                                    'expected limits'),
        ])

# (diff also adds a binary file:
#  tests/reframe/testlib/applications/gromacs/src/ion_channel.tpr)