From 2195f63777c3d3f050ab676f403d6efc438ce14a Mon Sep 17 00:00:00 2001 From: fgvangessel-umd Date: Fri, 26 Sep 2025 14:57:18 -0400 Subject: [PATCH 1/4] Adding slurm example files for the airfoil problem --- .../problems/airfoil/dataset_slurm_airfoil.py | 161 ++++++++++++++++++ engibench/problems/airfoil/simulation_jobs.py | 61 +++++++ 2 files changed, 222 insertions(+) create mode 100644 engibench/problems/airfoil/dataset_slurm_airfoil.py create mode 100644 engibench/problems/airfoil/simulation_jobs.py diff --git a/engibench/problems/airfoil/dataset_slurm_airfoil.py b/engibench/problems/airfoil/dataset_slurm_airfoil.py new file mode 100644 index 00000000..c05b6acb --- /dev/null +++ b/engibench/problems/airfoil/dataset_slurm_airfoil.py @@ -0,0 +1,161 @@ +from argparse import ArgumentParser +from itertools import product +import os, sys +import shutil +import time +from typing import Any +import matplotlib.pyplot as plt +import numpy as np +from scipy.stats import qmc +from engibench.utils import slurm +from engibench.problems.airfoil.simulation_jobs import simulate_slurm +from datasets import load_dataset +print(f"Python version: {sys.version}") + +def calculate_runtime(group_size, minutes_per_sim=5): + # Calculate runtime based on group size and (rough) estimate of minutes per simulation + total_minutes = group_size * minutes_per_sim + hours = total_minutes // 60 + minutes = total_minutes % 60 + return f"{hours:02d}:{minutes:02d}:00" + +if __name__ == "__main__": + """Dataset Generation, Simulation, and Rendering for Airfoil Problem via SLURM. + + This script generates a dataset for the Airfoil problem using the SLURM API, though it could + be generalized to other problems as well. It includes functions for simulation of designs. + + Command Line Arguments: + -n_designs, --num_designs: How many airfoil designs should we use? + -n_flows, --num_flow_conditions: How many flow conditions should we use per design? + -n_aoas, --num_angles_of_attack: How many angles of attack should we use per design & flow condition pairing? + -group_size, --group_size: How many simulations should we group together on a single cpu? + -n_slurm_array, --num_slurm_array: How many slurm jobs to spawn and submit via slurm arrays? Note this may be limited by the HPC system. + """ + # Fetch command line arguments for render and simulate to know whether to run those functions + parser = ArgumentParser() + parser.add_argument( + "-n_designs", + "--num_designs", + type=int, + default=10, + help="How many airfoil designs should we use?", + ) + parser.add_argument( + "-n_flows", + "--num_flow_conditions", + type=int, + default=1, + help="How many flow conditions (Mach Number and Reynolds Number) should we sample for each design?", + ) + parser.add_argument( + "-n_aoas", + "--num_angles_of_attack", + type=int, + default=1, + help="How many angles of attack should we sample for each design?", + ) + parser.add_argument( + "-group_size", + "--group_size", + type=int, + default=2, + help="How many simulations do you wish to batch within each individual slurm job?", + ) + parser.add_argument( + "-n_slurm_array", + "--num_slurm_array", + type=int, + default=1000, + help="What is the maximum size of the Slurm array (Will vary from HPC system to HPC system)?", + ) + args = parser.parse_args() + + n_designs = args.num_designs + n_flows = args.num_flow_conditions + n_aoas = args.num_angles_of_attack + group_size = args.group_size + n_slurm_array = args.num_slurm_array + + # ============== Problem-specific elements =================== + # The following elements are specific to the problem and should be modified accordingly + + # Define flow parameter and angle of attack ranges + Ma_min, Ma_max = 0.5, 0.9 # Mach number range + Re_min, Re_max = 1.0e6, 2.0e7 # Reynolds number range + aoa_min, aoa_max = 0.0, 20.0 # Angle of attack range + + # Load airfoil designs from HF Database + ds = load_dataset("IDEALLab/airfoil_v0") + designs = ds["train"]["initial_design"]+ds["train"]["optimal_design"]+\ + ds["val"]["initial_design"]+ds["val"]["optimal_design"]+\ + ds["test"]["initial_design"]+ds["test"]["optimal_design"] + + # Use specified number of designs + designs = designs[:n_designs] + + # Generate LHS samples + rng = np.random.default_rng(seed=42) # Optional seed for reproducibility + sampler = qmc.LatinHypercube(d=2, seed=rng) + samples = sampler.random(n=n_designs*n_flows) # n samples needed + + # Scale to your flow domain + bounds = np.array([[Ma_min, Ma_max], [Re_min, Re_max]]) + scaled_samples = qmc.scale(samples, bounds[:, 0], bounds[:, 1]) + mach_values = scaled_samples[:, 0] + reynolds_values = scaled_samples[:, 1] + + # Generate all simulation configurations + config_id = 0 + simulate_configs_designs = [] + for i, design in enumerate(designs): + for j in range(n_flows): + ma = mach_values[i*n_flows + j] + re = reynolds_values[i*n_flows + j] + for k, alpha in enumerate(rng.uniform(low=aoa_min, high=aoa_max, size=n_aoas)): + problem_configuration = {'mach': ma, 'reynolds': re, 'alpha': alpha} + config = {'problem_configuration': problem_configuration, 'configuration_id': config_id} + config["design"] = design["coords"] + simulate_configs_designs.append(config) + config_id += 1 + + # Randomly shuffle all simulation configurations + simulate_configs_designs = np.random.permutation(simulate_configs_designs).tolist() + + print(f"Generated {len(simulate_configs_designs)} configurations for simulation.") + + # Calculate total number of simulation jobs and number of sbatch maps needed + n_simulations = len(simulate_configs_designs) + n_sbatch_maps = np.ceil(n_simulations / (group_size * n_slurm_array)) + + slurm_config = slurm.SlurmConfig( + name="Airfoil_dataset_generation", + runtime=calculate_runtime(group_size, minutes_per_sim=5), + ntasks=1, + cpus_per_task=1, + log_dir="./sim_logs/", + ) + print(calculate_runtime(group_size, minutes_per_sim=5)) + + submitted_jobs = [] + for ibatch in range(int(n_sbatch_maps)): + sim_batch_configs = simulate_configs_designs[ibatch * group_size * n_slurm_array: (ibatch + 1) * group_size * n_slurm_array] + print(len(sim_batch_configs)) + print(f"Submitting batch {ibatch + 1}/{int(n_sbatch_maps)}") + + job_array = slurm.sbatch_map( + f=simulate_slurm, + args=sim_batch_configs, + slurm_args=slurm_config, + group_size=group_size, # Number of jobs to batch in sequence to reduce job array size + work_dir='scratch' + ) + + # Save the job array reference + submitted_jobs.append(job_array) + + # Wait for this job to complete by calling save() + # This will submit a dependent job that waits for the array to finish + print(f"Waiting for batch {ibatch + 1} to complete...") + job_array.save(f"results_{ibatch}.pkl", slurm_args=slurm_config) + print(f"Batch {ibatch + 1} completed!") \ No newline at end of file diff --git a/engibench/problems/airfoil/simulation_jobs.py b/engibench/problems/airfoil/simulation_jobs.py new file mode 100644 index 00000000..430a1a70 --- /dev/null +++ b/engibench/problems/airfoil/simulation_jobs.py @@ -0,0 +1,61 @@ +"""Dataset Generator for the Airfoil problem using the SLURM API.""" + +from argparse import ArgumentParser +from itertools import product +import os, sys +import shutil +import time +from typing import Any +import numpy as np +from engibench.problems.airfoil.v0 import Airfoil +from engibench.utils import slurm + + +def simulate_slurm(problem_configuration: dict, configuration_id: int, design: list) -> dict: + """Takes in the given configuration and designs, then runs the simulation analysis. + + Any arguments should be things that you want to change across the different jobs, and anything + that is the same/static across the runs should just be defined inside this function. + + Args: + problem_configuration (dict): The specific configuration used to setup the problem being passed. + For the airfoil problem this includes Mach number, Reynolds number, and andle of attack. + configuration_id (int): A unique identifier for the job for later debugging or tracking. + design (list): list of lists defining x and y corrdinates of airfoil geometry. + + Returns: + "performance_dict": Dictionary of aerodynamic performance (lift & drag). + "simulate_time": The time taken to run this simulation job. Useful for aggregating + the time taken for dataset generation. + "problem_configuration": Problem configuration parameters + "configuration_id": Identifier for specific simulation configurations + """ + + # Instantiate problem + problem = Airfoil() + + # Set simulation ID + sim_id = configuration_id+1 + + # Create unique simulation directory + problem.reset(seed=sim_id, cleanup=False) + + # Create simulation design (coordinates + angle of attack) + my_design = {"coords": np.array(design), "angle_of_attack": problem_configuration["alpha"]} + + print("Starting `simulate` via SLURM...") + start_time = time.time() + + performance = problem.simulate(my_design, mpicores=1, config=problem_configuration) + performance_dict = {'drag': performance[0], 'lift': performance[1]} + print("Finished `simulate` via SLURM.") + end_time = time.time() + elapsed_time = end_time - start_time + print(f"Elapsed time for `simulate`: {elapsed_time:.2f} seconds") + + return { + "performance_dict": performance_dict, + "simulate_time": elapsed_time, + "problem_configuration": problem_configuration, + "configuration_id": configuration_id, + } \ No newline at end of file From f1c51e50a567b859648b12d16a2fd285facb1aee Mon Sep 17 00:00:00 2001 From: fgvangessel-umd Date: Fri, 26 Sep 2025 15:18:10 -0400 Subject: [PATCH 2/4] Fixes from pre-commit check --- .gitignore | 9 +++++++++ engibench/problems/airfoil/dataset_slurm_airfoil.py | 10 +++++----- engibench/problems/airfoil/simulation_jobs.py | 6 +++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index b1b68c0b..d8a9fa94 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,15 @@ __pycache__/ *.py[cod] *$py.class + +# Custom engibench_studies/ +apptainer-cache/ +engibench/problems/airfoil/device_dataset_slurm_airfoil.py +engibench/problems/airfoil/test_imports.py +experiment/ +singularity-cache/ + # C extensions *.so @@ -130,6 +138,7 @@ venv/ ENV/ env.bak/ venv.bak/ +engibench_env/ # Spyder project settings .spyderproject diff --git a/engibench/problems/airfoil/dataset_slurm_airfoil.py b/engibench/problems/airfoil/dataset_slurm_airfoil.py index c05b6acb..f8cfcf0a 100644 --- a/engibench/problems/airfoil/dataset_slurm_airfoil.py +++ b/engibench/problems/airfoil/dataset_slurm_airfoil.py @@ -13,7 +13,7 @@ print(f"Python version: {sys.version}") def calculate_runtime(group_size, minutes_per_sim=5): - # Calculate runtime based on group size and (rough) estimate of minutes per simulation + # Calculate runtime based on group size and (rough) estimate of minutes per simulation total_minutes = group_size * minutes_per_sim hours = total_minutes // 60 minutes = total_minutes % 60 @@ -79,12 +79,12 @@ def calculate_runtime(group_size, minutes_per_sim=5): # ============== Problem-specific elements =================== # The following elements are specific to the problem and should be modified accordingly - + # Define flow parameter and angle of attack ranges Ma_min, Ma_max = 0.5, 0.9 # Mach number range Re_min, Re_max = 1.0e6, 2.0e7 # Reynolds number range aoa_min, aoa_max = 0.0, 20.0 # Angle of attack range - + # Load airfoil designs from HF Database ds = load_dataset("IDEALLab/airfoil_v0") designs = ds["train"]["initial_design"]+ds["train"]["optimal_design"]+\ @@ -123,7 +123,7 @@ def calculate_runtime(group_size, minutes_per_sim=5): simulate_configs_designs = np.random.permutation(simulate_configs_designs).tolist() print(f"Generated {len(simulate_configs_designs)} configurations for simulation.") - + # Calculate total number of simulation jobs and number of sbatch maps needed n_simulations = len(simulate_configs_designs) n_sbatch_maps = np.ceil(n_simulations / (group_size * n_slurm_array)) @@ -158,4 +158,4 @@ def calculate_runtime(group_size, minutes_per_sim=5): # This will submit a dependent job that waits for the array to finish print(f"Waiting for batch {ibatch + 1} to complete...") job_array.save(f"results_{ibatch}.pkl", slurm_args=slurm_config) - print(f"Batch {ibatch + 1} completed!") \ No newline at end of file + print(f"Batch {ibatch + 1} completed!") diff --git a/engibench/problems/airfoil/simulation_jobs.py b/engibench/problems/airfoil/simulation_jobs.py index 430a1a70..fed99b8c 100644 --- a/engibench/problems/airfoil/simulation_jobs.py +++ b/engibench/problems/airfoil/simulation_jobs.py @@ -19,9 +19,9 @@ def simulate_slurm(problem_configuration: dict, configuration_id: int, design: l Args: problem_configuration (dict): The specific configuration used to setup the problem being passed. - For the airfoil problem this includes Mach number, Reynolds number, and andle of attack. + For the airfoil problem this includes Mach number, Reynolds number, and angle of attack. configuration_id (int): A unique identifier for the job for later debugging or tracking. - design (list): list of lists defining x and y corrdinates of airfoil geometry. + design (list): list of lists defining x and y coordinates of airfoil geometry. Returns: "performance_dict": Dictionary of aerodynamic performance (lift & drag). @@ -58,4 +58,4 @@ def simulate_slurm(problem_configuration: dict, configuration_id: int, design: l "simulate_time": elapsed_time, "problem_configuration": problem_configuration, "configuration_id": configuration_id, - } \ No newline at end of file + } From 74d577ea95654b14d50d5a9417bdf0bfb299723d Mon Sep 17 00:00:00 2001 From: fgvangessel-umd Date: Fri, 26 Sep 2025 15:23:22 -0400 Subject: [PATCH 3/4] Ruff check --- .../problems/airfoil/dataset_slurm_airfoil.py | 20 +++++++++---------- engibench/problems/airfoil/simulation_jobs.py | 11 +++------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/engibench/problems/airfoil/dataset_slurm_airfoil.py b/engibench/problems/airfoil/dataset_slurm_airfoil.py index f8cfcf0a..39f12678 100644 --- a/engibench/problems/airfoil/dataset_slurm_airfoil.py +++ b/engibench/problems/airfoil/dataset_slurm_airfoil.py @@ -1,15 +1,13 @@ from argparse import ArgumentParser -from itertools import product -import os, sys -import shutil -import time -from typing import Any -import matplotlib.pyplot as plt +import sys + +from datasets import load_dataset import numpy as np from scipy.stats import qmc -from engibench.utils import slurm + from engibench.problems.airfoil.simulation_jobs import simulate_slurm -from datasets import load_dataset +from engibench.utils import slurm + print(f"Python version: {sys.version}") def calculate_runtime(group_size, minutes_per_sim=5): @@ -113,8 +111,8 @@ def calculate_runtime(group_size, minutes_per_sim=5): ma = mach_values[i*n_flows + j] re = reynolds_values[i*n_flows + j] for k, alpha in enumerate(rng.uniform(low=aoa_min, high=aoa_max, size=n_aoas)): - problem_configuration = {'mach': ma, 'reynolds': re, 'alpha': alpha} - config = {'problem_configuration': problem_configuration, 'configuration_id': config_id} + problem_configuration = {"mach": ma, "reynolds": re, "alpha": alpha} + config = {"problem_configuration": problem_configuration, "configuration_id": config_id} config["design"] = design["coords"] simulate_configs_designs.append(config) config_id += 1 @@ -148,7 +146,7 @@ def calculate_runtime(group_size, minutes_per_sim=5): args=sim_batch_configs, slurm_args=slurm_config, group_size=group_size, # Number of jobs to batch in sequence to reduce job array size - work_dir='scratch' + work_dir="scratch" ) # Save the job array reference diff --git a/engibench/problems/airfoil/simulation_jobs.py b/engibench/problems/airfoil/simulation_jobs.py index fed99b8c..c01db5ba 100644 --- a/engibench/problems/airfoil/simulation_jobs.py +++ b/engibench/problems/airfoil/simulation_jobs.py @@ -1,14 +1,10 @@ """Dataset Generator for the Airfoil problem using the SLURM API.""" -from argparse import ArgumentParser -from itertools import product -import os, sys -import shutil import time -from typing import Any + import numpy as np + from engibench.problems.airfoil.v0 import Airfoil -from engibench.utils import slurm def simulate_slurm(problem_configuration: dict, configuration_id: int, design: list) -> dict: @@ -30,7 +26,6 @@ def simulate_slurm(problem_configuration: dict, configuration_id: int, design: l "problem_configuration": Problem configuration parameters "configuration_id": Identifier for specific simulation configurations """ - # Instantiate problem problem = Airfoil() @@ -47,7 +42,7 @@ def simulate_slurm(problem_configuration: dict, configuration_id: int, design: l start_time = time.time() performance = problem.simulate(my_design, mpicores=1, config=problem_configuration) - performance_dict = {'drag': performance[0], 'lift': performance[1]} + performance_dict = {"drag": performance[0], "lift": performance[1]} print("Finished `simulate` via SLURM.") end_time = time.time() elapsed_time = end_time - start_time From 3800a386df55300ef68b8facddc3f40fe524f433 Mon Sep 17 00:00:00 2001 From: fgvangessel-umd Date: Fri, 26 Sep 2025 15:51:02 -0400 Subject: [PATCH 4/4] Fixed warnings generated by ruff and mypy --- .gitignore | 1 + .../problems/airfoil/dataset_slurm_airfoil.py | 38 +++++++++++-------- engibench/problems/airfoil/simulation_jobs.py | 2 +- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index d8a9fa94..3a1404f0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/ # Custom engibench_studies/ +experiment/engibench_studies/ apptainer-cache/ engibench/problems/airfoil/device_dataset_slurm_airfoil.py engibench/problems/airfoil/test_imports.py diff --git a/engibench/problems/airfoil/dataset_slurm_airfoil.py b/engibench/problems/airfoil/dataset_slurm_airfoil.py index 39f12678..2eb05acc 100644 --- a/engibench/problems/airfoil/dataset_slurm_airfoil.py +++ b/engibench/problems/airfoil/dataset_slurm_airfoil.py @@ -1,5 +1,9 @@ +"""Dataset Generation for Airfoil Problem via SLURM. + +This script generates a dataset for the Airfoil problem using the SLURM API +""" + from argparse import ArgumentParser -import sys from datasets import load_dataset import numpy as np @@ -8,15 +12,15 @@ from engibench.problems.airfoil.simulation_jobs import simulate_slurm from engibench.utils import slurm -print(f"Python version: {sys.version}") def calculate_runtime(group_size, minutes_per_sim=5): - # Calculate runtime based on group size and (rough) estimate of minutes per simulation + """Calculate runtime based on group size and (rough) estimate of minutes per simulation.""" total_minutes = group_size * minutes_per_sim hours = total_minutes // 60 minutes = total_minutes % 60 return f"{hours:02d}:{minutes:02d}:00" + if __name__ == "__main__": """Dataset Generation, Simulation, and Rendering for Airfoil Problem via SLURM. @@ -85,9 +89,14 @@ def calculate_runtime(group_size, minutes_per_sim=5): # Load airfoil designs from HF Database ds = load_dataset("IDEALLab/airfoil_v0") - designs = ds["train"]["initial_design"]+ds["train"]["optimal_design"]+\ - ds["val"]["initial_design"]+ds["val"]["optimal_design"]+\ - ds["test"]["initial_design"]+ds["test"]["optimal_design"] + designs = ( + ds["train"]["initial_design"] + + ds["train"]["optimal_design"] + + ds["val"]["initial_design"] + + ds["val"]["optimal_design"] + + ds["test"]["initial_design"] + + ds["test"]["optimal_design"] + ) # Use specified number of designs designs = designs[:n_designs] @@ -95,7 +104,7 @@ def calculate_runtime(group_size, minutes_per_sim=5): # Generate LHS samples rng = np.random.default_rng(seed=42) # Optional seed for reproducibility sampler = qmc.LatinHypercube(d=2, seed=rng) - samples = sampler.random(n=n_designs*n_flows) # n samples needed + samples = sampler.random(n=n_designs * n_flows) # n samples needed # Scale to your flow domain bounds = np.array([[Ma_min, Ma_max], [Re_min, Re_max]]) @@ -108,18 +117,15 @@ def calculate_runtime(group_size, minutes_per_sim=5): simulate_configs_designs = [] for i, design in enumerate(designs): for j in range(n_flows): - ma = mach_values[i*n_flows + j] - re = reynolds_values[i*n_flows + j] - for k, alpha in enumerate(rng.uniform(low=aoa_min, high=aoa_max, size=n_aoas)): + ma = mach_values[i * n_flows + j] + re = reynolds_values[i * n_flows + j] + for alpha in rng.uniform(low=aoa_min, high=aoa_max, size=n_aoas): problem_configuration = {"mach": ma, "reynolds": re, "alpha": alpha} config = {"problem_configuration": problem_configuration, "configuration_id": config_id} config["design"] = design["coords"] simulate_configs_designs.append(config) config_id += 1 - # Randomly shuffle all simulation configurations - simulate_configs_designs = np.random.permutation(simulate_configs_designs).tolist() - print(f"Generated {len(simulate_configs_designs)} configurations for simulation.") # Calculate total number of simulation jobs and number of sbatch maps needed @@ -137,7 +143,9 @@ def calculate_runtime(group_size, minutes_per_sim=5): submitted_jobs = [] for ibatch in range(int(n_sbatch_maps)): - sim_batch_configs = simulate_configs_designs[ibatch * group_size * n_slurm_array: (ibatch + 1) * group_size * n_slurm_array] + sim_batch_configs = simulate_configs_designs[ + ibatch * group_size * n_slurm_array : (ibatch + 1) * group_size * n_slurm_array + ] print(len(sim_batch_configs)) print(f"Submitting batch {ibatch + 1}/{int(n_sbatch_maps)}") @@ -146,7 +154,7 @@ def calculate_runtime(group_size, minutes_per_sim=5): args=sim_batch_configs, slurm_args=slurm_config, group_size=group_size, # Number of jobs to batch in sequence to reduce job array size - work_dir="scratch" + work_dir="scratch", ) # Save the job array reference diff --git a/engibench/problems/airfoil/simulation_jobs.py b/engibench/problems/airfoil/simulation_jobs.py index c01db5ba..265b5410 100644 --- a/engibench/problems/airfoil/simulation_jobs.py +++ b/engibench/problems/airfoil/simulation_jobs.py @@ -30,7 +30,7 @@ def simulate_slurm(problem_configuration: dict, configuration_id: int, design: l problem = Airfoil() # Set simulation ID - sim_id = configuration_id+1 + sim_id = configuration_id + 1 # Create unique simulation directory problem.reset(seed=sim_id, cleanup=False)