Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,16 @@
__pycache__/
*.py[cod]
*$py.class

# Custom
engibench_studies/
experiment/engibench_studies/
apptainer-cache/
engibench/problems/airfoil/device_dataset_slurm_airfoil.py
engibench/problems/airfoil/test_imports.py
experiment/
singularity-cache/


# C extensions
*.so
Expand Down Expand Up @@ -130,6 +139,7 @@ venv/
ENV/
env.bak/
venv.bak/
engibench_env/

# Spyder project settings
.spyderproject
Expand Down
167 changes: 167 additions & 0 deletions engibench/problems/airfoil/dataset_slurm_airfoil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
"""Dataset Generation for Airfoil Problem via SLURM.

This script generates a dataset for the Airfoil problem using the SLURM API
"""

from argparse import ArgumentParser

from datasets import load_dataset
import numpy as np
from scipy.stats import qmc

from engibench.problems.airfoil.simulation_jobs import simulate_slurm
from engibench.utils import slurm


def calculate_runtime(group_size, minutes_per_sim=5):
"""Calculate runtime based on group size and (rough) estimate of minutes per simulation."""
total_minutes = group_size * minutes_per_sim
hours = total_minutes // 60
minutes = total_minutes % 60
return f"{hours:02d}:{minutes:02d}:00"


if __name__ == "__main__":
"""Dataset Generation, Simulation, and Rendering for Airfoil Problem via SLURM.

This script generates a dataset for the Airfoil problem using the SLURM API, though it could
be generalized to other problems as well. It includes functions for simulation of designs.

Command Line Arguments:
-n_designs, --num_designs: How many airfoil designs should we use?
-n_flows, --num_flow_conditions: How many flow conditions should we use per design?
-n_aoas, --num_angles_of_attack: How many angles of attack should we use per design & flow condition pairing?
-group_size, --group_size: How many simulations should we group together on a single cpu?
-n_slurm_array, --num_slurm_array: How many slurm jobs to spawn and submit via slurm arrays? Note this may be limited by the HPC system.
"""
# Fetch command line arguments for render and simulate to know whether to run those functions
parser = ArgumentParser()
parser.add_argument(
"-n_designs",
"--num_designs",
type=int,
default=10,
help="How many airfoil designs should we use?",
)
parser.add_argument(
"-n_flows",
"--num_flow_conditions",
type=int,
default=1,
help="How many flow conditions (Mach Number and Reynolds Number) should we sample for each design?",
)
parser.add_argument(
"-n_aoas",
"--num_angles_of_attack",
type=int,
default=1,
help="How many angles of attack should we sample for each design?",
)
parser.add_argument(
"-group_size",
"--group_size",
type=int,
default=2,
help="How many simulations do you wish to batch within each individual slurm job?",
)
parser.add_argument(
"-n_slurm_array",
"--num_slurm_array",
type=int,
default=1000,
help="What is the maximum size of the Slurm array (Will vary from HPC system to HPC system)?",
)
args = parser.parse_args()

n_designs = args.num_designs
n_flows = args.num_flow_conditions
n_aoas = args.num_angles_of_attack
group_size = args.group_size
n_slurm_array = args.num_slurm_array

# ============== Problem-specific elements ===================
# The following elements are specific to the problem and should be modified accordingly

# Define flow parameter and angle of attack ranges
Ma_min, Ma_max = 0.5, 0.9 # Mach number range
Re_min, Re_max = 1.0e6, 2.0e7 # Reynolds number range
aoa_min, aoa_max = 0.0, 20.0 # Angle of attack range

# Load airfoil designs from HF Database
ds = load_dataset("IDEALLab/airfoil_v0")
designs = (
ds["train"]["initial_design"]
+ ds["train"]["optimal_design"]
+ ds["val"]["initial_design"]
+ ds["val"]["optimal_design"]
+ ds["test"]["initial_design"]
+ ds["test"]["optimal_design"]
)

# Use specified number of designs
designs = designs[:n_designs]

# Generate LHS samples
rng = np.random.default_rng(seed=42) # Optional seed for reproducibility
sampler = qmc.LatinHypercube(d=2, seed=rng)
samples = sampler.random(n=n_designs * n_flows) # n samples needed

# Scale to your flow domain
bounds = np.array([[Ma_min, Ma_max], [Re_min, Re_max]])
scaled_samples = qmc.scale(samples, bounds[:, 0], bounds[:, 1])
mach_values = scaled_samples[:, 0]
reynolds_values = scaled_samples[:, 1]

# Generate all simulation configurations
config_id = 0
simulate_configs_designs = []
for i, design in enumerate(designs):
for j in range(n_flows):
ma = mach_values[i * n_flows + j]
re = reynolds_values[i * n_flows + j]
for alpha in rng.uniform(low=aoa_min, high=aoa_max, size=n_aoas):
problem_configuration = {"mach": ma, "reynolds": re, "alpha": alpha}
config = {"problem_configuration": problem_configuration, "configuration_id": config_id}
config["design"] = design["coords"]
simulate_configs_designs.append(config)
config_id += 1

print(f"Generated {len(simulate_configs_designs)} configurations for simulation.")

# Calculate total number of simulation jobs and number of sbatch maps needed
n_simulations = len(simulate_configs_designs)
n_sbatch_maps = np.ceil(n_simulations / (group_size * n_slurm_array))

slurm_config = slurm.SlurmConfig(
name="Airfoil_dataset_generation",
runtime=calculate_runtime(group_size, minutes_per_sim=5),
ntasks=1,
cpus_per_task=1,
log_dir="./sim_logs/",
)
print(calculate_runtime(group_size, minutes_per_sim=5))

submitted_jobs = []
for ibatch in range(int(n_sbatch_maps)):
sim_batch_configs = simulate_configs_designs[
ibatch * group_size * n_slurm_array : (ibatch + 1) * group_size * n_slurm_array
]
print(len(sim_batch_configs))
print(f"Submitting batch {ibatch + 1}/{int(n_sbatch_maps)}")

job_array = slurm.sbatch_map(
f=simulate_slurm,
args=sim_batch_configs,
slurm_args=slurm_config,
group_size=group_size, # Number of jobs to batch in sequence to reduce job array size
work_dir="scratch",
)

# Save the job array reference
submitted_jobs.append(job_array)

# Wait for this job to complete by calling save()
# This will submit a dependent job that waits for the array to finish
print(f"Waiting for batch {ibatch + 1} to complete...")
job_array.save(f"results_{ibatch}.pkl", slurm_args=slurm_config)
print(f"Batch {ibatch + 1} completed!")
56 changes: 56 additions & 0 deletions engibench/problems/airfoil/simulation_jobs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Dataset Generator for the Airfoil problem using the SLURM API."""

import time

import numpy as np

from engibench.problems.airfoil.v0 import Airfoil


def simulate_slurm(problem_configuration: dict, configuration_id: int, design: list) -> dict:
"""Takes in the given configuration and designs, then runs the simulation analysis.

Any arguments should be things that you want to change across the different jobs, and anything
that is the same/static across the runs should just be defined inside this function.

Args:
problem_configuration (dict): The specific configuration used to setup the problem being passed.
For the airfoil problem this includes Mach number, Reynolds number, and angle of attack.
configuration_id (int): A unique identifier for the job for later debugging or tracking.
design (list): list of lists defining x and y coordinates of airfoil geometry.

Returns:
"performance_dict": Dictionary of aerodynamic performance (lift & drag).
"simulate_time": The time taken to run this simulation job. Useful for aggregating
the time taken for dataset generation.
"problem_configuration": Problem configuration parameters
"configuration_id": Identifier for specific simulation configurations
"""
# Instantiate problem
problem = Airfoil()

# Set simulation ID
sim_id = configuration_id + 1

# Create unique simulation directory
problem.reset(seed=sim_id, cleanup=False)

# Create simulation design (coordinates + angle of attack)
my_design = {"coords": np.array(design), "angle_of_attack": problem_configuration["alpha"]}

print("Starting `simulate` via SLURM...")
start_time = time.time()

performance = problem.simulate(my_design, mpicores=1, config=problem_configuration)
performance_dict = {"drag": performance[0], "lift": performance[1]}
print("Finished `simulate` via SLURM.")
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Elapsed time for `simulate`: {elapsed_time:.2f} seconds")

return {
"performance_dict": performance_dict,
"simulate_time": elapsed_time,
"problem_configuration": problem_configuration,
"configuration_id": configuration_id,
}
Loading