Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
26ad226
variables outsourced to config.py; condorJobs_sim runs until actual e…
Victor-Schwan May 16, 2024
9880448
switch from str paths to Path objects
Victor-Schwan May 21, 2024
06033d0
minor fix
Victor-Schwan May 21, 2024
ecb75d1
reco: switched to pathlib, more config vars added; config: environme…
Victor-Schwan May 21, 2024
389170d
time stuff removed; wait slowed down the execution significantly, may…
Victor-Schwan May 21, 2024
fee1771
job number calc in verbose mode fixed
Victor-Schwan May 21, 2024
ddc294b
verbose mode for debugging of slow loop removed
Victor-Schwan May 21, 2024
5d8fa22
minor refactoring Condor/condorJobs_sim.py
Victor-Schwan May 27, 2024
c293833
minor
Victor-Schwan May 30, 2024
51dbf54
added afs paths to config
Victor-Schwan May 30, 2024
7548679
mostly changes to paths
Victor-Schwan May 31, 2024
e41ee39
condorJobs_sim: argparse to choose config file,
Victor-Schwan May 31, 2024
18bb9cd
condorJobs_sim: improved argparsing: .py suffix can be omitted
Victor-Schwan May 31, 2024
46bfe63
parse_arg and load_config outsourced to utils; easily reusable in e.g…
Victor-Schwan May 31, 2024
acebf1c
JOB_FLAVOR outsourced to config
Victor-Schwan May 31, 2024
0600e0d
config gun args actually incorporated in ddsim command;
Victor-Schwan Jun 3, 2024
7975c8b
reco: adde cfile from
Victor-Schwan Jun 3, 2024
7c91e95
renamed config template
Victor-Schwan Jun 3, 2024
6a8a238
minor addendum config template
Victor-Schwan Jun 3, 2024
915b9b6
double definitions of vars removed:
Victor-Schwan Jun 5, 2024
dab3fa7
number of events specified in config is ceiled instead of floored (ro…
Victor-Schwan Jun 5, 2024
2d9db28
Alignment of names in and
Victor-Schwan Jun 7, 2024
7ece7d1
refac, alignment of naming and structure in sim and reco
Victor-Schwan Jun 7, 2024
138a7b5
fix path for xrootd
Victor-Schwan Jun 11, 2024
298dcfa
minor fix; refacs: mainly renaming vars
Victor-Schwan Jun 11, 2024
5e3cef0
sim: output file rm on afs; minor fixes; make Plotting/analysis_track…
Victor-Schwan Jun 11, 2024
8c2a8e4
changes to reco requested by Gaelle
Victor-Schwan Jun 11, 2024
bcd52b3
ResVDX_UV_ removed from reco as requested by Gaelle
Victor-Schwan Jun 11, 2024
b5a93fd
Revert Plotting/analysis_tracking.py to state before Victor's PR
Victor-Schwan Jun 12, 2024
6fa0479
reco: change aida file suffix to .aida.root
Victor-Schwan Jun 12, 2024
237bd06
minor: typehint main functions
Victor-Schwan Jun 19, 2024
4163c0b
config option EDM4HEP_SUFFIX_WITH_UNDERSCORE added and path building …
Victor-Schwan Jun 19, 2024
bdd3deb
minor fixup
Victor-Schwan Jun 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**/__pycache__/
385 changes: 222 additions & 163 deletions TrackingPerformance/Condor/condorJobs_reco.py
Original file line number Diff line number Diff line change
@@ -1,167 +1,226 @@
#!/usr/bin/env python

import os
import sys
import ROOT
import argparse
import subprocess

# ==========================
# Parameters Initialisation
# ==========================
# Define lists of parameters for reconstruction
thetaList_ = ["10", "20", "30", "40", "50", "60", "70", "80", "89"]
#thetaList_ = ["70", "80", "89"]
momentumList_ = ["1", "2", "5", "10", "20", "50", "100", "200"]
#momentumList_ = ["1", "10", "100"]
particleList_ = ["mu"]#,"e" ,"pi"]
#ResVDX_UV_ = ['0.001']

DetectorModelList_ = ["CLD_o3_v01"] # FCCee_o1_v04 CLD_o2_v05 CLD_o3_v01
Nevts_ = "10000"

Nevt_per_job = "1000" # Set the desired number of events per job
N_jobs = int(int(Nevts_) / int(Nevt_per_job)) * len(particleList_) * len(thetaList_) * len(momentumList_)
total_events = int(Nevts_)
num_jobs = total_events // int(Nevt_per_job)

# ===========================
# Directory Setup and Checks
# ===========================
# Define directories for input and output
directory_jobs = f"CondorJobs/Rec_{particleList_[0]}_{DetectorModelList_[0]}"
#setup = "/cvmfs/sw-nightlies.hsf.org/key4hep/setup.sh" # nightlies
setup = "/cvmfs/sw.hsf.org/key4hep/setup.sh" # stable
#InputDirectory = f"/eos/user/g/gasadows/Output/TrackingPerformance/{DetectorModelList_[0]}/SIM/3T/"
InputDirectory = f"/eos/experiment/fcc/users/g/gasadows/TrackingPerformance/{DetectorModelList_[0]}/SIM/3T/"
EosDir = f"/eos/user/g/gasadows/Output/TrackingPerformance/{DetectorModelList_[0]}/REC/3T/"

#steering_file = "CLDReconstruction.py"
steering_file = "/afs/cern.ch/user/g/gasadows/CLDConfig/CLDConfig/CLDReconstruction_3T.py"

# Enable output checks
check_output = True # Set to True to enable checks, False to disable
# It will check if the ouputs exist and contain correct number of events
# if not it will send job to rerun reconstruction

JobFlavour = "tomorrow"
# Job flavours:
# espresso = 20 minutes
# microcentury = 1 hour
# longlunch = 2 hours
# workday = 8 hours
# tomorrow = 1 day
# testmatch = 3 days
# nextweek = 1 week

# Set default value if ResVDX_UV_ is not defined or empty
try:
if not ResVDX_UV_:
ResVDX_UV_ = ['0.003']
except NameError:
ResVDX_UV_ = ['0.003']

# Check if the directory exists and exit if it does
if os.path.exists(directory_jobs):
print(f"Error: Directory '{directory_jobs}' already exists and should not be overwritten.")
sys.exit(1)

# Create output directories if they don't exist
[os.makedirs(directory, exist_ok=True) for directory in [EosDir, directory_jobs]]

# =======================
# Simulation Job Creation
# =======================
# Create all possible combinations
import itertools
list_of_combined_variables = itertools.product(thetaList_, momentumList_, particleList_, DetectorModelList_)

need_to_create_scripts = False

for theta, momentum, part, dect in list_of_combined_variables:
for task_index in range(num_jobs):

outputFileName = f"REC_{dect}"
outputFileName+= f"_{part}"
outputFileName+= f"_{theta}_deg"
outputFileName+= f"_{momentum}_GeV"
outputFileName+= f"_{Nevt_per_job}_evts"
outputFileName+= f"_{task_index}"

inputFile= os.path.join(InputDirectory + f"/{part}", f"SIM_{dect}_{part}_{theta}_deg_{momentum}_GeV_{Nevt_per_job}_evts_{task_index}_edm4hep.root")
#inputFile= os.path.join(InputDirectory + f"/{part}", f"SIM_{dect}_{part}_{theta}_deg_{momentum}_GeV_{Nevt_per_job}_evts_edm4hep.root")
#input_file= os.path.join(InputDirectory, "SIMTest_" + dect + "_" + part + "_" + theta + "_deg_" + momentum + "_GeV_" + Nevts_ + "_evts.slcio")

# Check if the input file exists
if not os.path.exists(inputFile):
print(f"Error: Input file {inputFile} does not exist. Skipping job.")
continue
# Check if the output file already exists and has correct Nb of events
output_dir = os.path.join(EosDir, part); os.makedirs(output_dir, exist_ok=True)
output_file = output_dir +"/"+ outputFileName + "_edm4hep.root"
if check_output and os.path.exists(output_file):
root_file = ROOT.TFile(output_file, "READ")
events_tree = root_file.Get("events")
if events_tree and events_tree.GetEntries() == int(Nevt_per_job):
root_file.Close()
continue
root_file.Close()
need_to_create_scripts = True

# Create aida output Dir
output_dir_aida = os.path.join(output_dir, "aida_outputs"); os.makedirs(output_dir_aida, exist_ok=True)

arguments = (
#f" --GeoSvc.detectors=/afs/cern.ch/work/g/gasadows/k4geo/FCCee/CLD/compact/{DetectorModelList_[0]}_3T/{DetectorModelList_[0]}.xml"+
f" --GeoSvc.detectors=$K4GEO/FCCee/CLD/compact/{DetectorModelList_[0]}/{DetectorModelList_[0]}.xml"+
" --inputFiles " + inputFile + " --outputBasename " + outputFileName+
f" --VXDDigitiserResUV={ResVDX_UV_[0]}" +
" --trackingOnly" +
" -n " + Nevt_per_job
import itertools
import sys  # fix: sys.exit() is used below but `sys` was never imported
from math import ceil
from os import fspath, system  # for execution at the end
from pathlib import Path

import ROOT

from utils import load_config, parse_args


def main() -> None:
    """Create and submit HTCondor jobs for CLD track reconstruction.

    Loads a config module selected on the command line, builds one bash
    script per (theta, momentum, particle, detector) parameter set and per
    event chunk, skips combinations whose output file already exists with
    the expected number of events, writes a condor submit file, and submits
    the whole batch.
    """

    # ==========================
    # Load specified config file
    # ==========================

    args = parse_args()
    config = load_config(args.config)

    # ==========================
    # Check paths
    # ==========================

    assert (
        config.rec_steering_file.exists()
    ), f"The file {config.rec_steering_file} does not exist"
    assert (
        config.detector_dir.exists()
    ), f"The folder {config.detector_dir} does not exist"

    # ==========================
    # Parameters Initialisation
    # ==========================

    n_para_sets = (
        len(config.detector_model_list)
        * len(config.particle_list)
        * len(config.theta_list)
        * len(config.momentum_list)
    )
    # number of parallel jobs with same parameter combination/set;
    # ceil so that N_EVTS is a lower limit on the events actually produced
    n_jobs_per_para_set = ceil(config.N_EVTS / config.N_EVTS_PER_JOB)
    # total number of jobs, can be printed for debugging/information
    n_jobs = n_jobs_per_para_set * n_para_sets

    # ===========================
    # Directory Setup and Checks
    # ===========================

    # Define directories for input and output
    directory_jobs = (
        config.rec_condor_dir
        / f"{config.particle_list[0]}_{config.detector_model_list[0]}"
    )
    sim_eos_dir = config.data_dir / f"{config.detector_model_list[0]}" / "SIM"  # input
    rec_eos_dir = config.data_dir / f"{config.detector_model_list[0]}" / "REC"  # output

    # Enable output checks
    CHECK_OUTPUT = True  # Set to True to enable checks, False to disable
    # It will check if the outputs exist and contain the correct number of
    # events; if not it will send a job to rerun the reconstruction

    # Check if the job directory exists and exit if it does, so an earlier
    # batch of scripts/logs is never silently overwritten
    if directory_jobs.exists():
        print(
            f"Error: Directory '{directory_jobs}' already exists and should not be overwritten."
        )
        sys.exit(1)

    # Create output directories if they don't exist
    rec_eos_dir.mkdir(parents=True, exist_ok=True)
    directory_jobs.mkdir(parents=True, exist_ok=True)

    # =======================
    # Reconstruction Job Creation
    # =======================

    # Create all possible parameter combinations
    iter_of_combined_variables = itertools.product(
        config.theta_list,
        config.momentum_list,
        config.particle_list,
        config.detector_model_list,
    )

    NEED_TO_CREATE_SCRIPTS = False

    for theta, momentum, part, dect in iter_of_combined_variables:
        for task_index in range(n_jobs_per_para_set):

            output_file_name_parts = [
                f"REC_{dect}",
                f"{part}",
                f"{theta}_deg",
                f"{momentum}_GeV",
                f"{config.N_EVTS_PER_JOB}_evts",
                f"{task_index}",
            ]
            output_file_name = "_".join(output_file_name_parts)

            input_file_name_parts = [
                f"SIM_{dect}",
                f"{part}",
                f"{theta}_deg",
                f"{momentum}_GeV",
                f"{config.N_EVTS_PER_JOB}_evts",
                f"{task_index}",
            ]
            # Two naming schemes are supported for the edm4hep files:
            # "<name>_edm4hep.root" (underscore) or "<name>.edm4hep.root"
            if config.EDM4HEP_SUFFIX_WITH_UNDERSCORE:
                input_file_name_parts.append("edm4hep")
                input_file_path = Path("_".join(input_file_name_parts)).with_suffix(
                    ".root"
                )
            else:
                input_file_path = Path("_".join(input_file_name_parts)).with_suffix(
                    ".edm4hep.root"
                )
            input_file = sim_eos_dir / part / input_file_path

            # Check if the input file exists
            if not input_file.exists():
                print(f"Error: Input file {input_file} does not exist. Skipping job.")
                continue
            # Check if the output file already exists and has correct Nb of events
            output_dir = rec_eos_dir / part
            output_dir.mkdir(parents=True, exist_ok=True)
            if config.EDM4HEP_SUFFIX_WITH_UNDERSCORE:
                output_file = (
                    output_dir / (output_file_name + "_edm4hep")
                ).with_suffix(".root")
            else:
                output_file = (output_dir / output_file_name).with_suffix(
                    ".edm4hep.root"
                )

            # FIXME: Issue #4
            if CHECK_OUTPUT and output_file.exists():
                # ROOT.TFile needs a str/os.PathLike-as-str, hence fspath()
                root_file = ROOT.TFile(fspath(output_file), "READ")
                events_tree = root_file.Get("events")
                if events_tree and events_tree.GetEntries() == config.N_EVTS_PER_JOB:
                    # output is complete — nothing to (re)do for this job
                    root_file.Close()
                    continue
                root_file.Close()
            NEED_TO_CREATE_SCRIPTS = True

            # Create aida output Dir
            output_dir_aida = output_dir / "aida_outputs"
            output_dir_aida.mkdir(exist_ok=True)

            arguments = (
                f" --GeoSvc.detectors=$K4GEO/FCCee/CLD/compact/{config.detector_model_list[0]}/{config.detector_model_list[0]}.xml"
                + " --inputFiles "
                + fspath(input_file)
                + " --outputBasename "
                + fspath(output_file_name)
                + " --trackingOnly"
                + " -n "
                + str(config.N_EVTS_PER_JOB)
            )
            command = f"k4run {config.rec_steering_file} " + arguments + " > /dev/null"

            # Write bash script for job execution
            bash_script = (
                "#!/bin/bash \n"
                f"source {config.setup} \n"
                "git clone https://github.com/key4hep/CLDConfig.git \n"  # FIXME: see issues
                "cd "
                + "CLDConfig/CLDConfig"  # FIXME: CLD should not be hardcoded
                + "\n"
                f"{command} \n"
                f"xrdcp {output_file_name}{'_' if config.EDM4HEP_SUFFIX_WITH_UNDERSCORE else '.'}edm4hep.root root://eosuser.cern.ch/{output_dir} \n"
                f"xrdcp {output_file_name}{'_' if config.EDM4HEP_SUFFIX_WITH_UNDERSCORE else '.'}aida.root root://eosuser.cern.ch/{output_dir_aida} \n"
            )
            bash_file_name_parts = [
                "bash_script",
                dect,
                part,
                f"{theta}_deg",
                f"{momentum}_GeV",
                str(task_index),
            ]
            bash_file_path = (
                directory_jobs / "_".join(bash_file_name_parts)
            ).with_suffix(".sh")

            with open(bash_file_path, "w", encoding="utf-8") as bash_file:
                bash_file.write(bash_script)

    if not NEED_TO_CREATE_SCRIPTS:
        print("All output files are correct.")
        sys.exit(0)

    # ============================
    # Condor Submission Script
    # ============================
    # Write the condor submission script
    condor_script = (
        "executable = $(filename) \n"
        "arguments = $(ClusterId) $(ProcId) \n"
        "output = output.$(ClusterId).$(ProcId).out \n"
        "error = error.$(ClusterId).$(ProcId).err \n"
        "log = log.$(ClusterId).log \n"
        f'+JobFlavour = "{config.JOB_FLAVOR}" \n'
        "queue filename matching files *.sh \n"
    )
    condor_file_path = directory_jobs / "condor_script.sub"
    with open(condor_file_path, "w", encoding="utf-8") as condor_file:
        condor_file.write(condor_script)

    # ====================
    # Submit Job to Condor
    # ====================
    system(
        "cd " + fspath(directory_jobs) + "; condor_submit condor_script.sub"
    )  # FIXME: use subprocess instead?


if __name__ == "__main__":
    main()
Loading