Skip to content

Commit b7dd88f

Browse files
committed
metrics tracking with prometheus, addressing issue #7
1 parent 1cc2f41 commit b7dd88f

File tree

12 files changed

+619
-235
lines changed

12 files changed

+619
-235
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ __pycache__
44
.coverage
55
htmlcov
66
*egg-info
7+
*venv*

batchtools/batchtools.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from .bq import GpuQueuesCommand
1111
from .br import CreateJobCommand
1212
from .bps import ListPodsCommand
13-
from .helpers import is_logged_in
13+
from .helpers import is_logged_in, is_on_project
1414

1515

1616
class BatchTools:
@@ -61,6 +61,10 @@ def main() -> None:
6161
sys.exit(
6262
"You are not logged in to the oc cli. Retrieve the token using 'oc login --web' or retrieving the login token from the openshift UI."
6363
)
64+
if not is_on_project():
65+
sys.exit(
66+
"You are not on a project. For metric tracking, you must be on a project. Use oc project <project-name> to get on a project."
67+
)
6468

6569
app = BatchTools()
6670
app.run()

batchtools/bd.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,26 @@
11
import sys
22
import argparse
33
from typing import cast
4-
54
import openshift_client as oc
6-
75
from .basecommand import Command, override
86
from .basecommand import SubParserFactory
9-
from .helpers import oc_delete
7+
from .helpers import oc_delete, is_kueue_managed_job
108

119

1210
class DeleteJobsCommand(Command):
1311
"""
1412
batchtools bd [job-name [job-name ...]]
1513
16-
Delete specified Jobs, or all Jobs if none are specified.
14+
Delete specified Kueue-managed GPU jobs, or all such jobs if none are specified.
15+
16+
Description:
17+
Deletes only those Jobs that are both:
18+
- named like your GPU jobs (name starts with 'job-'), and
19+
- detected as Kueue-managed (via labels/Workload linkage).
1720
"""
1821

1922
name: str = "bd"
20-
help: str = "Delete specified Jobs, or all if none are specified"
23+
help: str = "Delete specified Kueue-managed GPU jobs, or all if none are specified"
2124

2225
@classmethod
2326
@override
@@ -26,34 +29,39 @@ def build_parser(cls, subparsers: SubParserFactory):
2629
p.add_argument(
2730
"job_names",
2831
nargs="*",
29-
help="Optional list of job names to delete",
32+
help="Optional list of job names to delete (must be Kueue-managed)",
3033
)
3134
return p
3235

3336
@staticmethod
3437
@override
3538
def run(args: argparse.Namespace):
3639
args = cast(DeleteJobsCommand, args)
37-
3840
try:
3941
jobs = oc.selector("jobs").objects()
4042
if not jobs:
4143
print("No jobs found.")
4244
return
4345

46+
# Only Kueue-managed jobs may be deleted, so filter out all other jobs first
47+
kueue_gpu_jobs = [job for job in jobs if is_kueue_managed_job(job)]
48+
49+
if not kueue_gpu_jobs:
50+
print("No Kueue-managed GPU jobs to delete.")
51+
return
52+
4453
if args.job_names:
45-
# delete only specified jobs
46-
existing = {job.model.metadata.name for job in jobs}
54+
# If job names were given, delete only those jobs
55+
allowed = {job.model.metadata.name for job in kueue_gpu_jobs}
4756
for name in args.job_names:
48-
if name not in existing:
49-
print(f"{name} does not exist; skipping.")
57+
if name not in allowed:
58+
print(f"{name} is not a Kueue-managed GPU job; skipping.")
5059
continue
5160
oc_delete("job", name)
5261
print(f"Deleted job: {name}")
5362
else:
54-
# delete all jobs
55-
print("No job names provided -> deleting ALL jobs:\n")
56-
for job in jobs:
63+
print("No job names provided -> deleting all Kueue-managed GPU jobs:\n")
64+
for job in kueue_gpu_jobs:
5765
name = job.model.metadata.name
5866
oc_delete("job", name)
5967
print(f"Deleted job: {name}")

batchtools/bj.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import sys
55
import openshift_client as oc
66

7+
from .helpers import is_kueue_managed_job
78
from .basecommand import Command
89

910

@@ -35,8 +36,12 @@ def run(args: argparse.Namespace):
3536
print("No jobs found.")
3637
return
3738

38-
print(f"Found {len(jobs)} job(s):\n")
39-
for job in jobs:
39+
# filter only Kueue-managed jobs
40+
managed = [job for job in jobs if is_kueue_managed_job(job)]
41+
42+
print(f"Found {len(managed)} job(s):\n")
43+
44+
for job in managed:
4045
print(f"- {job.model.metadata.name}")
4146

4247
except oc.OpenShiftPythonException as e:

0 commit comments

Comments
 (0)