1 change: 1 addition & 0 deletions loadgen/mlperf.conf
@@ -28,6 +28,7 @@ deepseek-r1.*.performance_sample_count_override = 4388
deepseek-r1-interactive.*.performance_sample_count_override = 4388
whisper.*.performance_sample_count_override = 1633
qwen3-vl-235b-a22b.*.performance_sample_count_override = 48289
yolo.*.performance_sample_count_override = 5000
Contributor:

Why 5000 when the dataset size is 1513?

Contributor:

I think it was already present in the submission checker and was just copied down here. @manpreetssokhi, would it be fine if we bring it down to 1513?

Contributor:

500 is used here: https://github.com/mlcommons/inference/blob/master/vision/classification_and_detection/yolo/yolo_loadgen.py#L150

The count should be chosen so that the memory needed to load that many samples is ideally above a few MB (larger than the L3 cache, so the run is not unrealistically cache-resident) but still small enough to run on edge systems (not above, say, 256 MB).
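
For a quick sanity check, here is a back-of-the-envelope sketch of that sizing rule (the 640x640x3 float32 input shape is an assumption about the YOLO preprocessing, not something stated in this PR):

```python
# Rough QSL memory footprint for the candidate performance sample counts.
# Assumes preprocessed samples are 640x640 RGB float32 tensors (assumption,
# not confirmed by this PR).
BYTES_PER_SAMPLE = 640 * 640 * 3 * 4  # ~4.7 MiB per sample

for count in (64, 500, 1525, 5000):
    total_mib = count * BYTES_PER_SAMPLE / 2**20
    print(f"count={count:5d} -> ~{total_mib:,.0f} MiB resident")
```

Under that assumption, even 64 samples (~300 MiB) already exceed typical edge L3 sizes, while 5000 samples would need tens of GiB.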

Contributor:

In yolo_loadgen.py I tried both 1525 and 500. I believe the yolo.*.performance_sample_count_override = 5000 comes from retinanet being 5000. I am open to adjusting it to meet the MLC rules; it might be best to keep 1525 across both:
https://github.com/mlcommons/inference/blob/master/vision/classification_and_detection/yolo/yolo_loadgen.py#L150
and
https://github.com/mlcommons/inference/pull/2446/files/97d502c84c1e574a6ca4f5a1d6bf5f877ffe1ad6..09537da212a780471f563fc63ff9d1f6edea4910#diff-60781b468b10ba9bf59f52a09114c63209a92f299bf957299a055a99900a35c8

What do you both think?

Contributor:

The retinanet performance_sample_count is actually 64. If the image size is the same as in retinanet, we can use 64 for yolo as well.

Contributor:

Sure, in that case we can go with 64 for yolo here. Does this override the minimum of 500 for performance_sample_count in the base implementation I have? I am trying to understand the difference, and more broadly the different components in the repo. My intention behind the higher numbers of 500 and 1525 was to make sure we meet the minimum 10-minute run.
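
On the override question, a minimal sketch of how settings are usually layered in the reference implementations (this layering is my reading of the bindings, not something this PR states): mlperf.conf supplies benchmark-wide defaults, user.conf applies submitter overrides on top, and fields assigned directly on TestSettings in code take effect last.

```python
import mlperf_loadgen as lg

settings = lg.TestSettings()
# Benchmark-wide defaults, e.g. yolo.*.performance_sample_count_override.
settings.FromConfig("mlperf.conf", "yolo", "Offline")
# Submitter overrides such as target QPS.
settings.FromConfig("user.conf", "yolo", "Offline")
# Direct assignments after FromConfig take effect last.
settings.min_duration_ms = 600_000  # the 10-minute minimum mentioned above
```

So the conf override and the in-code minimum are separate knobs: the conf entry sizes the QSL's performance sample set, while min_duration_ms / min_query_count control how long the run must be.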

# set to 0 to let entire sample set to be performance sample
3d-unet.*.performance_sample_count_override = 0

53 changes: 51 additions & 2 deletions tools/submission/generate_final_report.py
@@ -164,7 +164,52 @@ def main():
],
]

if args.version == "5.0":
if args.version == "6.0":
filter_scenarios = {
"datacenter": {
"resnet": [],
"bert-99": [],
"bert-99.9": [],
"stable-diffusion-xl": [],
"pointpainting": [],
"dlrm-v3": ["Server", "Offline"],
"3d-unet-99": ["Offline"],
"3d-unet-99.9": ["Offline"],
"llama2-70b-99": ["Server", "Offline", "Interactive"],
"llama2-70b-99.9": ["Server", "Offline", "Interactive"],
"mixtral-8x7b": ["Server", "Offline"],
"rgat": ["Offline"],
"llama3.1-8b": ["Server", "Offline", "Interactive"],
"llama3.1-405b": ["Offline", "Server"],
"deepseek-r1": ["Server", "Offline", "Interactive"],
"whisper": ["Offline"],
"gpt-oss-120b": ["Offline", "Interactive", "Server"],
"qwen3-vl-235b-a22b": ["Server", "Offline", "Interactive"],
"wan-2.2-t2v-a14b": ["Offline", "SingleStream"],
},
"edge": {
"resnet": ["SingleStream", "MultiStream", "Offline"],
"bert-99": ["SingleStream", "Offline"],
"bert-99.9": ["SingleStream", "Offline"],
"dlrm-v2-99": [],
"dlrm-v2-99.9": [],
"3d-unet-99": ["SingleStream", "Offline"],
"3d-unet-99.9": ["SingleStream", "Offline"],
"llama2-70b-99": [],
"llama2-70b-99.9": [],
"llama2-70b-interactive-99": [],
"llama2-70b-interactive-99.9": [],
"llama3.1-405b": [],
"llama3.1-8b-edge": ["SingleStream", "Offline"],
"rgat": [],
"stable-diffusion-xl": ["SingleStream", "Offline"],
"pointpainting": ["SingleStream"],
"whisper": ["Offline"],
"yolo-95": ["SingleStream", "MultiStream", "Offline"],
"yolo-99": ["SingleStream", "MultiStream", "Offline"],
},
}
elif args.version == "5.0":
filter_scenarios = {
"datacenter": {
"resnet": ["Server", "Offline"],
@@ -185,7 +230,11 @@ def main():
"mixtral-8x7b": ["Server", "Offline"],
"rgat": ["Offline"],
"llama3.1-405b": ["Offline", "Server"],
"pointpainting": []
"rgat": ["Offline"],
"pointpainting": [],
"gpt-oss-120b": ["Offline"],
"qwen3-vl-235b-a22b": ["Server", "Offline"],
"dlrm-v3": ["Server", "Offline"],
},
"edge": {
"resnet": ["SingleStream", "MultiStream", "Offline"],
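To make the tables above concrete, here is a hedged sketch of how a version-keyed filter_scenarios map is typically applied when assembling the report (the DataFrame column names are assumptions, not taken from generate_final_report.py; an empty list is read as "no valid scenarios for this suite", matching entries like "rgat": [] above):

```python
import pandas as pd

def filter_rows(df: pd.DataFrame, filter_scenarios: dict, suite: str) -> pd.DataFrame:
    """Drop rows whose (model, scenario) pair is not valid for the suite."""
    allowed = filter_scenarios[suite]  # e.g. filter_scenarios["edge"]

    def keep(row: pd.Series) -> bool:
        scenarios = allowed.get(row["Model"])
        return scenarios is not None and row["Scenario"] in scenarios

    return df[df.apply(keep, axis=1)]
```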
20 changes: 16 additions & 4 deletions tools/submission/submission_checker/constants.py
@@ -22,24 +22,23 @@
"wan-2.2-t2v-a14b",
"qwen3-vl-235b-a22b",
"dlrm-v3",
"yolo-95",
"yolo-99",
],
"required-scenarios-datacenter": {
"retinanet": ["Server", "Offline"],
"dlrm-v3": ["Server", "Offline"],
"3d-unet-99": ["Offline"],
"3d-unet-99.9": ["Offline"],
"llama3.1-8b": ["Offline"],
"llama2-70b-99": ["Offline"],
"llama2-70b-99.9": ["Offline"],
"stable-diffusion-xl": ["Server", "Offline"],
"mixtral-8x7b": ["Server", "Offline"],
"llama3.1-405b": ["Offline"],
"rgat": ["Offline"],
"whisper": ["Offline"],
"deepseek-r1": ["Offline"],
"gpt-oss-120b": ["Offline"],
"qwen3-vl-235b-a22b": ["Server", "Offline"],
"dlrm-v3": ["Server", "Offline"],
},
"optional-scenarios-datacenter": {
"llama2-70b-99": ["Interactive", "Server"],
@@ -51,7 +50,6 @@
},
"required-scenarios-edge": {
"resnet": ["SingleStream", "MultiStream", "Offline"],
"retinanet": ["SingleStream", "MultiStream", "Offline"],
"bert-99": ["SingleStream", "Offline"],
"bert-99.9": ["SingleStream", "Offline"],
"3d-unet-99": ["SingleStream", "Offline"],
@@ -60,6 +58,8 @@
"stable-diffusion-xl": ["SingleStream", "Offline"],
"pointpainting": ["SingleStream"],
"whisper": ["Offline"],
"yolo-95": ["SingleStream", "MultiStream", "Offline"],
"yolo-99": ["SingleStream", "MultiStream", "Offline"],
},
"optional-scenarios-edge": {},
"required-scenarios-datacenter-edge": {
@@ -83,6 +83,8 @@
"gpt-oss-120b": ["Offline"],
"qwen3-vl-235b-a22b": ["Offline"],
"dlrm-v3": ["Offline", "Server"],
"yolo-95": ["SingleStream", "MultiStream", "Offline"],
"yolo-99": ["SingleStream", "MultiStream", "Offline"],
},
"optional-scenarios-datacenter-edge": {
"llama2-70b-99": ["Interactive", "Server"],
@@ -231,6 +233,8 @@
"gpt-oss-120b": 6396,
"qwen3-vl-235b-a22b": 48289,
"dlrm-v3": 34996,
"yolo-95": 5000,
"yolo-99": 5000,
},
"dataset-size": {
"resnet": 50000,
@@ -256,6 +260,8 @@
"gpt-oss-120b": 6396,
"qwen3-vl-235b-a22b": 48289,
"dlrm-v3": 34996,
"yolo-95": 1525,
"yolo-99": 1525,
},
# model_mapping.json is expected in the root directory of the
# submission folder for open submissions and so the below dictionary is
@@ -329,6 +335,8 @@
"gpt-oss-120b": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"qwen3-vl-235b-a22b": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"dlrm-v3": {"Server": 270336, "Offline": 1},
"yolo-95": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1},
"yolo-99": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1},
},
"models_TEST01": [
"resnet",
@@ -343,6 +351,8 @@
"rgat",
"pointpainting",
"whisper",
"yolo-99",
"yolo-95",
],
"models_TEST04": [
"resnet",
@@ -1115,6 +1125,8 @@
"deepseek-r1": 4388,
"whisper": 1633,
"pointpainting": 6636,
"yolo-99": 1525,
"yolo-95": 1525,
}

SCENARIO_MAPPING = {
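Reading the constants.py hunks together, the yolo entries line up as follows (a sketch: the first two table names are inferred from their values matching mlperf.conf, and checker_params is illustrative rather than the submission checker's real API):

```python
# Values as added in the hunks above.
PERFORMANCE_SAMPLE_COUNT = {"yolo-95": 5000, "yolo-99": 5000}  # matches mlperf.conf
DATASET_SIZE = {"yolo-95": 1525, "yolo-99": 1525}
MIN_QUERIES = {
    "yolo-95": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1},
    "yolo-99": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1},
}

def checker_params(model: str, scenario: str) -> dict:
    """Illustrative lookup of the per-benchmark limits the checker enforces."""
    return {
        "performance_sample_count": PERFORMANCE_SAMPLE_COUNT[model],
        "dataset_size": DATASET_SIZE[model],
        "min_queries": MIN_QUERIES[model][scenario],
    }

print(checker_params("yolo-99", "SingleStream"))
# {'performance_sample_count': 5000, 'dataset_size': 1525, 'min_queries': 1024}
```

Note that this makes the reviewers' point visible: the performance sample count (5000) exceeds the stated dataset size (1525).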
58 changes: 42 additions & 16 deletions vision/classification_and_detection/yolo/yolo_loadgen.py
@@ -1,18 +1,19 @@
from ultralytics import YOLO
import mlperf_loadgen as lg
import numpy as np
from pathlib import Path
from datetime import datetime
import time
import struct
import sys
import os
import json
import array
import argparse

"""
YOLOv11 LoadGen MLPerf
"""
import argparse
import array
import json
import os
import sys
import struct
import time
from datetime import datetime
from pathlib import Path
import numpy as np
import mlperf_loadgen as lg
from ultralytics import YOLO


# Standard YOLO (80 classes) to COCO (91 classes) mapping
@@ -110,6 +111,22 @@ def main():
default=None,
help="Number of samples to run")
parser.add_argument("--output", type=str, help="Directory for MLPerf logs")
parser.add_argument(
"--user_conf",
default="user.conf",
help="user config for user LoadGen settings such as target QPS",
)
parser.add_argument(
"--model-name",
type=str,
required=False,
default="yolo"
)
parser.add_argument(
"--enable-log-trace",
action="store_true",
help="Enable log tracing. This file can become quite large",
)

# mode flags
mode_group = parser.add_mutually_exclusive_group(required=True)
@@ -152,6 +169,13 @@ def flush_queries(): pass

settings = lg.TestSettings()

# Load user configuration
user_conf = os.path.abspath(args.user_conf)
if not os.path.exists(user_conf):
print("{} not found".format(user_conf))
sys.exit(1)
settings.FromConfig(user_conf, args.model_name, args.scenario)

# scenario configurations
scenario_map = {
"SingleStream": lg.TestScenario.SingleStream,
@@ -172,24 +196,26 @@ def flush_queries(): pass
settings.mode = lg.TestMode.PerformanceOnly
# NOTE MLPerf requirement: minimum 10 minute run for performance
settings.min_duration_ms = 600000
settings.min_query_count = 100

# NOTE: user configs can override this in submission, this is the reference implementation so purposely left barebones
# settings.target_qps = ...
# ...

# configure logs
log_output_settings = lg.LogOutputSettings()
log_output_settings.outdir = log_path
log_output_settings.copy_summary_to_stdout = True
log_settings = lg.LogSettings()
log_settings.log_output.outdir = log_path
log_settings.log_output.copy_summary_to_stdout = True
log_settings.log_output = log_output_settings
log_settings.enable_trace = args.enable_log_trace

print(f"Starting MLPerf run")
print(f"Scenario: {args.scenario}")
print(f"{'Accuracy' if args.AccuracyOnly else 'Performance'} run")
print(f"Log directory: {log_path}")

try:
lg.StartTestWithLogSettings(sut, qsl, settings, lg.LogSettings())
lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)
print(f"MLPerf run complete - cleaning up")
except Exception as e:
print(f"An error occured during StartTest: {e}")
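For readers new to the LoadGen Python bindings this file uses, a self-contained sketch of the sut/qsl objects the diff references (the callback bodies are dummies; ConstructSUT/ConstructQSL and the response flow follow the mlperf_loadgen bindings as I understand them):

```python
import mlperf_loadgen as lg

TOTAL_SAMPLES = 1525  # dataset size under discussion above
PERF_SAMPLES = 64     # performance sample count under discussion above

def load_samples(indices):
    pass  # e.g. preprocess images and keep them resident in RAM

def unload_samples(indices):
    pass

def issue_queries(query_samples):
    # Run inference per sample; an empty response is reported for each here.
    responses = [lg.QuerySampleResponse(qs.id, 0, 0) for qs in query_samples]
    lg.QuerySamplesComplete(responses)

def flush_queries():
    pass

sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(TOTAL_SAMPLES, PERF_SAMPLES, load_samples, unload_samples)
```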