From 8854e4a5b7dc4b2cfd5dd98b7fe39763418af698 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar
Date: Wed, 15 Mar 2023 09:13:59 -0700
Subject: [PATCH] MLPerfTiny emulation test harness

---
 .../mlperftiny/api/submitter_implemented.h |   2 +-
 tests/micro/common/test_mlperftiny.py      | 130 +++++++++++++++++-
 2 files changed, 127 insertions(+), 5 deletions(-)

diff --git a/3rdparty/mlperftiny/api/submitter_implemented.h b/3rdparty/mlperftiny/api/submitter_implemented.h
index fc03a733c9cb..4c832aa0931e 100644
--- a/3rdparty/mlperftiny/api/submitter_implemented.h
+++ b/3rdparty/mlperftiny/api/submitter_implemented.h
@@ -27,7 +27,7 @@ methods from th_libc.h and all testharness methods from th_lib.h are here.
 /// \detail This API is designed for performance evaluation only. In order to
 /// gather energy measurments we recommend using the EEMBC test suite.
 #define EE_MSG_TIMESTAMP "m-lap-us-%lu\r\n"
-#define TH_VENDOR_NAME_STRING "unspecified"
+#define TH_VENDOR_NAME_STRING "microTVM"
 
 // MAX_DB_INPUT_SIZE defined in CMakeList.txt
 #ifndef TH_MODEL_VERSION
diff --git a/tests/micro/common/test_mlperftiny.py b/tests/micro/common/test_mlperftiny.py
index daa3cf340551..318d68184379 100644
--- a/tests/micro/common/test_mlperftiny.py
+++ b/tests/micro/common/test_mlperftiny.py
@@ -15,7 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 import os
+import re
+import logging
 from urllib.parse import urlparse
+import struct
 
 import pytest
 import tensorflow as tf
@@ -37,6 +40,7 @@
     create_header_file,
     mlf_extract_workspace_size_bytes,
 )
+from tvm.micro.testing.utils import aot_transport_find_message
 
 MLPERF_TINY_MODELS = {
     "kws": {
@@ -44,12 +48,14 @@
         "index": 1,
         "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/keyword_spotting/trained_models/kws_ref_model.tflite",
         "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/keyword_spotting_int8_6.pyc.npy",
+        "sample_label": 6,
     },
     "vww": {
         "name": "Visual Wake Words",
         "index": 2,
         "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/visual_wake_words/trained_models/vww_96_int8.tflite",
         "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/visual_wake_word_int8_1.npy",
+        "sample_label": 1,
     },
     # Note: The reason we use quantized model with float32 I/O is
     # that TVM does not handle the int8 I/O correctly and accuracy
@@ -67,9 +73,14 @@
         "index": 4,
         "url": "https://github.com/mlcommons/tiny/raw/bceb91c5ad2e2deb295547d81505721d3a87d578/benchmark/training/image_classification/trained_models/pretrainedResnet_quant.tflite",
         "sample": "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/data/image_classification_int8_0.npy",
+        "sample_label": 0,
     },
 }
 
+MLPERFTINY_READY_MSG = "m-ready"
+MLPERFTINY_RESULT_MSG = "m-results"
+MLPERFTINY_NAME_MSG = "m-name"
+
 
 def mlperftiny_get_module(model_name: str):
     model_url = MLPERF_TINY_MODELS[model_name]["url"]
@@ -114,11 +125,17 @@ def mlperftiny_get_module(model_name: str):
     return relay_mod, params, model_info
 
 
-def get_test_data(model_name: str) -> list:
+def get_test_data(model_name: str, project_type: str) -> list:
     sample_url = MLPERF_TINY_MODELS[model_name]["sample"]
     url = urlparse(sample_url)
     sample_path = download_testdata(sample_url, os.path.basename(url.path), module="data")
-    return [np.load(sample_path)]
+    sample = np.load(sample_path)
+    if project_type == "mlperftiny" and model_name != "ad":
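+        # Standalone mlperftiny projects receive inputs byte-by-byte over the
+        # serial transport ("db HH%" commands below), so quantized-model samples
+        # are sent as raw uint8 bytes; the float32 AD model is packed separately
+        # in _mlperftiny_write_sample.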
+        sample = sample.astype(np.uint8)
+
+    sample_label = None
+    if "sample_label" in MLPERF_TINY_MODELS[model_name]:
+        sample_label = MLPERF_TINY_MODELS[model_name]["sample_label"]
+    return [sample], [sample_label]
 
 
 def predict_ad_labels_aot(session, aot_executor, input_data, runs_per_sample=1):
@@ -148,6 +165,91 @@ def predict_ad_labels_aot(session, aot_executor, input_data, runs_per_sample=1):
     yield np.mean(errors), np.median(slice_runtimes)
 
 
+def _mlperftiny_get_name(device_transport) -> str:
+    """Get the device name via the MLPerfTiny "name" command."""
+    device_transport.write(b"name%", timeout_sec=5)
+    name_message = aot_transport_find_message(device_transport, MLPERFTINY_NAME_MSG, timeout_sec=5)
+    m = re.search(r"\[([A-Za-z0-9_]+)\]", name_message)
+    return m.group(1)
+
+
+def _mlperftiny_infer(transport, warmup: int, infer: int, timeout: int):
+    """Send the MLPerfTiny infer command with warmup and inference counts."""
+    cmd = f"infer {warmup} {infer}%".encode("UTF-8")
+    transport.write(cmd, timeout_sec=timeout)
+
+
+def _mlperftiny_write_sample(device_transport, data: list, timeout: int):
+    """Write a sample to the device in MLPerfTiny compatible format."""
+    cmd = f"db load {len(data)}%".encode("UTF-8")
+    logging.debug(f"transport write: {cmd}")
+    device_transport.write(cmd, timeout)
+    aot_transport_find_message(device_transport, MLPERFTINY_READY_MSG, timeout_sec=timeout)
+    for item in data:
+        if isinstance(item, float):
+            # float32 items (AD model) are sent as four little-endian bytes.
+            ba = bytearray(struct.pack("<f", item))
+            hex_array = [format(byte, "02x") for byte in ba]
+        else:
+            # uint8 items are sent as a single two-digit hex byte.
+            hex_val = format(item, "x")
+            if len(hex_val) == 1:
+                hex_val = "0" + hex_val
+            elif len(hex_val) > 2:
+                raise ValueError(f"Hex value not in HH format: {hex_val}")
+            hex_array = [hex_val]
+
+        for hex_val in hex_array:
+            cmd = f"db {hex_val}%".encode("UTF-8")
+            logging.debug(f"transport write: {cmd}")
+            device_transport.write(cmd, timeout)
+            aot_transport_find_message(device_transport, MLPERFTINY_READY_MSG, timeout_sec=timeout)
+
+
+def _mlperftiny_test_dataset(device_transport, dataset, timeout):
+    """Run a test dataset in MLPerfTiny compatible format and return accuracy."""
+    num_correct = 0
+    total = 0
+    samples, labels = dataset
+    for i_counter, (sample, label) in enumerate(zip(samples, labels)):
+        logging.info(f"Writing Sample {i_counter}")
+        _mlperftiny_write_sample(device_transport, sample.flatten().tolist(), timeout)
+        _mlperftiny_infer(device_transport, 1, 0, timeout)
+        results = aot_transport_find_message(
+            device_transport, MLPERFTINY_RESULT_MSG, timeout_sec=timeout
+        )
+
+        m = re.search(r"m\-results\-\[([A-Za-z0-9_,.]+)\]", results)
+        results_val = np.array([float(x) for x in m.group(1).split(",")])
+
+        if np.argmax(results_val) == label:
+            num_correct += 1
+        total += 1
+    return float(num_correct / total)
+
+
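+# The AD (anomaly detection) model is scored differently from the classifiers:
+# the device reports one anomaly score per spectrogram slice rather than class
+# probabilities. The scores are averaged below; the final metric (AUC) has to
+# be computed offline, e.g. with sklearn.metrics.roc_auc_score.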
+def _mlperftiny_test_dataset_ad(device_transport, dataset, timeout):
+    """Run a test dataset in MLPerfTiny compatible format for the AD model."""
+    samples, _ = dataset
+    result_output = np.zeros(samples[0].shape[0])
+
+    for slice_idx in range(samples[0].shape[0]):
+        _mlperftiny_write_sample(
+            device_transport, samples[0][slice_idx, :].flatten().tolist(), timeout
+        )
+        _mlperftiny_infer(device_transport, 1, 0, timeout)
+        results = aot_transport_find_message(
+            device_transport, MLPERFTINY_RESULT_MSG, timeout_sec=timeout
+        )
+        m = re.search(r"m\-results\-\[([A-Za-z0-9_,.]+)\]", results)
+        # The device returns a single anomaly score per slice.
+        result_output[slice_idx] = float(m.group(1).split(",")[0])
+    return np.average(result_output)
+
+
 @pytest.mark.parametrize("model_name", ["kws", "vww", "ad", "ic"])
 @pytest.mark.parametrize("project_type", ["host_driven", "mlperftiny"])
 @tvm.testing.requires_micro
@@ -159,7 +261,7 @@ def test_mlperftiny_models(platform, board, workspace_dir, serial_number, model_
     """
     Testing MLPerfTiny models using host_driven project. In this case one input sample is used
     to verify the end to end execution. Accuracy is not checked in this test.
-    Also, this test builds each model in standalone mode that can be used with EEMBC runner.
+    Also, this test builds each model in standalone mode that can be used for MLPerfTiny submissions.
     """
     if platform != "zephyr":
         pytest.skip(reason="Other platforms are not supported yet.")
@@ -177,6 +279,7 @@
     else:
         predictor = predict_labels_aot
 
+    samples, labels = get_test_data(model_name, project_type)
     if project_type == "host_driven":
         with create_aot_session(
             platform,
@@ -201,7 +304,7 @@
             args = {
                 "session": session,
                 "aot_executor": aot_executor,
-                "input_data": get_test_data(model_name),
+                "input_data": samples,
                 "runs_per_sample": 10,
             }
             predicted_labels, runtimes = zip(*predictor(**args))
@@ -267,6 +370,10 @@
     for i in range(len(input_shape)):
         input_total_size *= input_shape[i]
 
+    # The AD model takes float32 input, i.e. four bytes per element.
+    if model_name == "ad":
+        input_total_size *= 4
+
     template_project_path = pathlib.Path(tvm.micro.get_microtvm_template_projects(platform))
     project_options.update(
         {
@@ -295,6 +402,21 @@
         template_project_path, workspace_dir / "project", model_tar_path, project_options
     )
     project.build()
+    project.flash()
+    with project.transport() as transport:
+        aot_transport_find_message(transport, MLPERFTINY_READY_MSG, timeout_sec=200)
+        # TH_VENDOR_NAME_STRING is set to "microTVM" in submitter_implemented.h.
+        device_name = _mlperftiny_get_name(transport)
+        print(f"Testing {model_name} on {device_name}.")
+        assert device_name == "microTVM"
+        if model_name != "ad":
+            accuracy = _mlperftiny_test_dataset(transport, [samples, labels], 100)
+            print(f"Model {model_name} accuracy: {accuracy}")
+        else:
+            mean_error = _mlperftiny_test_dataset_ad(transport, [samples, None], 100)
+            print(
+                f"""Model {model_name} mean error: {mean_error}.
+                Note that this is not the final accuracy number.
+                To calculate that, you need to use the sklearn.metrics.roc_auc_score function."""
+            )
 
 
 if __name__ == "__main__":