diff --git a/python/tvm/meta_schedule/testing/relay_workload.py b/python/tvm/meta_schedule/testing/relay_workload.py
index 98bb99512020..9dcff2ace583 100644
--- a/python/tvm/meta_schedule/testing/relay_workload.py
+++ b/python/tvm/meta_schedule/testing/relay_workload.py
@@ -61,23 +61,23 @@ def _get_network(
     assert layout is None or layout in ["NCHW", "NHWC"]
 
     if name in ["resnet_18", "resnet_50"]:
-        model = getattr(models, name.replace("_", ""))(pretrained=False)
+        model = getattr(models, name.replace("_", ""))(weights=None)
     elif name == "wide_resnet_50":
-        model = getattr(models, "wide_resnet50_2")(pretrained=False)
+        model = getattr(models, "wide_resnet50_2")(weights=None)
     elif name == "resnext_50":
-        model = getattr(models, "resnext50_32x4d")(pretrained=False)
+        model = getattr(models, "resnext50_32x4d")(weights=None)
     elif name == "mobilenet_v2":
-        model = getattr(models, name)(pretrained=False)
+        model = getattr(models, name)(weights=None)
     elif name == "mobilenet_v3":
-        model = getattr(models, name + "_large")(pretrained=False)
+        model = getattr(models, name + "_large")(weights=None)
     elif name == "inception_v3":
-        model = getattr(models, name)(pretrained=False, aux_logits=False)
+        model = getattr(models, name)(weights=None, aux_logits=False)
     elif name == "densenet_121":
-        model = getattr(models, name.replace("_", ""))(pretrained=False)
+        model = getattr(models, name.replace("_", ""))(weights=None)
    elif name == "resnet3d_18":
-        model = models.video.r3d_18(pretrained=False)
+        model = models.video.r3d_18(weights=None)
    elif name == "vgg_16":
-        model = getattr(models, name.replace("_", ""))(pretrained=False)
+        model = getattr(models, name.replace("_", ""))(weights=None)
     dtype = "float32"
     input_data = torch.randn(input_shape).type(  # pylint: disable=no-member
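The `relay_workload.py` hunk above tracks the torchvision 0.13 API change: the boolean `pretrained` flag is deprecated in favor of a `weights` enum, and `pretrained=False` maps to `weights=None` (randomly initialized parameters, no checkpoint download). A minimal before/after sketch, assuming torchvision >= 0.13:

```python
from torchvision import models

# Deprecated since torchvision 0.13 (emits a deprecation warning):
#   model = models.resnet18(pretrained=False)
# Current API: weights=None requests random initialization and
# skips any checkpoint download.
model = models.resnet18(weights=None)
```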
Latency" + << "Trials" + << "Terminated"; + } else { + p.Row() << "ID" + << "Name" + << "FLOP" + << "Weight" + << "Speed (GFLOPS)" + << "Latency (us)" + << "Weighted Latency (us)" + << "Trials" + << "Terminated"; + } + p.Separator(); + for (int i = 0; i < n_tasks; ++i) { const TaskRecord& record = task_records_[i]; auto row = p.Row(); int trials = record.trials; + String task_name = record.task->task_name.value(); + if (using_ipython() && task_name.length() > 23) { + std::string temp = task_name.c_str(); + temp = temp.substr(0, 20) + "..."; + task_name = String(temp); + } row << /*id=*/i // - << /*name=*/record.task->task_name.value() // + << /*name=*/task_name // << /*flops=*/static_cast(record.flop) // << /*weight=*/static_cast(record.weight); double latency = 1e9; @@ -101,9 +122,10 @@ class GradientBasedNode final : public TaskSchedulerNode { } } p.Separator(); - os << p.AsStr() // - << "\nTotal trials: " << total_trials // - << "\nTotal latency (us): " << total_latency // + os << p.AsStr() // + << "\nProgress: " << total_trials / (max_trials * 0.01) << "%" // + << "\nTotal Trials: " << total_trials << " / " << max_trials // + << "\nTotal latency (us): " << total_latency // << "\n"; return os.str(); } @@ -112,6 +134,7 @@ class GradientBasedNode final : public TaskSchedulerNode { int n_tasks = task_records_.size(); // Round robin if (num_rounds_already_ == 0) { + TVM_PY_LOG_CLEAR_SCREEN(this->logging_func); TVM_PY_LOG(INFO, this->logging_func) << "\n" << this->TuningStatistics(); } if (num_rounds_already_ < n_tasks) { @@ -178,6 +201,7 @@ class GradientBasedNode final : public TaskSchedulerNode { } record.best_time_cost_history.push_back(best_time_cost); record.trials += results.size(); + TVM_PY_LOG_CLEAR_SCREEN(this->logging_func); TVM_PY_LOG(INFO, this->logging_func) << "[Updated] Task #" << task_id << ": " << record.task->task_name << "\n" << this->TuningStatistics(); diff --git a/src/meta_schedule/utils.h b/src/meta_schedule/utils.h index cf9a32917031..f0b736081670 100644 --- a/src/meta_schedule/utils.h +++ b/src/meta_schedule/utils.h @@ -59,6 +59,7 @@ ::tvm::meta_schedule::PyLogMessage(__FILE__, __LINE__, logging_func, \ PyLogMessage::Level::logging_level) \ .stream() +#define TVM_PY_LOG_CLEAR_SCREEN(logging_func) clear_logging(__FILE__, __LINE__, logging_func) namespace tvm { namespace meta_schedule { @@ -66,10 +67,13 @@ namespace meta_schedule { /*! * \brief Class to accumulate an log message on the python side. Do not use directly, instead use * TVM_PY_LOG(DEBUG), TVM_PY_LOG(INFO), TVM_PY_LOG(WARNING), TVM_PY_ERROR(ERROR). + * \sa TVM_PY_LOG + * \sa TVM_PY_LOG_CLEAR_SCREEN */ class PyLogMessage { public: enum class Level : int32_t { + CLEAR = -10, DEBUG = 10, INFO = 20, WARNING = 30, @@ -81,6 +85,8 @@ class PyLogMessage { : file_(file), lineno_(lineno), logging_func_(logging_func), logging_level_(logging_level) {} TVM_NO_INLINE ~PyLogMessage() { + ICHECK(logging_level_ != Level::CLEAR) + << "Cannot use CLEAR as logging level in TVM_PY_LOG, please use TVM_PY_LOG_CLEAR_SCREEN."; if (this->logging_func_.defined()) { logging_func_(static_cast(logging_level_), stream_.str()); } else { @@ -107,6 +113,32 @@ class PyLogMessage { Level logging_level_; }; +/*! + * \brief Whether the tuning is running on ipython kernel. + * \return A boolean indicating whether ipython kernel is used. 
diff --git a/src/meta_schedule/utils.h b/src/meta_schedule/utils.h
index cf9a32917031..f0b736081670 100644
--- a/src/meta_schedule/utils.h
+++ b/src/meta_schedule/utils.h
@@ -59,6 +59,7 @@
   ::tvm::meta_schedule::PyLogMessage(__FILE__, __LINE__, logging_func,          \
                                      PyLogMessage::Level::logging_level)        \
       .stream()
+#define TVM_PY_LOG_CLEAR_SCREEN(logging_func) clear_logging(__FILE__, __LINE__, logging_func)
 
 namespace tvm {
 namespace meta_schedule {
@@ -66,10 +67,13 @@ namespace meta_schedule {
 /*!
  * \brief Class to accumulate a log message on the Python side. Do not use directly; instead use
  * TVM_PY_LOG(DEBUG), TVM_PY_LOG(INFO), TVM_PY_LOG(WARNING), TVM_PY_LOG(ERROR).
+ * \sa TVM_PY_LOG
+ * \sa TVM_PY_LOG_CLEAR_SCREEN
  */
 class PyLogMessage {
  public:
   enum class Level : int32_t {
+    CLEAR = -10,
     DEBUG = 10,
     INFO = 20,
     WARNING = 30,
@@ -81,6 +85,8 @@
       : file_(file), lineno_(lineno), logging_func_(logging_func), logging_level_(logging_level) {}
   TVM_NO_INLINE ~PyLogMessage() {
+    ICHECK(logging_level_ != Level::CLEAR)
+        << "Cannot use CLEAR as logging level in TVM_PY_LOG; please use TVM_PY_LOG_CLEAR_SCREEN.";
     if (this->logging_func_.defined()) {
       logging_func_(static_cast<int>(logging_level_), stream_.str());
     } else {
@@ -107,6 +113,32 @@
   Level logging_level_;
 };
 
+/*!
+ * \brief Whether the tuning is running in an IPython kernel.
+ * \return A boolean indicating whether an IPython kernel is used.
+ */
+inline bool using_ipython() {
+  bool flag = false;
+  const auto* f_using_ipython = runtime::Registry::Get("meta_schedule.using_ipython");
+  if (f_using_ipython != nullptr && f_using_ipython->defined()) flag = (*f_using_ipython)();
+  return flag;
+}
+
+/*!
+ * \brief A helper function to clear the logging output in an IPython kernel or console.
+ * \param file The file name.
+ * \param lineno The line number.
+ * \param logging_func The logging function.
+ */
+inline void clear_logging(const char* file, int lineno, PackedFunc logging_func) {
+  if (logging_func.defined() && using_ipython()) {
+    logging_func(static_cast<int>(PyLogMessage::Level::CLEAR), "");
+  } else {
+    // This clears all logging output in the console.
+    runtime::detail::LogMessage(file, lineno).stream() << "\033c\033[3J\033[2J\033[0m\033[H";
+  }
+}
+
 /*! \brief The type of the random state */
 using TRandState = support::LinearCongruentialEngine::TRandState;
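On plain consoles, `clear_logging` falls back to writing a raw ANSI escape sequence: `\033c` (RIS, a full terminal reset), `\033[3J` (erase the scrollback buffer, an xterm extension), `\033[2J` (erase the visible screen), `\033[0m` (reset colors and attributes), and `\033[H` (move the cursor home). The same bytes work from Python on any VT100-compatible terminal, which is a quick way to preview the behavior:

```python
import sys

# Same escape sequence the C++ fallback emits: reset the terminal,
# drop the scrollback, wipe the screen, clear attributes, home the cursor.
sys.stdout.write("\033c\033[3J\033[2J\033[0m\033[H")
sys.stdout.flush()
```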
test_meta_schedule_tune_relay("bert_base", [1, 64], "llvm --num-cores=16", None) + test_meta_schedule_tune_relay("bert_base", [1, 64], "llvm --num-cores=12", None) test_meta_schedule_tune_relay("bert_base", [1, 64], "nvidia/geforce-rtx-3070", None) test_meta_schedule_te2primfunc_argument_order() test_meta_schedule_relay_lowering()