Merged
18 changes: 9 additions & 9 deletions python/tvm/meta_schedule/testing/relay_workload.py
@@ -61,23 +61,23 @@ def _get_network(
     assert layout is None or layout in ["NCHW", "NHWC"]

     if name in ["resnet_18", "resnet_50"]:
-        model = getattr(models, name.replace("_", ""))(pretrained=False)
+        model = getattr(models, name.replace("_", ""))(weights=None)
     elif name == "wide_resnet_50":
-        model = getattr(models, "wide_resnet50_2")(pretrained=False)
+        model = getattr(models, "wide_resnet50_2")(weights=None)
     elif name == "resnext_50":
-        model = getattr(models, "resnext50_32x4d")(pretrained=False)
+        model = getattr(models, "resnext50_32x4d")(weights=None)
     elif name == "mobilenet_v2":
-        model = getattr(models, name)(pretrained=False)
+        model = getattr(models, name)(weights=None)
     elif name == "mobilenet_v3":
-        model = getattr(models, name + "_large")(pretrained=False)
+        model = getattr(models, name + "_large")(weights=None)
     elif name == "inception_v3":
-        model = getattr(models, name)(pretrained=False, aux_logits=False)
+        model = getattr(models, name)(weights=None, aux_logits=False)
     elif name == "densenet_121":
-        model = getattr(models, name.replace("_", ""))(pretrained=False)
+        model = getattr(models, name.replace("_", ""))(weights=None)
    elif name == "resnet3d_18":
-        model = models.video.r3d_18(pretrained=False)
+        model = models.video.r3d_18(weights=None)
     elif name == "vgg_16":
-        model = getattr(models, name.replace("_", ""))(pretrained=False)
+        model = getattr(models, name.replace("_", ""))(weights=None)

     dtype = "float32"
     input_data = torch.randn(input_shape).type(  # pylint: disable=no-member
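Context for this mechanical rename, as an editorial sketch (not part of the diff): torchvision 0.13 deprecated the boolean pretrained flag in favor of a weights enum, and passing weights=None builds the same randomly initialized model without the deprecation warning. Assuming torchvision >= 0.13:

from torchvision import models

old_style = models.resnet18(pretrained=False)  # still works, but warns on torchvision >= 0.13
new_style = models.resnet18(weights=None)      # same randomly initialized model, no warning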
18 changes: 17 additions & 1 deletion python/tvm/meta_schedule/utils.py
@@ -371,11 +371,27 @@ def make_logging_func(logger: logging.Logger) -> Optional[Callable]:
     }

     def logging_func(level: int, msg: str):
-        level2log[level](msg)
+        def clear_notebook_output():
+            from IPython.display import clear_output  # type: ignore # pylint: disable=import-outside-toplevel
+
+            clear_output(wait=True)
+
+        if level < 0:
+            clear_notebook_output()
+        else:
+            level2log[level](msg)

     return logging_func


+@register_func("meta_schedule.using_ipython")
+def _check_ipython_env():
+    try:
+        return get_ipython().__class__.__name__ == "ZMQInteractiveShell"  # type: ignore
+    except NameError:
+        return False
+
+
 def parameterize_config(config: Dict[str, Any], params: Dict[str, str]) -> Dict[str, Any]:
     """Parameterize the given configuration.
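How these two pieces fit together, as a hedged sketch (the numeric levels follow the standard logging module plus the CLEAR = -10 sentinel added on the C++ side; the import path is the file changed above):

import logging
from tvm.meta_schedule.utils import make_logging_func

logger = logging.getLogger("meta_schedule")
log = make_logging_func(logger)  # returns None if logger is None

log(logging.INFO, "tuning task 0 ...")  # level >= 0: routed to logger.info
log(-10, "")  # level < 0 (Level::CLEAR on the C++ side): clears the notebook cell

# get_ipython() is injected into builtins by IPython; its class name tells the
# frontends apart: "ZMQInteractiveShell" (Jupyter kernel) vs.
# "TerminalInteractiveShell" (plain ipython). Outside IPython the name is not
# defined at all, hence the NameError fallback in _check_ipython_env above.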
50 changes: 37 additions & 13 deletions src/meta_schedule/task_scheduler/gradient_based.cc
@@ -61,22 +61,43 @@ class GradientBasedNode final : public TaskSchedulerNode {
     int total_trials = 0;
     double total_latency = 0.0;
     support::TablePrinter p;
-    p.Row() << "ID"
-            << "Name"
-            << "FLOP"
-            << "Weight"
-            << "Speed (GFLOPS)"
-            << "Latency (us)"
-            << "Weighted Latency (us)"
-            << "Trials"
-            << "Terminated";
+
+    if (using_ipython()) {
+      p.Row() << "ID"
+              << "Name"
+              << "FLOP"
+              << "Weight"
+              << "GFLOPS"
+              << "Latency (us)"
+              << "Wtd. Latency"
+              << "Trials"
+              << "Terminated";
+    } else {
+      p.Row() << "ID"
+              << "Name"
+              << "FLOP"
+              << "Weight"
+              << "Speed (GFLOPS)"
+              << "Latency (us)"
+              << "Weighted Latency (us)"
+              << "Trials"
+              << "Terminated";
+    }
+
     p.Separator();
+
     for (int i = 0; i < n_tasks; ++i) {
       const TaskRecord& record = task_records_[i];
       auto row = p.Row();
       int trials = record.trials;
+      String task_name = record.task->task_name.value();
+      if (using_ipython() && task_name.length() > 23) {
+        std::string temp = task_name.c_str();
+        temp = temp.substr(0, 20) + "...";
+        task_name = String(temp);
+      }
Comment on lines +93 to +98

Contributor:

Do we have an alternative way to print the whole task name? For example, if a task name is too long, we could print it across multiple lines.

I am thinking of this because I just noticed that in your example table, several lines share exactly the same prefix:

        6 | fused_nn_conv2d_add_... | 231612416 |      2 | 276.8975 |     836.4555 |    1672.9111 |     32 |            
        7 | fused_nn_conv2d_add_... | 231813120 |      2 | 100.5202 |    2306.1349 |    4612.2697 |     32 |            
        8 | fused_nn_conv2d_add_... | 115806208 |      1 | 139.2956 |     831.3701 |     831.3701 |     32 |            

I think this will cause confusion, so perhaps we should try to resolve it. What do you think?

Also cc @junrushao

Member:

This is indeed not ideal. We may want an extra API that assembles a pandas DataFrame, so the table integrates more organically with Jupyter. I have an idea and will communicate with Xiyou next week.
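A possible shape for that idea, sketched under assumptions (the field names are taken from TaskRecord in this diff; the helper name and the task_records argument are hypothetical, not an existing TVM API):

import pandas as pd

def tuning_statistics_df(task_records):
    # One row per task; Jupyter renders a DataFrame as an HTML table, so the
    # full task names can be displayed without the 23-character truncation.
    return pd.DataFrame(
        [
            {
                "ID": i,
                "Name": record["task_name"],  # full name, no "..." suffix
                "FLOP": record["flop"],
                "Weight": record["weight"],
                "Trials": record["trials"],
            }
            for i, record in enumerate(task_records)
        ]
    )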

       row << /*id=*/i                                    //
-          << /*name=*/record.task->task_name.value()     //
+          << /*name=*/task_name                          //
           << /*flops=*/static_cast<int64_t>(record.flop) //
           << /*weight=*/static_cast<int>(record.weight);
       double latency = 1e9;
@@ -101,9 +122,10 @@ class GradientBasedNode final : public TaskSchedulerNode {
       }
     }
     p.Separator();
-    os << p.AsStr()                                  //
-       << "\nTotal trials: " << total_trials         //
-       << "\nTotal latency (us): " << total_latency  //
+    os << p.AsStr()                                                   //
+       << "\nProgress: " << total_trials / (max_trials * 0.01) << "%" //
+       << "\nTotal Trials: " << total_trials << " / " << max_trials   //
+       << "\nTotal latency (us): " << total_latency                   //
        << "\n";
     return os.str();
   }
@@ -112,6 +134,7 @@ class GradientBasedNode final : public TaskSchedulerNode {
     int n_tasks = task_records_.size();
     // Round robin
     if (num_rounds_already_ == 0) {
+      TVM_PY_LOG_CLEAR_SCREEN(this->logging_func);
       TVM_PY_LOG(INFO, this->logging_func) << "\n" << this->TuningStatistics();
     }
     if (num_rounds_already_ < n_tasks) {
@@ -178,6 +201,7 @@ class GradientBasedNode final : public TaskSchedulerNode {
     }
     record.best_time_cost_history.push_back(best_time_cost);
     record.trials += results.size();
+    TVM_PY_LOG_CLEAR_SCREEN(this->logging_func);
     TVM_PY_LOG(INFO, this->logging_func)
         << "[Updated] Task #" << task_id << ": " << record.task->task_name << "\n"
         << this->TuningStatistics();
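The display pattern both call sites implement: clear whatever the previous round printed, then reprint the full statistics table, which reads as an in-place progress view in both Jupyter and a terminal. A minimal Python rendering of the idea (log and table are hypothetical stand-ins for logging_func and TuningStatistics()):

import logging

CLEAR = -10  # mirrors PyLogMessage::Level::CLEAR

def report_round(log, table: str) -> None:
    log(CLEAR, "")                   # what TVM_PY_LOG_CLEAR_SCREEN does
    log(logging.INFO, "\n" + table)  # what TVM_PY_LOG(INFO, ...) << ... does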
32 changes: 32 additions & 0 deletions src/meta_schedule/utils.h
@@ -59,17 +59,21 @@
   ::tvm::meta_schedule::PyLogMessage(__FILE__, __LINE__, logging_func, \
                                      PyLogMessage::Level::logging_level) \
       .stream()
+#define TVM_PY_LOG_CLEAR_SCREEN(logging_func) clear_logging(__FILE__, __LINE__, logging_func)

 namespace tvm {
 namespace meta_schedule {

 /*!
  * \brief Class to accumulate a log message on the Python side. Do not use directly; instead use
  * TVM_PY_LOG(DEBUG), TVM_PY_LOG(INFO), TVM_PY_LOG(WARNING), TVM_PY_LOG(ERROR).
+ * \sa TVM_PY_LOG
+ * \sa TVM_PY_LOG_CLEAR_SCREEN
  */
 class PyLogMessage {
  public:
   enum class Level : int32_t {
+    CLEAR = -10,
     DEBUG = 10,
     INFO = 20,
     WARNING = 30,
@@ -81,6 +85,8 @@ class PyLogMessage {
       : file_(file), lineno_(lineno), logging_func_(logging_func), logging_level_(logging_level) {}

   TVM_NO_INLINE ~PyLogMessage() {
+    ICHECK(logging_level_ != Level::CLEAR)
+        << "Cannot use CLEAR as logging level in TVM_PY_LOG; please use TVM_PY_LOG_CLEAR_SCREEN.";
     if (this->logging_func_.defined()) {
       logging_func_(static_cast<int>(logging_level_), stream_.str());
     } else {
@@ -107,6 +113,32 @@ class PyLogMessage {
   Level logging_level_;
 };

+/*!
+ * \brief Whether the tuning is running in an IPython kernel.
+ * \return A boolean indicating whether an IPython kernel is used.
+ */
+inline bool using_ipython() {
+  bool flag = false;
+  const auto* f_using_ipython = runtime::Registry::Get("meta_schedule.using_ipython");
+  if (f_using_ipython != nullptr) flag = (*f_using_ipython)();
+  return flag;
+}
+
+/*!
+ * \brief A helper function to clear the logging output in an IPython kernel or the console.
+ * \param file The file name.
+ * \param lineno The line number.
+ * \param logging_func The logging function.
+ */
+inline void clear_logging(const char* file, int lineno, PackedFunc logging_func) {
+  if (logging_func.defined() && using_ipython()) {
+    logging_func(static_cast<int>(PyLogMessage::Level::CLEAR), "");
+  } else {
+    // This clears all logging output in the console.
+    runtime::detail::LogMessage(file, lineno).stream() << "\033c\033[3J\033[2J\033[0m\033[H";
+  }
+}
+
 /*! \brief The type of the random state */
 using TRandState = support::LinearCongruentialEngine::TRandState;
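For reference, the escape string that clear_logging writes to the console is a run of standard ANSI/VT100 control codes. Decoding it (an editorial note, not part of the diff):

# \033c    RIS: full terminal reset
# \033[3J  ED 3: erase the scrollback buffer (an xterm extension)
# \033[2J  ED 2: erase the visible screen
# \033[0m  SGR 0: reset colors and text attributes
# \033[H   CUP: move the cursor to the top-left corner
# Try it in a plain terminal:
print("\033c\033[3J\033[2J\033[0m\033[H", end="")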
18 changes: 9 additions & 9 deletions tests/python/unittest/test_meta_schedule_tune_relay.py
@@ -115,11 +115,11 @@ def main(placeholder: T.Buffer[(1, 2, 16, 16, 4), "float32"], T_layout_trans: T.
 @pytest.mark.parametrize(
     "model_name, input_shape, target, layout",
     [
-        ("resnet_18", [1, 3, 224, 224], "llvm --num-cores=16", "NHWC"),
+        ("resnet_18", [1, 3, 224, 224], "llvm --num-cores=12", "NHWC"),
         ("resnet_18", [1, 3, 224, 224], "nvidia/geforce-rtx-3070", "NHWC"),
-        ("mobilenet_v2", [1, 3, 224, 224], "llvm --num-cores=16", "NHWC"),
+        ("mobilenet_v2", [1, 3, 224, 224], "llvm --num-cores=12", "NHWC"),
         ("mobilenet_v2", [1, 3, 224, 224], "nvidia/geforce-rtx-3070", "NHWC"),
-        ("bert_base", [1, 64], "llvm --num-cores=16", None),
+        ("bert_base", [1, 64], "llvm --num-cores=12", None),
         ("bert_base", [1, 64], "nvidia/geforce-rtx-3070", None),
     ],
 )
@@ -242,15 +242,15 @@ def print_results(self) -> None:

     input_name = "data"
     dev = tvm.cpu()
-    target = Target("llvm --num-cores=16")
+    target = Target("llvm --num-cores=12")
     data = tvm.nd.array(data_sample, dev)

     database = TestDummyDatabase()
     database.commit_workload(tvmgen_default_fused_layout_transform)
     database.commit_workload(tvmgen_default_fused_layout_transform_1)
     database.commit_workload(tvmgen_default_fused_nn_contrib_conv2d_NCHWc)

-    with database, tvm.transform.PassContext(
+    with database, tvm.transform.PassContext(  # pylint: disable=not-context-manager
         opt_level=3,
         config={"relay.backend.use_meta_schedule": True},
     ):
@@ -295,7 +295,7 @@ def test_meta_schedule_relay_lowering():

     input_name = "data"
     dev = tvm.cpu()
-    target = Target("llvm --num-cores=16")
+    target = Target("llvm --num-cores=12")
     data = tvm.nd.array(data_sample, dev)

     with tempfile.TemporaryDirectory() as work_dir:
@@ -542,11 +542,11 @@ def schedule_rule_dense_vnni(sch: Schedule, dense_block: BlockRV):


 if __name__ == """__main__""":
-    test_meta_schedule_tune_relay("resnet_18", [1, 3, 224, 224], "llvm --num-cores=16", None)
+    test_meta_schedule_tune_relay("resnet_18", [1, 3, 224, 224], "llvm --num-cores=12", None)
     test_meta_schedule_tune_relay("resnet_18", [1, 3, 224, 224], "nvidia/geforce-rtx-3070", "NCHW")
-    test_meta_schedule_tune_relay("mobilenet_v2", [1, 3, 224, 224], "llvm --num-cores=16", None)
+    test_meta_schedule_tune_relay("mobilenet_v2", [1, 3, 224, 224], "llvm --num-cores=12", None)
     test_meta_schedule_tune_relay("mobilenet_v2", [1, 3, 224, 224], "nvidia/geforce-rtx-3070", None)
-    test_meta_schedule_tune_relay("bert_base", [1, 64], "llvm --num-cores=16", None)
+    test_meta_schedule_tune_relay("bert_base", [1, 64], "llvm --num-cores=12", None)
     test_meta_schedule_tune_relay("bert_base", [1, 64], "nvidia/geforce-rtx-3070", None)
     test_meta_schedule_te2primfunc_argument_order()
     test_meta_schedule_relay_lowering()