From d5212b418e0302ce22800e2ecd073abd7ecde208 Mon Sep 17 00:00:00 2001
From: Adam Grabowski
Date: Fri, 25 Jun 2021 14:26:08 +0200
Subject: [PATCH] Add new benchmark function for single operator comparison

---
 benchmark/opperf/README.md                |  33 +++++++
 benchmark/opperf/rules/default_params.py  |   5 +
 benchmark/opperf/utils/benchmark_utils.py | 110 +++++++++++++++++++---
 3 files changed, 133 insertions(+), 15 deletions(-)

diff --git a/benchmark/opperf/README.md b/benchmark/opperf/README.md
index bb3fb8ed851b..806d91a8462d 100644
--- a/benchmark/opperf/README.md
+++ b/benchmark/opperf/README.md
@@ -172,6 +172,39 @@ Output for the above benchmark run, on a CPU machine, would look something like
 
 Currently, opperf supports operators in `mx.nd.*` namespace. However, locally, one can profile internal operators in `mx.nd.internal.*` namespace.
 
+## Usecase 6 - Compare performance of a chosen operator from the NDArray library and its Numpy/Numpy_extension counterpart
+For example, to compare the add operator from `mx.nd` and `mx.np`, just run the following Python script:
+
+```
+#!/usr/bin/python
+from benchmark.opperf.utils.benchmark_utils import run_benchmark_operator
+
+run_benchmark_operator(name="add", run_backward=True)
+```
+
+Output for the above benchmark run, on a CPU machine, would look something like below:
+
+```
+
+[{'add': [{'inputs': {'lhs': (128, 128), 'rhs': (128, 128)},
+           'max_storage_mem_alloc_cpu/0': 32.768,
+           'avg_time_forward_add': 0.0496,
+           'avg_time_backward_add': 0.0793}]}]
+
+[{'add': [{'inputs': {'x1': (128, 128), 'x2': (128, 128)},
+           'max_storage_mem_alloc_cpu/0': 32.768,
+           'avg_time_forward_add': 0.0484,
+           'avg_time_backward_add': 0.0898}]}]
+
+```
+This function builds on the `run_performance_test` function described in Usecase 3 and Usecase 4, so all of its parameters can be passed through here as well.
+All arguments of NDArray type are automatically created with the shape passed as `size`.
+If a function requires more arguments or differently shaped NDArrays, provide them via `additional_inputs`, as shown below:
+```
+run_benchmark_operator(name="pick", size=(128, 128), additional_inputs={"index": (128, 1)})
+```
+
+
 #### Changes
 
 Remove the hasattr check for `op.__name__` to be in `mx.nd`
diff --git a/benchmark/opperf/rules/default_params.py b/benchmark/opperf/rules/default_params.py
index 4c903384a030..0474eea08f7b 100644
--- a/benchmark/opperf/rules/default_params.py
+++ b/benchmark/opperf/rules/default_params.py
@@ -781,3 +781,8 @@
                        "grads_sum_sq", "mhs", "data1", "data2", "loc", "parameters", "state", "state_cell"]
 
+PARAMS_OF_TYPE_NP_ARRAY = ["x1", "x2", "prototype", "object", "a", "b", "fill_value", "array", "x", "arr",
+                           "values", "ary", "seq", "arrays", "tup", "indices", "m", "ar", "q", "p", "condition",
+                           "arys", "v", "A", "xp", "fp", "data", "mask", "gamma", "beta", "running_mean",
+                           "running_var", "weight", "index", "lhs", "rhs"]
+
diff --git a/benchmark/opperf/utils/benchmark_utils.py b/benchmark/opperf/utils/benchmark_utils.py
index b3bf8213569a..38a1c15a6147 100644
--- a/benchmark/opperf/utils/benchmark_utils.py
+++ b/benchmark/opperf/utils/benchmark_utils.py
@@ -16,35 +16,90 @@
 # under the License.
 
 import logging
+import inspect
 import mxnet as mx
 from mxnet import nd
+from mxnet import np
 
 from .ndarray_utils import get_mx_ndarray, nd_forward_and_profile, nd_forward_backward_and_profile
 from .common_utils import merge_map_list
 from .op_registry_utils import prepare_op_inputs
-from benchmark.opperf.rules.default_params import PARAMS_OF_TYPE_NDARRAY
+from benchmark.opperf.rules.default_params import PARAMS_OF_TYPE_NDARRAY, PARAMS_OF_TYPE_NP_ARRAY
 from .profiler_utils import cpp_profile, python_profile
 
 no_backward = {'gather_nd', 'softmax_cross_entropy', 'linalg_gelqf', 'linalg_slogdet', 'moments', 'SequenceLast', 'Embedding'}
 
 
-def _prepare_op_inputs(inputs, run_backward, dtype, ctx):
+def _prepare_op_inputs(inputs, run_backward, dtype, ctx, module):
     mx.random.seed(41)
 
     kwargs_list = []
+    if module == 'mxnet.numpy_extension' or module == 'mxnet.numpy':
+        PARAMS_TYPE = PARAMS_OF_TYPE_NP_ARRAY
+        get_array_fn = get_mx_np_ndarray
+    else:
+        PARAMS_TYPE = PARAMS_OF_TYPE_NDARRAY
+        get_array_fn = get_mx_ndarray
 
     for inp in inputs:
         kwargs = {}
         for key, value in inp.items():
-            if key in PARAMS_OF_TYPE_NDARRAY:
-                kwargs[key] = get_mx_ndarray(ctx=ctx, in_tensor=value,
-                                             dtype=dtype,
-                                             initializer=nd.normal,
-                                             attach_grad=run_backward)
+            if key in PARAMS_TYPE:
+                kwargs[key] = get_array_fn(ctx=ctx, in_tensor=value,
+                                           dtype=dtype,
+                                           initializer=nd.normal,
+                                           attach_grad=run_backward)
             else:
                 kwargs[key] = value
         kwargs_list.append(kwargs)
 
     return kwargs_list
 
 
+def get_mx_np_ndarray(ctx, in_tensor, dtype, initializer, attach_grad=True):
+    """Helper function to prepare an MXNet Numpy NDArray tensor in the given Context (ctx) of type (dtype).
+    You can get a new Tensor by providing only "Shape" or "Numpy NDArray" or another MXNet NDArray as
+    "in_tensor".
+
+    NOTE: This is a sync call and waits for the Tensor to be created.
+
+    Parameters
+    ----------
+    ctx: mx.ctx, default mx.cpu()
+        Context of the new MXNet NDArray Tensor.
+    in_tensor: Numpy NDArray or MXNet NDArray or Tuple of shape
+        Can be a tuple of shape or Numpy NDArray or MXNet NDArray.
+    dtype: str
+        Precision or Dtype of the expected Tensor. Ex: "float32", "Int64"
+    initializer:
+        Function reference to the initializer to use. Ex: mx.nd.random.normal, mx.nd.zeros
+    attach_grad: Boolean, default True
+        To attach a gradient for the Tensor. Default is True.
+
+    Returns
+    -------
+    MXNet Numpy NDArray Tensor.
+    """
+    if isinstance(in_tensor, int) or isinstance(in_tensor, float):
+        return in_tensor
+
+    if isinstance(in_tensor, tuple):
+        nd_ndarray = get_mx_ndarray(ctx=ctx, in_tensor=in_tensor,
+                                    dtype="float32",
+                                    initializer=initializer,
+                                    attach_grad=attach_grad)
+        tensor = nd_ndarray.as_np_ndarray().astype(dtype=dtype)
+    elif isinstance(in_tensor, list):
+        tensor = np.array(in_tensor, ctx=ctx)
+    elif isinstance(in_tensor, nd.NDArray):
+        tensor = in_tensor.as_np_ndarray()
+    elif isinstance(in_tensor, np.ndarray):
+        tensor = in_tensor.as_in_context(ctx)
+    else:
+        raise ValueError("Invalid input type for creating input tensor. Input can be tuple() of shape or Numpy Array or"
+                         " MXNet NDArray. Given - ", in_tensor)
+    if attach_grad:
+        tensor.attach_grad()
+
+    tensor.wait_to_read()
+    return tensor
+
+
 def parse_input_ndarray(input_dict):
     """Parse input for ndarray and extract array shape for better readability
@@ -96,7 +151,7 @@ def parse_input_ndarray(input_dict):
     return no_new_line_input_dict
 
 
-def _run_nd_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler):
+def _run_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler):
     if profiler == 'native':
         if run_backward:
             benchmark_helper_func = cpp_profile(nd_forward_backward_and_profile)
@@ -141,10 +196,11 @@ def run_performance_test(ops, inputs, run_backward=True,
     Parameters
     ----------
     ops: [Str]
-        One or list of operators to benchmark. Should be an NDArray operator.
+        One or list of operators to benchmark. Should be an NDArray, Numpy or Numpy_extension operator.
     inputs: map
         Inputs for operator. Key should be name of parameter for operator.
-        Example: inputs = {"lhs": (1024, 1024), "rhs": (1024, 1024)} for mx.nd.add
+        Example: inputs = {"lhs": (1024, 1024), "rhs": (1024, 1024)} for mx.nd.add or
+        inputs = {"x1": (1024, 1024), "x2": (1024, 1024)} for mx.np.add
     run_backward: Boolean, Default is True
         Should we have backward operator benchmarks.
     dtype: Str, default 'float32'
@@ -166,20 +222,44 @@ def run_performance_test(ops, inputs, run_backward=True,
     Note: when run_performance_test is called on the nd.Embedding operator with run_backward=True, an error will be thrown. Track issue here: https://github.com/apache/incubator-mxnet/issues/11314
     """
-    kwargs_list = _prepare_op_inputs(inputs, run_backward, dtype, ctx)
-
     if not isinstance(ops, list):
         ops = [ops]
 
     op_benchmark_result = []
     for op in ops:
-        if hasattr(mx.nd, op.__name__):
-            benchmark_result = _run_nd_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler)
+        if hasattr(mx.nd, op.__name__) or hasattr(mx.np, op.__name__) or hasattr(mx.npx, op.__name__):
+            kwargs_list = _prepare_op_inputs(inputs, run_backward, dtype, ctx, op.__module__)
+            benchmark_result = _run_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler)
         else:
-            raise ValueError("Unknown NDArray operator provided to benchmark. - ", op.__name__)
+            raise ValueError("Unknown {0} operator provided to benchmark. - {1}".format(op.__module__, op.__name__))
         op_benchmark_result.append(benchmark_result)
     return op_benchmark_result
 
 
+def run_benchmark_operator(name, size=(128, 128), additional_inputs={},
+                           dtype='float32', run_backward=False, ctx=mx.cpu(),
+                           warmup=10, runs=50, profiler="native"):
+    arg_list = {mx.nd: PARAMS_OF_TYPE_NDARRAY, mx.np: PARAMS_OF_TYPE_NP_ARRAY, mx.npx: PARAMS_OF_TYPE_NP_ARRAY}
+    modules = [mx.nd, mx.np, mx.npx]
+    responses = []
+    for module in modules:
+        if hasattr(module, name):
+            function = getattr(module, name)
+            args = inspect.getfullargspec(function).args
+            inputs = {}
+            for arg in args:
+                if arg in additional_inputs.keys():
+                    inputs.update({arg: additional_inputs[arg]})
+                elif arg in arg_list[module]:
+                    inputs.update({arg: size})
+            res = run_performance_test(function, run_backward=run_backward, dtype=dtype, ctx=ctx,
+                                       inputs=[inputs],
+                                       warmup=warmup, runs=runs, profiler=profiler)
+            responses.append(res)
+        else:
+            responses.append(str(module.__name__) + " does not have operator " + name)
+    for i in range(len(modules)):
+        print(modules[i].__name__)
+        print(responses[i])
+
+
 def run_op_benchmarks(ops, dtype, ctx, profiler, int64_tensor, warmup, runs):
     # Running im2col either forwards or backwards on GPU results in errors
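
For reviewers who want to try the new entry point, here is a minimal usage sketch (not part of the patch). It assumes MXNet with the `mx.np`/`mx.npx` namespaces is installed and this patch is applied; the operator names and shapes are simply the examples used in the README section above.

```
#!/usr/bin/python
# Minimal usage sketch for the run_benchmark_operator helper added by this patch.
# Assumes MXNet is installed and the patch is applied; operator names and shapes
# below are illustrative only.
from benchmark.opperf.utils.benchmark_utils import run_benchmark_operator

# Benchmark add from mx.nd, mx.np and mx.npx (where available) on 128x128 inputs,
# including the backward pass, and print one result block per namespace.
run_benchmark_operator(name="add", size=(128, 128), run_backward=True)

# Operators that need extra, differently shaped inputs receive them via
# additional_inputs, e.g. pick needs an index array with a single column.
run_benchmark_operator(name="pick", size=(128, 128), additional_inputs={"index": (128, 1)})
```

Each call prints the namespace name followed by the benchmark results, or a note that the namespace does not provide the operator, mirroring the sample output in the README section of this patch.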
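The same comparison can also be driven manually through `run_performance_test`, since `_prepare_op_inputs` now dispatches on the operator's module and builds `mx.np` ndarrays for Numpy-namespace parameter names. A small sketch, again assuming the patch is applied; the shapes are the example values from the updated docstring.

```
#!/usr/bin/python
# Sketch: calling run_performance_test directly with a numpy-namespace operator.
# Assumes MXNet is installed and this patch is applied.
import mxnet as mx
from benchmark.opperf.utils.benchmark_utils import run_performance_test

# mx.np.add takes "x1"/"x2" (listed in PARAMS_OF_TYPE_NP_ARRAY), so its inputs are
# created as mx.np ndarrays; mx.nd.add would use "lhs"/"rhs" and classic NDArrays.
result = run_performance_test(mx.np.add,
                              inputs=[{"x1": (1024, 1024), "x2": (1024, 1024)}],
                              run_backward=True, dtype='float32', ctx=mx.cpu(),
                              warmup=10, runs=50, profiler='native')
print(result)
```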