diff --git a/benchmark/python/dnnl/fc_add.py b/benchmark/python/dnnl/fc_add.py
new file mode 100644
index 000000000000..8bdefd39ef25
--- /dev/null
+++ b/benchmark/python/dnnl/fc_add.py
@@ -0,0 +1,164 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Benchmark of FullyConnected + elementwise-add fusion (oneDNN backend),
+# in float and int8 (quantized) variants.
+
+import time
+import gc
+import sys
+import mxnet as mx
+from mxnet.gluon import nn
+from mxnet.contrib import quantization
+
+# shape, num_hidden:
+sizes = [
+    ((  1,  224),  512),
+    ((  1,  224), 4096),
+    (( 16, 1024), 1024),
+    (( 32, 4096), 1024),
+    (( 32, 4096), 4096),
+    ((512,  512), 4096)]
+
+rounds = 1000
+warmup = 10
+
+test_header = "--no_test_header" not in sys.argv
+table_header = "--no_table_header" not in sys.argv
+table_left_columns = "--no_size_column" not in sys.argv
+dump_graph = "--dump_graph" in sys.argv
+
+def dump_graph_fn(net, postfix):
+    # Export the (optimized) symbol graph for offline inspection.
+    if dump_graph:
+        net.export("/tmp/fc_add_" + postfix)
+
+def operator_string(elemwise_add):
+    return 'elemwise_add' if elemwise_add else 'npi_add'
+
+def print_header(header):
+    print("\n")
+    print(header if test_header else "", "\n")
+    if table_header:
+        if table_left_columns:
+            print("|       Shape | Hidden | Mean [ms] |" )
+            print("|------------:|-------:|----------:|" )
+        else:
+            print(" Mean [ms] |" )
+            print("----------:|" )
+
+def print_value(shape, hidden, mean):
+    if table_left_columns:
+        print("| ({:4},{:4}) | {:6} | {:9.3f} |".format(shape[0], shape[1], hidden, mean))
+    else:
+        print(" {:9.3f} |".format(mean))
+
+
+def measure(net, data0, data1, data2, shape, nhid):
+    # GC is disabled so collection pauses do not pollute the timing.
+    mx.nd.waitall()
+    gc.collect()
+    gc.disable()
+    for i in range(rounds + warmup):
+        if i == warmup:
+            # Timer starts only after the warmup iterations.
+            start_time = time.time()
+        o = net(data0, data1, data2)
+        o.wait_to_read()
+    end_time = time.time()
+    run_time = (end_time - start_time)
+    print_value(shape, nhid, 1000 * run_time / rounds)
+    gc.enable()
+
+
+class FCWithSum(nn.HybridBlock):
+    def __init__(self, num_in, num_hidden, elemwise_add, **kwargs):
+        super(FCWithSum, self).__init__(**kwargs)
+        self.fc0 = nn.Dense(units=num_hidden, in_units=num_in)
+        self.fc1 = nn.Dense(units=num_hidden)
+        self.elemwise_add = elemwise_add
+
+    def forward(self, data0, data1, data2):
+        _fc0 = self.fc0(data0)
+        _fc1 = self.fc1(data1)
+        if self.elemwise_add:
+            # Legacy elemwise_add op needs nd ndarrays; convert back to np.
+            _sum0 = mx.nd.elemwise_add(data2.as_nd_ndarray(), _fc0.as_nd_ndarray()).as_np_ndarray()
+            _sum1 = mx.nd.elemwise_add(_fc1.as_nd_ndarray(), _sum0.as_nd_ndarray()).as_np_ndarray()
+        else:
+            _sum0 = data2 + _fc0
+            _sum1 = _fc1 + _sum0
+        return _sum1
+
+def benchmark_float(elemwise_add):
+    header = operator_string(elemwise_add) + ', float'
+    print_header(header)
+    for shape, nhid in sizes:
+        net = FCWithSum(shape[1], nhid, elemwise_add)
+        net.initialize()
+        net.hybridize(static_alloc=True, static_shape=True)
+        data0 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
+        data1 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
+        shape2 = (shape[0], nhid)
+        data2 = mx.np.random.uniform(size=shape2, low=-1.0, high=1.0)
+        net.optimize_for(data0, data1, data2, backend='ONEDNN')
+        measure(net, data0, data1, data2, shape, nhid)
+        dump_graph_fn(net, operator_string(elemwise_add) + '_float')
+
+class CalibIter(mx.io.DataIter):
+    def __init__(self, batch, data_shape, batch_size):
+        super(CalibIter, self).__init__(batch_size)
+        self.label_shape = (batch_size,)
+        self.data_shape = data_shape
+        if isinstance(data_shape, tuple):
+            self.provide_data = [('data', data_shape)]
+        else:
+            self.provide_data = data_shape
+        self.provide_label = []
+        self.batch = batch
+    def __iter__(self):
+        yield self.batch
+
+def benchmark_int8(quantize_mode, quantize_granularity, elemwise_add):
+    header = operator_string(elemwise_add) + ', mode = ' + quantize_mode + \
+             ', granularity = ' + quantize_granularity
+    print_header(header)
+    for shape, nhid in sizes:
+        net = FCWithSum(shape[1], nhid, elemwise_add)
+        net.initialize()
+        net.hybridize(static_alloc=True, static_shape=True)
+        data0 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
+        data1 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
+        shape2 = (shape[0], nhid)
+        data2 = mx.np.random.uniform(size=shape2, low=-1.0, high=1.0)
+        data = mx.gluon.data.ArrayDataset(data0, data1, data2)
+        calib_data = mx.gluon.data.DataLoader(data, batch_size=1)
+        net = quantization.quantize_net(net,
+                                        device=mx.cpu(),
+                                        exclude_layers=None,
+                                        exclude_operators=None,
+                                        calib_mode='naive',
+                                        calib_data=calib_data,
+                                        num_calib_batches=1,
+                                        quantize_mode=quantize_mode,
+                                        quantize_granularity=quantize_granularity
+                                        )
+        net.hybridize(static_alloc=True, static_shape=True)
+        measure(net, data0, data1, data2, shape, nhid)
+        dump_graph_fn(net, operator_string(elemwise_add) + \
+                      '_' + str(quantize_mode) + '_' + str(quantize_granularity))
+
+for elemwise_add in [True, False]:
+    benchmark_float(elemwise_add)
+
+for quantize_mode in ['smart', 'full']:
+    for quantize_granularity in ['tensor-wise', 'channel-wise']:
+        for elemwise_add in [True, False]:
+            benchmark_int8(quantize_mode, quantize_granularity, elemwise_add)
diff --git a/benchmark/python/dnnl/run.sh b/benchmark/python/dnnl/run.sh
new file mode 100755
index 000000000000..63379b4ca4c1
--- /dev/null
+++ b/benchmark/python/dnnl/run.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Script for running python benchmark with properly set OMP parameters for it
+
+check_parameters() {
+    if [ "$#" -eq 0 ] ; then
+        echo "Please give python script to run as parameter."
+        echo "Optionally you can give number of threads to use and python scripts parameters:"
+        echo "   `basename "$0"` [num_threads] python_script [python script parameters]"
+        exit
+    fi
+}
+
+check_parameters $@
+
+NUM_SOCKET=`lscpu | grep 'Socket(s)' | awk '{print $NF}'`
+CORES_PER_SOCKET=`lscpu | grep 'Core(s) per socket' | awk '{print $NF}'`
+NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))
+
+integer_reg='^[0-9]+$'
+if [[ $1 =~ $integer_reg ]] ; then
+    if (($1 > $NUM_CORES)); then
+        echo >&2
+        echo "WARNING: given number of threads = $1" \
+             " is greater than number of physical cores = $NUM_CORES." >&2
+        echo >&2
+    fi
+    NUM_CORES=$1
+    shift
+    check_parameters $@
+fi
+
+CORES={0}:${NUM_CORES}:1
+
+INSTRUCTION="OMP_NUM_THREADS=${NUM_CORES} OMP_PROC_BIND=TRUE OMP_PLACES=${CORES} python3 -u $@"
+echo $INSTRUCTION >&2
+eval $INSTRUCTION
diff --git a/benchmark/python/dnnl/run_per_thread.sh b/benchmark/python/dnnl/run_per_thread.sh
new file mode 100755
index 000000000000..c766429b341f
--- /dev/null
+++ b/benchmark/python/dnnl/run_per_thread.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Script for running python benchmark against number of used OMP threads
+
+
+help_and_exit() {
+    echo "Usage:"
+    echo "   `basename "$0"` [start_num_threads step_num_threads end_num_threads] python_script [python script parameters]"
+    echo "Number of threads range parameters and python script are optional."
+    exit
+}
+
+if [ "$#" -eq 0 ] ; then
+    help_and_exit
+fi
+
+NUM_SOCKET=`lscpu | grep 'Socket(s)' | awk '{print $NF}'`
+CORES_PER_SOCKET=`lscpu | grep 'Core(s) per socket' | awk '{print $NF}'`
+NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))
+
+NT_START=1
+NT_STEP=1
+NT_END=$NUM_CORES
+
+integer_reg='^[0-9]+$'
+signed_integer_reg='^[+-]*[0-9]+$'
+if [[ $1 =~ $integer_reg ]] ; then
+    if [[ $2 =~ $signed_integer_reg ]] && [[ $3 =~ $integer_reg ]]; then
+        NT_START=$1
+        NT_STEP=$2
+        NT_END=$3
+        shift 3
+        if [ "$#" -eq 0 ] ; then
+            help_and_exit
+        fi
+    else
+        echo "Provide 3 numbers for threads range: start, step and the end."
+        help_and_exit
+    fi
+fi
+
+NT_SEQUENCE=`seq $NT_START $NT_STEP $NT_END`
+if [ -z "$NT_SEQUENCE" ]; then
+    echo "Given threads range produces empty sequence."
+    help_and_exit
+else
+    echo "Start python script $1 for following number of threads:" >&2
+    echo $NT_SEQUENCE >&2
+fi
+
+RUN_SCRIPT=`dirname "$0"`/run.sh
+for NT in $NT_SEQUENCE;
+do
+    TMP_FILE=/tmp/_result_${NT}.txt
+    echo 1>${TMP_FILE}
+    if [[ $NT -eq $NT_START ]]; then
+        echo "NUM_THREADS = $NT" 1>>${TMP_FILE}
+        $RUN_SCRIPT $NT $@ 1>>${TMP_FILE}
+    else
+        echo " $NT" 1>>${TMP_FILE}
+        $RUN_SCRIPT $NT $@ --no_size_column --no_test_header 1>>${TMP_FILE}
+    fi
+    TMP_FILES+=" ${TMP_FILE}"
+done
+paste -d "" ${TMP_FILES}