Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions benchmark/python/dnnl/fc_add.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import time
import gc
import sys
import mxnet as mx
from mxnet.gluon import nn
from mxnet.contrib import quantization

# Benchmarked configurations as (input shape, num_hidden) pairs.
# The shape is used for the two Dense inputs; num_hidden is the Dense output size.
sizes = [
    (( 1, 224), 512),
    (( 1, 224), 4096),
    (( 16, 1024), 1024),
    (( 32, 4096), 1024),
    (( 32, 4096), 4096),
    ((512, 512), 4096)]

# Number of timed forward passes per configuration.
rounds = 1000
# Number of untimed forward passes executed before the timer starts.
warmup = 10

# Command-line switches; each one is detected by its mere presence in sys.argv.
# Print the test title line unless --no_test_header is given.
test_header = "--no_test_header" not in sys.argv
# Print the table header rows unless --no_table_header is given.
table_header = "--no_table_header" not in sys.argv
# Print the Shape/Hidden columns unless --no_size_column is given.
table_left_colums = "--no_size_column" not in sys.argv
# Export benchmarked networks under /tmp when --dump_graph is given.
dump_graph = "--dump_graph" in sys.argv

def dump_graph_fn(net, postfix):
    """Export ``net`` to ``/tmp/fc_add_<postfix>`` if ``--dump_graph`` was requested.

    Does nothing when the module-level ``dump_graph`` flag is false.
    """
    if not dump_graph:
        return
    export_prefix = "/tmp/fc_add_" + postfix
    net.export(export_prefix)

def operator_string(elemwise_add):
    """Return the label of the add operator variant selected by ``elemwise_add``.

    A truthy flag maps to ``'elemwise_add'``, a falsy one to ``'npi_add'``.
    """
    if elemwise_add:
        return 'elemwise_add'
    return 'npi_add'

def print_header(header):
    """Print the title of a benchmark run followed by the result-table header.

    The title is printed only when the module-level ``test_header`` flag is
    set (an empty string is printed otherwise, so the vertical spacing stays
    the same). The table header is printed only when ``table_header`` is set;
    the Shape/Hidden columns are included only when ``table_left_colums`` is
    set.

    Parameters
    ----------
    header : str
        Title describing the benchmarked configuration.
    """
    print("\n")
    print(header if test_header else "", "\n")
    if not table_header:
        return
    if table_left_colums:
        # Cell widths (13, 8, 11) match both the separator row below and the
        # rows produced by print_value, so the plain-text table lines up.
        print("|    Shape    | Hidden | Mean [ms] |")
        print("|------------:|-------:|----------:|")
    else:
        print(" Mean [ms] |")
        print("----------:|")

def print_value(shape, hidden, mean):
    """Print one result row of the benchmark table.

    With the module-level ``table_left_colums`` flag set, the row holds the
    two entries of ``shape``, ``hidden`` and ``mean``; otherwise only the
    ``mean`` cell is printed.
    """
    if not table_left_colums:
        print(" {:9.3f} |".format(mean))
        return
    dim0, dim1 = shape[0], shape[1]
    row = "| ({:4},{:4}) | {:6} | {:9.3f} |".format(dim0, dim1, hidden, mean)
    print(row)


def measure(net, data0, data1, data2, shape, nhid):
    """Time forward passes of ``net`` and print the mean latency in milliseconds.

    ``warmup`` untimed passes are executed first, followed by ``rounds`` timed
    passes. Every pass calls ``wait_to_read()`` on the output before the next
    one starts. The garbage collector is switched off while the passes run.

    Parameters
    ----------
    net : callable
        Network invoked as ``net(data0, data1, data2)``.
    data0, data1, data2
        Inputs forwarded to ``net`` unchanged.
    shape, nhid
        Only passed on to ``print_value`` to label the result row.
    """
    mx.nd.waitall()
    gc.collect()
    # Remember the collector state so it is restored rather than force-enabled.
    gc_was_enabled = gc.isenabled()
    gc.disable()
    try:
        for _ in range(warmup):
            net(data0, data1, data2).wait_to_read()
        # perf_counter is monotonic, so clock adjustments cannot skew the result.
        start_time = time.perf_counter()
        for _ in range(rounds):
            net(data0, data1, data2).wait_to_read()
        run_time = time.perf_counter() - start_time
    finally:
        # Runs even if a forward pass raises, so GC is never left disabled.
        if gc_was_enabled:
            gc.enable()
    print_value(shape, nhid, 1000 * run_time / rounds)


class FCWithSum(nn.HybridBlock):
    """Two Dense layers whose outputs are added together with a third input.

    The result is ``fc1(data1) + (data2 + fc0(data0))``. The additions are
    done either with ``mx.nd.elemwise_add`` or with the ``+`` operator on the
    arrays, depending on the ``elemwise_add`` flag.
    """

    def __init__(self, num_in, num_hidden, elemwise_add, **kwargs):
        super().__init__(**kwargs)
        # fc0 gets its input size up front; fc1 is created without in_units.
        self.fc0 = nn.Dense(units=num_hidden, in_units=num_in)
        self.fc1 = nn.Dense(units=num_hidden)
        self.elemwise_add = elemwise_add

    def forward(self, data0, data1, data2):
        dense0_out = self.fc0(data0)
        dense1_out = self.fc1(data1)
        if not self.elemwise_add:
            inner_sum = data2 + dense0_out
            return dense1_out + inner_sum
        # elemwise_add is called on the as_nd_ndarray() form of its operands,
        # and each result is converted back with as_np_ndarray().
        inner_sum = mx.nd.elemwise_add(
            data2.as_nd_ndarray(), dense0_out.as_nd_ndarray()).as_np_ndarray()
        outer_sum = mx.nd.elemwise_add(
            dense1_out.as_nd_ndarray(), inner_sum.as_nd_ndarray()).as_np_ndarray()
        return outer_sum

def benchmark_float(elemwise_add):
    """Benchmark the float FCWithSum network for every entry of ``sizes``.

    For each configuration a fresh network is built, hybridized, optimized
    for the ``ONEDNN`` backend and timed with ``measure``. The last network
    built is handed to ``dump_graph_fn`` once all configurations are done.
    """
    op_name = operator_string(elemwise_add)
    print_header(op_name + ', float')
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        # Inputs are drawn in the order data0, data1, data2; data2 has the
        # shape of the Dense outputs.
        input_shapes = (shape, shape, (shape[0], nhid))
        data0, data1, data2 = [
            mx.np.random.uniform(size=input_shape, low=-1.0, high=1.0)
            for input_shape in input_shapes
        ]
        net.optimize_for(data0, data1, data2, backend='ONEDNN')
        measure(net, data0, data1, data2, shape, nhid)
    dump_graph_fn(net, op_name + '_float')

class CalibIter(mx.io.DataIter):
    """Data iterator that yields a single, fixed batch.

    NOTE(review): presumably meant as calibration input for quantization; it
    is not referenced by the visible benchmark code.
    """

    def __init__(self, batch, data_shape, batch_size):
        super().__init__(batch_size)
        self.label_shape = (batch_size,)
        self.data_shape = data_shape
        # A plain tuple describes one input named 'data'; any other value is
        # stored as given.
        self.provide_data = (
            [('data', data_shape)] if isinstance(data_shape, tuple) else data_shape
        )
        self.provide_label = []
        self.batch = batch

    def __iter__(self):
        yield from (self.batch,)

def benchmark_int8(quantize_mode, quantize_granularity, elemwise_add):
    """Benchmark the quantized FCWithSum network for every entry of ``sizes``.

    Each configuration builds a fresh network, quantizes it with
    ``quantization.quantize_net`` (naive calibration on one batch made of the
    benchmark inputs) and times it with ``measure``. The last quantized
    network is handed to ``dump_graph_fn`` once all configurations are done.
    """
    op_name = operator_string(elemwise_add)
    print_header(op_name + ', mode = ' + quantize_mode +
                 ', granularity = ' + quantize_granularity)
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        # Inputs are drawn in the order data0, data1, data2; data2 has the
        # shape of the Dense outputs.
        input_shapes = (shape, shape, (shape[0], nhid))
        data0, data1, data2 = [
            mx.np.random.uniform(size=input_shape, low=-1.0, high=1.0)
            for input_shape in input_shapes
        ]
        calib_data = mx.gluon.data.DataLoader(
            mx.gluon.data.ArrayDataset(data0, data1, data2), batch_size=1)
        quantize_kwargs = dict(
            device=mx.cpu(),
            exclude_layers=None,
            exclude_operators=None,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_batches=1,
            quantize_mode=quantize_mode,
            quantize_granularity=quantize_granularity,
        )
        net = quantization.quantize_net(net, **quantize_kwargs)
        net.hybridize(static_alloc=True, static_shape=True)
        measure(net, data0, data1, data2, shape, nhid)
    graph_postfix = '_'.join(
        [op_name, str(quantize_mode), str(quantize_granularity)])
    dump_graph_fn(net, graph_postfix)

# Float runs: one table per add-operator variant.
for elemwise_add in (True, False):
    benchmark_float(elemwise_add)

# INT8 runs: one table per (mode, granularity, add-operator variant) combination.
for quantize_mode in ('smart', 'full'):
    for quantize_granularity in ('tensor-wise', 'channel-wise'):
        for elemwise_add in (True, False):
            benchmark_int8(quantize_mode, quantize_granularity, elemwise_add)
54 changes: 54 additions & 0 deletions benchmark/python/dnnl/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Script for running a Python benchmark with the OMP parameters set properly for it

# Abort with a usage message when called without arguments.
# The message goes to stderr and the exit status is non-zero, so callers that
# capture stdout or test the status can detect the usage error.
check_parametrs() {
    if [ "$#" -eq 0 ] ; then
        {
            echo "Please give python script to run as parameter."
            echo "Optionally you can give number of threads to use and python scripts parameters:"
            echo " $(basename "$0") [num_threads] python_script [python script parameters]"
        } >&2
        exit 1
    fi
}

check_parametrs "$@"

# Physical core count = sockets * cores per socket.
# LC_ALL=C keeps the lscpu field labels in English so the greps match.
NUM_SOCKET=$(LC_ALL=C lscpu | grep 'Socket(s)' | awk '{print $NF}')
CORES_PER_SOCKET=$(LC_ALL=C lscpu | grep 'Core(s) per socket' | awk '{print $NF}')
NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))

# An optional leading integer argument overrides the number of threads.
integer_reg='^[0-9]+$'
if [[ $1 =~ $integer_reg ]] ; then
    if (($1 > $NUM_CORES)); then
        echo >&2
        echo "WARNING: given number of threads = $1" \
             " is greater than number of physical cores = $NUM_CORES." >&2
        echo >&2
    fi
    NUM_CORES=$1
    shift
    # The python script must still be present after the thread count is consumed.
    check_parametrs "$@"
fi

CORES={0}:${NUM_CORES}:1

# Pass the OMP settings through env and the arguments as "$@" instead of
# eval-ing a flat string: arguments containing spaces or shell metacharacters
# reach python3 unchanged and are never re-interpreted by the shell.
OMP_SETTINGS=("OMP_NUM_THREADS=${NUM_CORES}" "OMP_PROC_BIND=TRUE" "OMP_PLACES=${CORES}")
echo "${OMP_SETTINGS[*]} python3 -u $*" >&2
env "${OMP_SETTINGS[@]}" python3 -u "$@"
82 changes: 82 additions & 0 deletions benchmark/python/dnnl/run_per_thread.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Script for running a Python benchmark over a range of OMP thread counts


# Print the usage text to stderr and terminate with a failure status.
# Every caller invokes this on an argument error, so the exit status must be
# non-zero and the text must stay out of captured stdout.
help_and_exit() {
    {
        echo "Usage:"
        echo " $(basename "$0") [start_num_threads step_num_threads end_num_threads] python_script [python script parameters]"
        # The python script itself is required; only the bracketed parts are optional.
        echo "Number of threads range parameters and python script parameters are optional."
    } >&2
    exit 1
}

if [ "$#" -eq 0 ] ; then
    help_and_exit
fi

# Physical core count = sockets * cores per socket.
# LC_ALL=C keeps the lscpu field labels in English so the greps match.
NUM_SOCKET=$(LC_ALL=C lscpu | grep 'Socket(s)' | awk '{print $NF}')
CORES_PER_SOCKET=$(LC_ALL=C lscpu | grep 'Core(s) per socket' | awk '{print $NF}')
NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))

# Default range: 1, 2, ..., number of physical cores.
NT_START=1
NT_STEP=1
NT_END=$NUM_CORES

integer_reg='^[0-9]+$'
# At most one optional sign; a repeated sign such as "+-+2" is rejected.
signed_integer_reg='^[+-]?[0-9]+$'
if [[ $1 =~ $integer_reg ]] ; then
    if [[ $2 =~ $signed_integer_reg ]] && [[ $3 =~ $integer_reg ]]; then
        NT_START=$1
        NT_STEP=$2
        NT_END=$3
        shift 3
        if [ "$#" -eq 0 ] ; then
            help_and_exit
        fi
    else
        echo "Provide 3 numbers for threads range: start, step and the end." >&2
        help_and_exit
    fi
fi

NT_SEQUENCE=$(seq "$NT_START" "$NT_STEP" "$NT_END")
if [ -z "$NT_SEQUENCE" ]; then
    echo "Given threads range produces empty sequence." >&2
    help_and_exit
else
    echo "Start python script $1 for following number of threads:" >&2
    echo $NT_SEQUENCE >&2
fi

RUN_SCRIPT="$(dirname "$0")/run.sh"

# Per-thread-count results go to a private scratch directory that is removed
# on exit; fixed names in /tmp would collide between concurrent runs and
# would be left behind afterwards.
TMP_DIR=$(mktemp -d) || exit 1
trap 'rm -rf "$TMP_DIR"' EXIT

TMP_FILES=()
for NT in $NT_SEQUENCE;
do
    TMP_FILE="${TMP_DIR}/_result_${NT}.txt"
    echo 1>"${TMP_FILE}"
    if [[ $NT -eq $NT_START ]]; then
        # The first column keeps the size columns and the test headers.
        echo "NUM_THREADS = $NT" 1>>"${TMP_FILE}"
        "$RUN_SCRIPT" "$NT" "$@" 1>>"${TMP_FILE}"
    else
        echo " $NT" 1>>"${TMP_FILE}"
        "$RUN_SCRIPT" "$NT" "$@" --no_size_column --no_test_header 1>>"${TMP_FILE}"
    fi
    TMP_FILES+=("${TMP_FILE}")
done
# Join the per-thread-count columns line by line into one table.
paste -d "" "${TMP_FILES[@]}"