Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ __pycache__
build
cmake-build*
data
model
recommonmark
deps

Expand Down
3 changes: 2 additions & 1 deletion cpp-package/include/mxnet-cpp/monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ class Monitor {
/*!
* \brief install callback to executor. Supports installing to multiple executors.
* \param exe The executor to install to.
* \param monitor_all If true, monitor both input and output, otherwise monitor output only.
*/
void install(Executor *exe);
void install(Executor *exe, bool monitor_all = false);

/*!
* \brief Start collecting stats for current batch. Call before calling forward.
Expand Down
6 changes: 3 additions & 3 deletions cpp-package/include/mxnet-cpp/monitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ inline Monitor::Monitor(int interval, std::regex pattern, StatFunc stat_func)
: interval(interval), pattern(pattern), stat_func(stat_func), step(0) {
}

inline void Monitor::install(Executor *exe) {
inline void Monitor::install(Executor *exe, bool monitor_all) {
MXExecutorSetMonitorCallback(exe->handle_,
static_cast<ExecutorMonitorCallback>(&Monitor::executor_callback),
this);
static_cast<ExecutorMonitorCallback>(&Monitor::executor_callback),
this, monitor_all);
exes.push_back(exe);
}

Expand Down
37 changes: 14 additions & 23 deletions example/quantization/imagenet_gen_qsym_mkldnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,24 @@ def convert_from_gluon(model_name, image_shape, classes=1000, logger=None):
symnet = mx.symbol.load_json(y.tojson())
params = net.collect_params()
args = {}
auxs = {}
auxs = {}
for param in params.values():
v = param._reduce()
k = param.name
if 'running' in k:
auxs[k] = v
else:
args[k] = v
args[k] = v
mod = mx.mod.Module(symbol=symnet, context=mx.cpu(),
label_names = ['softmax_label'])
mod.bind(for_training=False,
data_shapes=[('data', (1,) +
mod.bind(for_training=False,
data_shapes=[('data', (1,) +
tuple([int(i) for i in image_shape.split(',')]))])
mod.set_params(arg_params=args, aux_params=auxs)
dst_dir = os.path.join(dir_path, 'model')
prefix = os.path.join(dir_path, 'model', model_name)
if not os.path.isdir(dst_dir):
os.mkdir(dst_dir)
os.mkdir(dst_dir)
mod.save_checkpoint(prefix, 0)
return prefix

Expand Down Expand Up @@ -104,7 +104,7 @@ def save_params(fname, arg_params, aux_params, logger=None):
'you can set to custom to load your pre-trained model.')
parser.add_argument('--use-gluon-model', type=bool, default=False,
help='If enabled, will download pretrained model from Gluon-CV '
'and convert to symbolic model ')
'and convert to symbolic model ')
parser.add_argument('--batch-size', type=int, default=32)
parser.add_argument('--label-name', type=str, default='softmax_label')
parser.add_argument('--calib-dataset', type=str, default='data/val_256_q90.rec',
Expand All @@ -114,7 +114,7 @@ def save_params(fname, arg_params, aux_params, logger=None):
help='number of threads for data decoding')
parser.add_argument('--num-calib-batches', type=int, default=10,
help='number of batches for calibration')
parser.add_argument('--exclude-first-conv', action='store_true', default=True,
parser.add_argument('--exclude-first-conv', action='store_true', default=False,
help='excluding quantizing the first conv layer since the'
' input data may have negative values which aren\'t supported at the moment' )
parser.add_argument('--shuffle-dataset', action='store_true', default=True,
Expand All @@ -140,8 +140,8 @@ def save_params(fname, arg_params, aux_params, logger=None):
' thresholds. This mode is expected to produce the best inference accuracy of all three'
' kinds of quantized models if the calibration dataset is representative enough of the'
' inference dataset.')
parser.add_argument('--quantized-dtype', type=str, default='uint8',
choices=['int8', 'uint8'],
parser.add_argument('--quantized-dtype', type=str, default='auto',
choices=['auto', 'int8', 'uint8'],
help='quantization destination data type for input data')
parser.add_argument('--enable-calib-quantize', type=bool, default=True,
help='If enabled, the quantize op will '
Expand Down Expand Up @@ -203,35 +203,30 @@ def save_params(fname, arg_params, aux_params, logger=None):
if args.model == 'imagenet1k-resnet-152':
rgb_mean = '0,0,0'
rgb_std = '1,1,1'
calib_layer = lambda name: name.endswith('_output')
excluded_sym_names += ['flatten0', 'fc1', 'pooling0']
excluded_sym_names += ['flatten0', 'fc1']
if exclude_first_conv:
excluded_sym_names += ['conv0']
elif args.model == 'imagenet1k-inception-bn':
rgb_mean = '123.68,116.779,103.939'
rgb_std = '1,1,1'
calib_layer = lambda name: name.endswith('_output')
excluded_sym_names += ['flatten', 'fc1']
if exclude_first_conv:
excluded_sym_names += ['conv_1']
elif args.model in ['resnet50_v1', 'resnet101_v1']:
rgb_mean = '123.68,116.779,103.939'
rgb_std = '58.393, 57.12, 57.375'
calib_layer = lambda name: name.endswith('_output')
excluded_sym_names += ['resnetv10_dense0_fwd', 'resnetv10_pool0_fwd']
excluded_sym_names += ['resnetv10_dense0_fwd']
if exclude_first_conv:
excluded_sym_names += ['resnetv10_conv0_fwd']
elif args.model == 'squeezenet1.0':
rgb_mean = '123.68,116.779,103.939'
rgb_std = '58.393, 57.12, 57.375'
calib_layer = lambda name: name.endswith('_output')
excluded_sym_names += ['squeezenet0_flatten0_flatten0']
if exclude_first_conv:
excluded_sym_names += ['squeezenet0_conv0_fwd']
elif args.model == 'mobilenet1.0':
rgb_mean = '123.68,116.779,103.939'
rgb_std = '58.393, 57.12, 57.375'
calib_layer = lambda name: name.endswith('_output')
excluded_sym_names += ['mobilenet0_flatten0_flatten0',
'mobilenet0_dense0_fwd',
'mobilenet0_pool0_fwd']
Expand All @@ -240,16 +235,13 @@ def save_params(fname, arg_params, aux_params, logger=None):
elif args.model == 'inceptionv3':
rgb_mean = '123.68,116.779,103.939'
rgb_std = '58.393, 57.12, 57.375'
calib_layer = lambda name: name.endswith('_output')
excluded_sym_names += ['inception30_dense0_fwd',
'inception30_pool0_fwd']
excluded_sym_names += ['inception30_dense0_fwd']
if exclude_first_conv:
excluded_sym_names += ['inception30_conv0_fwd']
elif args.model == 'custom':
# add rgb mean/std of your model.
rgb_mean = '0,0,0'
rgb_std = '0,0,0'
calib_layer = lambda name: name.endswith('_output')
# add layer names you do not want to quantize.
# add conv/pool layer names that has negative inputs
# since Intel MKL-DNN currently only supports uint8 quantization.
Expand Down Expand Up @@ -302,9 +294,8 @@ def save_params(fname, arg_params, aux_params, logger=None):
ctx=ctx, excluded_sym_names=excluded_sym_names,
calib_mode=calib_mode, calib_data=data,
num_calib_examples=num_calib_batches * batch_size,
calib_layer=calib_layer, quantized_dtype=args.quantized_dtype,
label_names=(label_name,), calib_quantize_op = True,
logger=logger)
calib_layer=None, quantized_dtype=args.quantized_dtype,
label_names=(label_name,), logger=logger)
if calib_mode == 'entropy':
suffix = '-quantized-%dbatches-entropy' % num_calib_batches
elif calib_mode == 'naive':
Expand Down
17 changes: 6 additions & 11 deletions example/ssd/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def save_params(fname, arg_params, aux_params, logger=None):
parser.add_argument('--batch-size', type=int, default=32)
parser.add_argument('--num-calib-batches', type=int, default=5,
help='number of batches for calibration')
parser.add_argument('--exclude-first-conv', action='store_true', default=True,
parser.add_argument('--exclude-first-conv', action='store_true', default=False,
help='excluding quantizing the first conv layer since the'
' number of channels is usually not a multiple of 4 in that layer'
' which does not satisfy the requirement of cuDNN')
Expand All @@ -78,8 +78,8 @@ def save_params(fname, arg_params, aux_params, logger=None):
' thresholds. This mode is expected to produce the best inference accuracy of all three'
' kinds of quantized models if the calibration dataset is representative enough of the'
' inference dataset.')
parser.add_argument('--quantized-dtype', type=str, default='uint8',
choices=['int8', 'uint8'],
parser.add_argument('--quantized-dtype', type=str, default='auto',
choices=['auto', 'int8', 'uint8'],
help='quantization destination data type for input data')

args = parser.parse_args()
Expand Down Expand Up @@ -119,12 +119,9 @@ def save_params(fname, arg_params, aux_params, logger=None):
exclude_first_conv = args.exclude_first_conv
excluded_sym_names = []
rgb_mean = '123,117,104'
calib_layer = lambda name: name.endswith('_output')
for i in range(1,19):
excluded_sym_names += ['flatten'+str(i)]
excluded_sym_names += ['relu4_3_cls_pred_conv',
'relu7_cls_pred_conv',
'relu4_3_loc_pred_conv']

if exclude_first_conv:
excluded_sym_names += ['conv1_1']

Expand Down Expand Up @@ -156,10 +153,8 @@ def save_params(fname, arg_params, aux_params, logger=None):
ctx=ctx, excluded_sym_names=excluded_sym_names,
calib_mode=calib_mode, calib_data=eval_iter,
num_calib_examples=num_calib_batches * batch_size,
calib_layer=calib_layer, quantized_dtype=args.quantized_dtype,
label_names=(label_name,),
calib_quantize_op = True,
logger=logger)
calib_layer=None, quantized_dtype=args.quantized_dtype,
label_names=(label_name,), logger=logger)
sym_name = '%s-symbol.json' % ('./model/cqssd_vgg16_reduced_300')
param_name = '%s-%04d.params' % ('./model/cqssd_vgg16_reduced_300', epoch)
qsym = qsym.get_backend_symbol('MKLDNN_POST_QUANTIZE')
Expand Down
7 changes: 4 additions & 3 deletions include/mxnet/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1556,13 +1556,12 @@ MXNET_DLL int MXSymbolInferType(SymbolHandle sym,
* \param num_offline number of parameters that are quantized offline
* \param offline_params array of c strings representing the names of params quantized offline
* \param quantized_dtype the quantized destination type for input data.
* \param calib_quantize whether calibrate quantize op with offline calibration data.
*/
MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle, SymbolHandle *ret_sym_handle,
const mx_uint num_excluded_symbols,
const char **excluded_symbols,
const mx_uint num_offline, const char **offline_params,
const char *quantized_dtype, const bool calib_quantize);
const char *quantized_dtype);

/*!
* \brief Set calibration table to node attributes in the sym
Expand Down Expand Up @@ -1833,10 +1832,12 @@ MXNET_DLL int MXExecutorGetOptimizedSymbol(ExecutorHandle handle,

/*!
* \brief set a call back to notify the completion of operation
* \param monitor_all If true, monitor both input and output, otherwise monitor output only.
*/
MXNET_DLL int MXExecutorSetMonitorCallback(ExecutorHandle handle,
ExecutorMonitorCallback callback,
void* callback_handle);
void* callback_handle,
bool monitor_all);
//--------------------------------------------
// Part 5: IO Interface
//--------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion include/mxnet/executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ class Executor {
/*!
* \brief Install a callback to notify the completion of operation.
*/
virtual void SetMonitorCallback(const MonitorCallback& callback) {}
virtual void SetMonitorCallback(const MonitorCallback& callback, bool monitor_all) {}
}; // class executor
} // namespace mxnet
#endif // MXNET_EXECUTOR_H_
10 changes: 7 additions & 3 deletions include/mxnet/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -694,9 +694,13 @@ class NDArray {
/*
* Create NDArray from mkldnn memory.
* mkldnn_mem The mkldnn memory to be managed.
* static_data If true, mkldnn memory won't be freed on destruction.
*/
explicit NDArray(const mkldnn::memory *mkldnn_mem, bool static_data = true);
explicit NDArray(const std::shared_ptr<mkldnn::memory> &mkldnn_mem);
/*
* Create NDArray from mkldnn memory descriptor.
* mem_pd The mkldnn memory descriptor to be created.
*/
explicit NDArray(mkldnn::memory::primitive_desc mem_pd);
/*
* Test if the data is stored in one of special MKLDNN format.
*/
Expand Down Expand Up @@ -776,7 +780,7 @@ class NDArray {
/*!
* \ Fix mkldnn memory descriptor mismatch from NDArray.
*/
void UpdateMKLDNNMemDesc();
void UpdateMKLDNNMemDesc(mkldnn::memory::format format);
#endif

/*!
Expand Down
2 changes: 1 addition & 1 deletion include/mxnet/tensor_blob.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ class TBlob {
CHECK(Device::kDevMask == this->dev_mask())
<< "TBlob.get: device type do not match specified type";
CHECK_EQ(this->CheckContiguous(), true) << "TBlob.get_reshape: must be contiguous";
CHECK_EQ(this->shape_.Size(), shape.Size())
CHECK_EQ(this->shape_.Size(), static_cast<size_t>(shape.Size()))
<< "TBlob.get_with_shape: new and old shape do not match total elements";
return mshadow::Tensor<Device, dim, DType>(dptr<DType>(), shape,
shape[dim - 1], stream);
Expand Down
5 changes: 3 additions & 2 deletions perl-package/AI-MXNetCAPI/mxnet.i
Original file line number Diff line number Diff line change
Expand Up @@ -1614,10 +1614,12 @@ int MXExecutorReshape(int partial_shaping,

/*!
* \brief set a call back to notify the completion of operation
* \param monitor_all If true, monitor both input and output, otherwise monitor output only.
*/
int MXExecutorSetMonitorCallback(ExecutorHandle handle,
ExecutorMonitorCallback callback,
void* callback_handle);
void* callback_handle,
bool monitor_all);
//--------------------------------------------
// Part 5: IO Interface
//--------------------------------------------
Expand Down Expand Up @@ -2167,4 +2169,3 @@ int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** cuda_kernel_
mx_uint grid_dim_z, mx_uint block_dim_x,
mx_uint block_dim_y, mx_uint block_dim_z,
mx_uint shared_mem);

Loading