Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions onnxruntime/core/providers/nuphar/scripts/model_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __iter__(self): # need this to make dict for json
('QuantizationType', 'Signed' if self.sign_bit_ else 'Unsigned'),
('ReservedBit', self.reserved_bits_)])

def quantize_matmul_2d_with_weight(in_node, in_graph, nf, converted_weights, quantized_inputs, qcfg_dict, update_qcfg_dict, default_qcfg):
def quantize_matmul_2d_with_weight(in_node, in_graph, nf, converted_weights, quantized_inputs, qcfg_dict, update_qcfg_dict, default_qcfg, onnx_opset_ver):
assert in_node.op_type == 'MatMul'

# quantize weight
Expand Down Expand Up @@ -158,7 +158,11 @@ def quantize_matmul_2d_with_weight(in_node, in_graph, nf, converted_weights, qua
Q_Xf = nf.make_node('Mul', [norm_X, np.asarray(x_qcfg.q_range()).astype(np.float32)])
Q_Xf = nf.make_node('Add', [Q_Xf, np.asarray(0.5).astype(np.float32)])
Q_Xf = nf.make_node('Floor', Q_Xf)
Q_Xf = nf.make_node('Clip', Q_Xf, {'max':x_qcfg.q_max(), 'min':x_qcfg.q_min()})
if onnx_opset_ver < 11:
Q_Xf = nf.make_node('Clip', Q_Xf, {'max':x_qcfg.q_max(), 'min':x_qcfg.q_min()})
else:
# Since opset 11, Clip takes min and max as inputs rather than attributes
Q_Xf = nf.make_node('Clip', [Q_Xf, np.asarray(x_qcfg.q_min()).astype(np.float32), np.asarray(x_qcfg.q_max()).astype(np.float32)])
Q_X = nf.make_node('Cast', Q_Xf, {'to':int({np.uint8 : onnx.TensorProto.UINT8,
np.int8 : onnx.TensorProto.INT8,
np.uint16 : onnx.TensorProto.UINT16,
Expand Down Expand Up @@ -238,7 +242,7 @@ def convert_matmul_model(input_model, output_model, only_for_scan=False, share_i
out_mp = onnx.ModelProto()
out_mp.CopyFrom(in_mp)
out_mp.ir_version = 5 # update ir version to avoid requirement of initializer in graph input
ensure_opset(out_mp, 10) # bump up to ONNX opset 10, which is required for MatMulInteger
onnx_opset_ver = ensure_opset(out_mp, 10) # bump up to ONNX opset 10, which is required for MatMulInteger
ensure_opset(out_mp, 1, 'com.microsoft') # add MS domain for MatMulInteger16
out_mp.graph.ClearField('node')
nf = NodeFactory(out_mp.graph)
Expand All @@ -249,7 +253,7 @@ def convert_matmul_model(input_model, output_model, only_for_scan=False, share_i
continue

if in_n.op_type == 'MatMul' and not only_for_scan:
if quantize_matmul_2d_with_weight(in_n, in_mp.graph, nf, converted_weights, quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg):
if quantize_matmul_2d_with_weight(in_n, in_mp.graph, nf, converted_weights, quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg, onnx_opset_ver):
continue

out_n = out_mp.graph.node.add()
Expand All @@ -262,7 +266,7 @@ def convert_matmul_model(input_model, output_model, only_for_scan=False, share_i
subgraph_quantized_inputs = {} if share_input_quantization else None # remember quantized inputs that might be able to share between MatMuls
for in_sn in in_subgraph.node:
if in_sn.op_type == 'MatMul':
if quantize_matmul_2d_with_weight(in_sn, in_subgraph, scan_nf, converted_weights, subgraph_quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg):
if quantize_matmul_2d_with_weight(in_sn, in_subgraph, scan_nf, converted_weights, subgraph_quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg, onnx_opset_ver):
continue

if upgrade_op(scan_nf, in_sn):
Expand Down
52 changes: 27 additions & 25 deletions onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,12 +230,16 @@ def _merge_symbols(self, dims):
if self.auto_merge_:
assert len(dims) == 2 # only allow symbol->int merge in binary ops for now
is_int = [is_literal(d) for d in dims]
assert sum(is_int) == 1
int_dim = is_int.index(1)
if self.verbose_ > 0:
print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim]))
self._check_merged_dims(dims, allow_broadcast=False)
return dims[int_dim]
if sum(is_int) == 1:
int_dim = is_int.index(1)
if self.verbose_ > 0:
print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim]))
self._check_merged_dims(dims, allow_broadcast=False)
return dims[int_dim]
else:
if self.verbose_ > 0:
print('dim {} has been merged with dim {}'.format(dims[0], dims[1]))
return dims[0]
else:
return None
if all([d == dims[0] for d in dims]):
Expand Down Expand Up @@ -645,12 +649,10 @@ def _infer_ConstantOfShape(self, node):
def _infer_Expand(self, node):
expand_to_shape = self._try_get_value(node, 1)
if expand_to_shape is not None:
sympy_shape = self._get_sympy_shape(node, 0)
new_sympy_shape = self._broadcast_shapes(sympy_shape, expand_to_shape)

# new_shape's dim can come from 'Expand' computation
self._update_computed_dims(new_sympy_shape)
new_shape = get_shape_from_sympy_shape(new_sympy_shape)
# new_shape's dim can come from shape value
self._update_computed_dims(expand_to_shape)
shape = self._get_shape(node, 0)
new_shape = self._broadcast_shapes(shape, get_shape_from_sympy_shape(expand_to_shape))
vi = self.known_vi_[node.output[0]]
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, new_shape))

Expand Down Expand Up @@ -780,13 +782,13 @@ def _infer_Pad(self, node):
rank = len(sympy_shape)
if pads is not None:
assert len(pads) == 2*rank
new_shape = [d + pad_up + pad_down for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:])]
self._update_computed_dims(new_shape)
new_sympy_shape = [d + pad_up + pad_down for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:])]
self._update_computed_dims(new_sympy_shape)
else:
# dynamic pads, create new symbolic dimensions
new_shape = self._new_symbolic_shape(rank, node)
new_sympy_shape = self._new_symbolic_shape(rank, node)
output_tp = self.known_vi_[node.input[0]].type.tensor_type.elem_type
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_tp, get_shape_from_sympy_shape(new_shape)))
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_tp, get_shape_from_sympy_shape(new_sympy_shape)))

def _infer_Pool(self, node):
sympy_shape = self._compute_conv_pool_shape(node)
Expand All @@ -804,12 +806,12 @@ def _infer_Range(self, node):
start = as_scalar(input_data[0])
limit = as_scalar(input_data[1])
delta = as_scalar(input_data[2])
new_shape = [sympy.Max(sympy.ceiling((limit - start)/delta), 0)]
new_sympy_shape = [sympy.Max(sympy.ceiling((limit - start)/delta), 0)]
else:
new_dim = self._new_symbolic_dim_from_output(node)
new_shape = [self.symbolic_dims_[new_dim]]
self._update_computed_dims(new_shape)
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, get_shape_from_sympy_shape(new_shape)))
new_sympy_shape = [self.symbolic_dims_[new_dim]]
self._update_computed_dims(new_sympy_shape)
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, get_shape_from_sympy_shape(new_sympy_shape)))

def _infer_ReduceProd(self, node):
axes = get_attribute(node, 'axes')
Expand Down Expand Up @@ -1042,15 +1044,15 @@ def _infer_Squeeze(self, node):
def _infer_Tile(self, node):
repeats_value = self._get_value(node, 1)
input_sympy_shape = self._get_sympy_shape(node, 0)
new_shape = []
new_sympy_shape = []
for i,d in enumerate(input_sympy_shape):
new_dim = d * repeats_value[i]
new_shape.append(new_dim)
self._update_computed_dims(new_shape)
new_sympy_shape.append(new_dim)
self._update_computed_dims(new_sympy_shape)
vi = self.known_vi_[node.output[0]]
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
vi.type.tensor_type.elem_type,
get_shape_from_sympy_shape(new_shape)))
get_shape_from_sympy_shape(new_sympy_shape)))

def _infer_TopK(self, node):
rank = self._get_shape_rank(node, 0)
Expand Down Expand Up @@ -1268,4 +1270,4 @@ def parse_arguments():
print('output model ' + args.output)
print('Doing symbolic shape inference...')
out_mp = SymbolicShapeInference.infer_shapes(args.input, args.output, args.int_max, args.auto_merge, args.guess_output_rank, args.verbose)
print('Done!')
print('Done!')