From 5faa211476eb1506bb9813293518af8417a8ce5e Mon Sep 17 00:00:00 2001
From: Wei Chu <weichu@amazon.com>
Date: Mon, 22 Mar 2021 13:25:01 -0700
Subject: [PATCH 1/7] split test_onnxruntime.py

---
 ci/docker/runtime_functions.sh                |   36 +-
 tests/nightly/JenkinsfileForBinaries          |   14 +-
 tests/python-pytest/onnx/test_onnxruntime.py  | 1233 -----------------
 .../python-pytest/onnx/test_onnxruntime_cv.py |  534 +++++++
 .../onnx/test_onnxruntime_nlp.py              |  427 ++++++
 5 files changed, 994 insertions(+), 1250 deletions(-)
 create mode 100644 tests/python-pytest/onnx/test_onnxruntime_cv.py
 create mode 100644 tests/python-pytest/onnx/test_onnxruntime_nlp.py

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 9bfc8418b6ad..3b0270fd1311 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -1263,18 +1263,17 @@ integrationtest_ubuntu_cpu_onnx() {
     # Skip this as https://github.com/apache/incubator-mxnet/pull/19914 breaks import
     #pytest $COV_ARG --verbose tests/python-pytest/onnx/test_models.py
     #pytest $COV_ARG --verbose tests/python-pytest/onnx/test_node.py
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_bert_inference_onnxruntime
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_obj_class_model_inference_onnxruntime[mobilenetv3_large]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_obj_class_model_inference_onnxruntime[resnest200]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_obj_class_model_inference_onnxruntime[resnet50_v2]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_obj_class_model_inference_onnxruntime[vgg19_bn]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_obj_detection_model_inference_onnxruntime[center_net_resnet101_v1b_voc]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_img_segmentation_model_inference_onnxruntime[deeplab_resnet50_citys]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_pose_estimation_model_inference_onnxruntime[mobile_pose_mobilenet1.0]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_action_recognition_model_inference_onnxruntime[inceptionv3_kinetics400]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_dynamic_shape_bert_inference_onnxruntime
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_dynamic_shape_cv_inference_onnxruntime
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py::test_transformer_pretrained_inference_onnxruntime
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_nlp.py::test_bert_inference_onnxruntime
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[mobilenetv3_large]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[resnest200]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[resnet50_v2]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[vgg19_bn]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_detection_model_inference_onnxruntime[center_net_resnet101_v1b_voc]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_img_segmentation_model_inference_onnxruntime[deeplab_resnet50_citys]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_pose_estimation_model_inference_onnxruntime[mobile_pose_mobilenet1.0]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_action_recognition_model_inference_onnxruntime[inceptionv3_kinetics400]
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_nlp.py::test_dynamic_shape_bert_inference_onnxruntime
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_dynamic_shape_cv_inference_onnxruntime
 }
 
 integrationtest_ubuntu_gpu_python() {
@@ -1604,13 +1603,22 @@ nightly_estimator() {
     nosetests test_sentiment_rnn.py
 }
 
-nightly_onnx_tests() {
+nightly_onnx_cv_tests() {
     set -ex
     export PYTHONPATH=./python/
     export MXNET_SUBGRAPH_VERBOSE=0
     export DMLC_LOG_STACK_TRACE_DEPTH=10
     COV_ARG="--cov=./ --cov-report=xml --cov-append"
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime.py
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py
+}
+
+nightly_onnx_nlp_tests() {
+    set -ex
+    export PYTHONPATH=./python/
+    export MXNET_SUBGRAPH_VERBOSE=0
+    export DMLC_LOG_STACK_TRACE_DEPTH=10
+    COV_ARG="--cov=./ --cov-report=xml --cov-append"
+    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_nlp.py
 }
 
 # For testing PRs
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 00f543fb58cb..26d92010ba9a 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -94,11 +94,19 @@ core_logic: {
         }
       }
     },
-    'ONNX: CPU': {
+    'ONNX-CV: CPU': {
       node(NODE_LINUX_CPU) {
-        ws('workspace/onnx-test-cpu') {
+        ws('workspace/onnx-cv-test-cpu') {
           utils.unpack_and_init('cpu_int64', mx_cmake_lib)
-          utils.docker_run('ubuntu_nightly_cpu', 'nightly_onnx_tests', false)
+          utils.docker_run('ubuntu_nightly_cpu', 'nightly_onnx_cv_tests', false)
+        }
+      }
+    },
+    'ONNX-NLP: CPU': {
+      node(NODE_LINUX_CPU) {
+        ws('workspace/onnx-nlp-test-cpu') {
+          utils.unpack_and_init('cpu_int64', mx_cmake_lib)
+          utils.docker_run('ubuntu_nightly_cpu', 'nightly_onnx_nlp_tests', false)
         }
       }
     }
diff --git a/tests/python-pytest/onnx/test_onnxruntime.py b/tests/python-pytest/onnx/test_onnxruntime.py
index 6ad0794f875d..e69de29bb2d1 100644
--- a/tests/python-pytest/onnx/test_onnxruntime.py
+++ b/tests/python-pytest/onnx/test_onnxruntime.py
@@ -1,1233 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import mxnet as mx
-import numpy as np
-import gluoncv
-import onnxruntime
-
-from mxnet.test_utils import assert_almost_equal
-from common import with_seed
-
-import json
-import os
-import pytest
-import shutil
-
-
-
-class GluonModel():
-    def __init__(self, model_name, input_shape, input_dtype, tmpdir):
-        self.model_name = model_name
-        self.input_shape = input_shape
-        self.input_dtype = input_dtype
-        self.modelpath = os.path.join(tmpdir, model_name)
-        self.ctx = mx.cpu(0)
-        self.get_model()
-        self.export()
-
-    def get_model(self):
-        self.model = gluoncv.model_zoo.get_model(self.model_name, pretrained=True, ctx=self.ctx)
-        self.model.hybridize()
-
-    def export(self):
-        data = mx.nd.zeros(self.input_shape, dtype=self.input_dtype, ctx=self.ctx)
-        self.model.forward(data)
-        self.model.export(self.modelpath, 0)
-
-    def export_onnx(self):
-        onnx_file = self.modelpath + ".onnx"
-        mx.contrib.onnx.export_model(self.modelpath + "-symbol.json", self.modelpath + "-0000.params",
-                                     [self.input_shape], self.input_dtype, onnx_file)
-        return onnx_file
-
-    def export_onnx_dynamic(self, dynamic_input_shapes):
-        onnx_file = self.modelpath + ".onnx"
-        mx.contrib.onnx.export_model(self.modelpath + "-symbol.json", self.modelpath + "-0000.params",
-                                     [self.input_shape], self.input_dtype, onnx_file, dynamic=True,
-                                     dynamic_input_shapes=dynamic_input_shapes)
-        return onnx_file
-
-    def predict(self, data):
-        return self.model(data)
-
-
-
-@with_seed()
-@pytest.mark.parametrize('model', ['bert_12_768_12'])
-def test_bert_inference_onnxruntime(tmp_path, model):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        dataset = 'book_corpus_wiki_en_uncased'
-        ctx = mx.cpu(0)
-        model, vocab = nlp.model.get_model(
-            name=model,
-            ctx=ctx,
-            dataset_name=dataset,
-            pretrained=False,
-            use_pooler=True,
-            use_decoder=False,
-            use_classifier=False)
-        model.initialize(ctx=ctx)
-        model.hybridize(static_alloc=True)
-
-        batch = 5
-        seq_length = 16
-        # create synthetic test data
-        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32')
-        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
-        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
-
-        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
-
-        prefix = "%s/bert" % tmp_path
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-
-        input_shapes = [(batch, seq_length), (batch, seq_length), (batch,)]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes, np.float32, onnx_file)
-
-
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        onnx_inputs = [inputs, token_types, valid_length]
-        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
-        pred_onx, cls_onx = session.run(None, input_dict)
-
-        assert_almost_equal(seq_encoding, pred_onx, rtol=0.01, atol=0.01)
-        assert_almost_equal(cls_encoding, cls_onx, rtol=0.01, atol=0.01)
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-
-@pytest.fixture(scope="session")
-def obj_class_test_images(tmpdir_factory):
-    tmpdir = tmpdir_factory.mktemp("obj_class_data")
-    from urllib.parse import urlparse
-    test_image_urls = [
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/bikers.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/car.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/dancer.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/duck.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/flower.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/shark.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/tree.jpg',
-    ]
-    paths = []
-    for url in test_image_urls:
-        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
-        mx.test_utils.download(url, fname=fn)
-        paths.append(fn)
-    return paths
-
-@pytest.mark.parametrize('model', [
-    'alexnet',
-    'cifar_resnet20_v1',
-    'cifar_resnet56_v1',
-    'cifar_resnet110_v1',
-    'cifar_resnet20_v2',
-    'cifar_resnet56_v2',
-    'cifar_resnet110_v2',
-    'cifar_wideresnet16_10',
-    'cifar_wideresnet28_10',
-    'cifar_wideresnet40_8',
-    'cifar_resnext29_16x64d',
-    'darknet53',
-    'densenet121',
-    'densenet161',
-    'densenet169',
-    'densenet201',
-    'googlenet',
-    'mobilenet1.0',
-    'mobilenet0.75',
-    'mobilenet0.5',
-    'mobilenet0.25',
-    'mobilenetv2_1.0',
-    'mobilenetv2_0.75',
-    'mobilenetv2_0.5',
-    'mobilenetv2_0.25',
-    'mobilenetv3_large',
-    'mobilenetv3_small',
-    'resnest14',
-    'resnest26',
-    'resnest50',
-    'resnest101',
-    'resnest200',
-    'resnest269',
-    'resnet18_v1',
-    'resnet18_v1b_0.89',
-    'resnet18_v2',
-    'resnet34_v1',
-    'resnet34_v2',
-    'resnet50_v1',
-    'resnet50_v1d_0.86',
-    'resnet50_v1d_0.48',
-    'resnet50_v1d_0.37',
-    'resnet50_v1d_0.11',
-    'resnet50_v2',
-    'resnet101_v1',
-    'resnet101_v1d_0.76',
-    'resnet101_v1d_0.73',
-    'resnet101_v2',
-    'resnet152_v1',
-    'resnet152_v2',
-    'resnext50_32x4d',
-    'resnext101_32x4d',
-    'resnext101_64x4d',
-    'senet_154',
-    'se_resnext101_32x4d',
-    'se_resnext101_64x4d',
-    'se_resnext50_32x4d',
-    'squeezenet1.0',
-    'squeezenet1.1',
-    'vgg11',
-    'vgg11_bn',
-    'vgg13',
-    'vgg13_bn',
-    'vgg16',
-    'vgg16_bn',
-    'vgg19',
-    'vgg19_bn',
-    'xception',
-    'inceptionv3'
-])
-def test_obj_class_model_inference_onnxruntime(tmp_path, model, obj_class_test_images):
-    inlen = 299 if 'inceptionv3' == model else 224
-    def normalize_image(imgfile):
-        img_data = mx.image.imread(imgfile)
-        img_data = mx.image.imresize(img_data, inlen, inlen)
-        img_data = img_data.transpose([2, 0, 1]).astype('float32')
-        mean_vec = mx.nd.array([0.485, 0.456, 0.406])
-        stddev_vec = mx.nd.array([0.229, 0.224, 0.225])
-        norm_img_data = mx.nd.zeros(img_data.shape).astype('float32')
-        for i in range(img_data.shape[0]):
-            norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]
-        return norm_img_data.reshape(1, 3, inlen, inlen).astype('float32')
-
-    try:
-        tmp_path = str(tmp_path)
-        M = GluonModel(model, (1,3,inlen,inlen), 'float32', tmp_path)
-        onnx_file = M.export_onnx()
-
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        input_name = session.get_inputs()[0].name
-
-        for img in obj_class_test_images:
-            img_data = normalize_image(img)
-            mx_result = M.predict(img_data)
-            onnx_result = session.run([], {input_name: img_data.asnumpy()})[0]
-            assert_almost_equal(mx_result, onnx_result)
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@pytest.fixture(scope="session")
-def obj_detection_test_images(tmpdir_factory):
-    tmpdir = tmpdir_factory.mktemp("obj_det_data")
-    from urllib.parse import urlparse
-    test_image_urls = [
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/car.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/duck.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/flower.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/shark.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/tree.jpg',
-    ]
-    paths = []
-    for url in test_image_urls:
-        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
-        mx.test_utils.download(url, fname=fn)
-        paths.append(fn)
-    return paths
-
-
-@pytest.mark.parametrize('model', [
-    'center_net_resnet18_v1b_voc',
-    'center_net_resnet50_v1b_voc',
-    'center_net_resnet101_v1b_voc',
-    'center_net_resnet18_v1b_coco',
-    'center_net_resnet50_v1b_coco',
-    'center_net_resnet101_v1b_coco',
-    'ssd_300_vgg16_atrous_voc',
-    'ssd_512_vgg16_atrous_voc',
-    'ssd_512_resnet50_v1_voc',
-    'ssd_512_mobilenet1.0_voc',
-    'faster_rcnn_resnet50_v1b_voc',
-    'yolo3_darknet53_voc',
-    'yolo3_mobilenet1.0_voc',
-    'ssd_300_vgg16_atrous_coco',
-    'ssd_512_vgg16_atrous_coco',
-    # 'ssd_300_resnet34_v1b_coco', #cannot import
-    'ssd_512_resnet50_v1_coco',
-    'ssd_512_mobilenet1.0_coco',
-    'faster_rcnn_resnet50_v1b_coco',
-    'faster_rcnn_resnet101_v1d_coco',
-    'yolo3_darknet53_coco',
-    'yolo3_mobilenet1.0_coco',
-])
-def test_obj_detection_model_inference_onnxruntime(tmp_path, model, obj_detection_test_images):
-    def assert_obj_detetion_result(mx_ids, mx_scores, mx_boxes,
-                                   onnx_ids, onnx_scores, onnx_boxes,
-                                   score_thresh=0.6, score_tol=1e-4):
-        def assert_bbox(mx_boxe, onnx_boxe, box_tol=1e-2):
-            def assert_scalar(a, b, tol=box_tol):
-                return np.abs(a-b) <= tol
-            return assert_scalar(mx_boxe[0], onnx_boxe[0]) and assert_scalar(mx_boxe[1], onnx_boxe[1]) \
-                      and assert_scalar(mx_boxe[2], onnx_boxe[2]) and assert_scalar(mx_boxe[3], onnx_boxe[3])
-
-        found_match = False
-        for i in range(len(onnx_ids)):
-            onnx_id = onnx_ids[i][0]
-            onnx_score = onnx_scores[i][0]
-            onnx_boxe = onnx_boxes[i]
-
-            if onnx_score < score_thresh:
-                break
-            for j in range(len(mx_ids)):
-                mx_id = mx_ids[j].asnumpy()[0]
-                mx_score = mx_scores[j].asnumpy()[0]
-                mx_boxe = mx_boxes[j].asnumpy()
-                # check socre 
-                if onnx_score < mx_score - score_tol:
-                    continue
-                if onnx_score > mx_score + score_tol:
-                    return False
-                # check id
-                if onnx_id != mx_id:
-                    continue
-                # check bounding box
-                if assert_bbox(mx_boxe, onnx_boxe):
-                    found_match = True
-                    break
-            if not found_match:
-                return False
-            found_match = False
-        return True
-
-    def normalize_image(imgfile):
-        img = mx.image.imread(imgfile)
-        img, _ = mx.image.center_crop(img, size=(512, 512))
-        img, _ = gluoncv.data.transforms.presets.center_net.transform_test(img, short=512)
-        return img
-
-    try:
-        tmp_path = str(tmp_path)
-        M = GluonModel(model, (1,3,512,512), 'float32', tmp_path)
-        onnx_file = M.export_onnx()
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        input_name = session.get_inputs()[0].name
-
-        for img in obj_detection_test_images:
-            img_data = normalize_image(img)
-            mx_class_ids, mx_scores, mx_boxes = M.predict(img_data)
-            # center_net_resnet models have different output format
-            if 'center_net_resnet' in model:
-                onnx_scores, onnx_class_ids, onnx_boxes = session.run([], {input_name: img_data.asnumpy()})
-                assert_almost_equal(mx_class_ids, onnx_class_ids)
-                assert_almost_equal(mx_scores, onnx_scores)
-                assert_almost_equal(mx_boxes, onnx_boxes)
-            else:
-                onnx_class_ids, onnx_scores, onnx_boxes = session.run([], {input_name: img_data.asnumpy()})
-                if not assert_obj_detetion_result(mx_class_ids[0], mx_scores[0], mx_boxes[0], \
-                        onnx_class_ids[0], onnx_scores[0], onnx_boxes[0]):
-                    raise AssertionError("Assertion error on model: " + model)
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-@pytest.fixture(scope="session")
-def img_segmentation_test_images(tmpdir_factory):
-    tmpdir = tmpdir_factory.mktemp("img_seg_data")
-    from urllib.parse import urlparse
-    test_image_urls = [
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/bikers.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/car.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/dancer.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/duck.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/flower.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/shark.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/tree.jpg',
-    ]
-    paths = []
-    for url in test_image_urls:
-        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
-        mx.test_utils.download(url, fname=fn)
-        paths.append(fn)
-    return paths
-
-@pytest.mark.parametrize('model', [
-    'fcn_resnet50_ade',
-    'fcn_resnet101_ade',
-    'deeplab_resnet50_ade',
-    'deeplab_resnet101_ade',
-    'deeplab_resnest50_ade',
-    'deeplab_resnest101_ade',
-    'deeplab_resnest200_ade',
-    'deeplab_resnest269_ade',
-    'fcn_resnet101_coco',
-    'deeplab_resnet101_coco',
-    'fcn_resnet101_voc',
-    'deeplab_resnet101_voc',
-    'deeplab_resnet152_voc',
-    'deeplab_resnet50_citys',
-    'deeplab_resnet101_citys',
-    'deeplab_v3b_plus_wideresnet_citys'
-])
-def test_img_segmentation_model_inference_onnxruntime(tmp_path, model, img_segmentation_test_images):
-    def normalize_image(imgfile):
-        img = mx.image.imread(imgfile).astype('float32')
-        img, _ = mx.image.center_crop(img, size=(480, 480))
-        img = gluoncv.data.transforms.presets.segmentation.test_transform(img, mx.cpu(0))
-        return img
-
-
-    try:
-        tmp_path = str(tmp_path)
-        M = GluonModel(model, (1,3,480,480), 'float32', tmp_path)
-        onnx_file = M.export_onnx()
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        input_name = session.get_inputs()[0].name
-
-        for img in img_segmentation_test_images:
-            img_data = normalize_image(img)
-            mx_result = M.predict(img_data)
-            onnx_result = session.run([], {input_name: img_data.asnumpy()})
-            assert(len(mx_result) == len(onnx_result))
-            for i in range(len(mx_result)):
-                assert_almost_equal(mx_result[i], onnx_result[i])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@pytest.fixture(scope="session")
-def pose_estimation_test_images(tmpdir_factory):
-    tmpdir = tmpdir_factory.mktemp("pose_est_data")
-    from urllib.parse import urlparse
-    test_image_urls = [
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/bikers.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/dancer.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
-    ]
-    paths = []
-    for url in test_image_urls:
-        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
-        mx.test_utils.download(url, fname=fn)
-        paths.append(fn)
-    return paths
-
-@pytest.mark.parametrize('model', [
-    'simple_pose_resnet18_v1b',
-    'simple_pose_resnet50_v1b',
-    'simple_pose_resnet50_v1d',
-    'simple_pose_resnet101_v1b',
-    'simple_pose_resnet101_v1d',
-    'simple_pose_resnet152_v1b',
-    'simple_pose_resnet152_v1d',
-    'alpha_pose_resnet101_v1b_coco',
-    'mobile_pose_resnet18_v1b',
-    'mobile_pose_resnet50_v1b',
-    'mobile_pose_mobilenet1.0',
-    'mobile_pose_mobilenetv2_1.0',
-    'mobile_pose_mobilenetv3_large',
-    'mobile_pose_mobilenetv3_small',
-])
-def test_pose_estimation_model_inference_onnxruntime(tmp_path, model, pose_estimation_test_images):
-    def normalize_image(imgfile):
-        img = mx.image.imread(imgfile).astype('float32')
-        img, _ = mx.image.center_crop(img, size=(512, 512))
-        img = gluoncv.data.transforms.presets.segmentation.test_transform(img, mx.cpu(0))
-        return img
-
-    try:
-        tmp_path = str(tmp_path)
-        M = GluonModel(model, (1,3,512,512), 'float32', tmp_path)
-        onnx_file = M.export_onnx()
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        input_name = session.get_inputs()[0].name
-
-        for img in pose_estimation_test_images:
-            img_data = normalize_image(img)
-            mx_result = M.predict(img_data)
-            onnx_result = session.run([], {input_name: img_data.asnumpy()})
-            assert(len(mx_result) == len(onnx_result))
-            for i in range(len(mx_result)):
-                assert_almost_equal(mx_result[i], onnx_result[i])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-@pytest.fixture(scope="session")
-def act_recognition_test_data(tmpdir_factory):
-    tmpdir = tmpdir_factory.mktemp("act_rec_data")
-    from urllib.parse import urlparse
-    test_image_urls = [
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/biking.rec',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/diving.rec',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/golfing.rec',
-        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/sledding.rec',
-    ]
-    paths = []
-    for url in test_image_urls:
-        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
-        mx.test_utils.download(url, fname=fn)
-        paths.append(fn)
-    return paths
-
-@pytest.mark.parametrize('model', [
-    'inceptionv1_kinetics400',
-    'resnet18_v1b_kinetics400',
-    'resnet34_v1b_kinetics400',
-    'resnet50_v1b_kinetics400',
-    'resnet101_v1b_kinetics400',
-    'resnet152_v1b_kinetics400',
-    'resnet50_v1b_hmdb51',
-    'resnet50_v1b_sthsthv2',
-    'vgg16_ucf101',
-    'inceptionv3_kinetics400',
-    'inceptionv3_ucf101',
-])
-def test_action_recognition_model_inference_onnxruntime(tmp_path, model, act_recognition_test_data):
-    batch_size = 64
-    input_len = 224
-    if 'inceptionv3' in model:
-        input_len = 340
-
-    def load_video(filepath):
-        iterator = mx.image.ImageIter(batch_size=batch_size, data_shape=(3,input_len,input_len), path_imgrec=filepath)
-        for batch in iterator:
-            return batch.data[0]
-
-    try:
-        tmp_path = str(tmp_path)
-        M = GluonModel(model, (batch_size,3,input_len,input_len), 'float32', tmp_path)
-        onnx_file = M.export_onnx()
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        input_name = session.get_inputs()[0].name
-
-        for video in act_recognition_test_data:
-            data = load_video(video)
-            mx_result = M.predict(data)
-            onnx_result = session.run([], {input_name: data.asnumpy()})[0]
-            assert_almost_equal(mx_result, onnx_result, rtol=0.001, atol=0.01)
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_name', ['roberta_24_1024_16', 'roberta_12_768_12'])
-def test_roberta_inference_onnxruntime(tmp_path, model_name):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        ctx = mx.cpu(0)
-
-        dataset= 'openwebtext_ccnews_stories_books_cased'#'book_corpus_wiki_en_uncased'
-        model, _ = nlp.model.get_model(
-        name=model_name,
-        ctx=ctx,
-        pretrained=True,
-        use_decoder=True,
-        dataset_name=dataset)
-        
-        model.hybridize(static_alloc=False)
-
-        batch = 2
-        seq_length = 32
-        num_masked_positions = 1
-        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32', ctx=ctx)
-        valid_length = mx.nd.array([seq_length] * batch, dtype='float32', ctx=ctx)
-        masked_positions = mx.nd.random.uniform(0, 32, shape=(batch, num_masked_positions),
-            dtype='float32', ctx=ctx).astype('int32')
-
-        sequence_outputs, attention_outputs= model(inputs, valid_length, masked_positions)    
-
-        prefix = "%s/roberta" % tmp_path
-        model.export(prefix)
-
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-        input_shapes = [(batch, seq_length), (batch,), (batch, num_masked_positions)]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
-                                                            [np.float32, np.float32, np.int32],
-                                                            onnx_file, verbose=True)
-
-        sess_options = onnxruntime.SessionOptions()
-        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
-
-        in_tensors = [inputs, valid_length, masked_positions]
-        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
-        pred = sess.run(None, input_dict)
-
-        assert_almost_equal(sequence_outputs, pred[0])
-        assert_almost_equal(attention_outputs, pred[1])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model', ['bert_12_768_12', 'bert_24_1024_16'])
-def test_bert_inference_onnxruntime(tmp_path, model):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        dataset = 'book_corpus_wiki_en_uncased'
-        ctx = mx.cpu(0)
-        model, vocab = nlp.model.get_model(
-            name=model,
-            ctx=ctx,
-            dataset_name=dataset,
-            pretrained=True,
-            use_pooler=True,
-            use_decoder=False,
-            use_classifier=False)
-
-        model.hybridize(static_alloc=True)
-
-        batch = 5
-        seq_length = 16
-        # create synthetic test data
-        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32')
-        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
-        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
-
-        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
-
-        prefix = "%s/bert" % tmp_path
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-
-        input_shapes = [(batch, seq_length), (batch, seq_length), (batch,)]
-        input_types = [np.float32, np.float32, np.float32]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes, input_types, onnx_file)
-
-
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        onnx_inputs = [inputs, token_types, valid_length]
-        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
-        pred_onx, cls_onx = session.run(None, input_dict)
-
-        assert_almost_equal(seq_encoding, pred_onx)
-        assert_almost_equal(cls_encoding, cls_onx)
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_name', ['distilbert_6_768_12'])
-def test_distilbert_inference_onnxruntime(tmp_path, model_name):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        dataset = 'distilbert_book_corpus_wiki_en_uncased'
-        ctx = mx.cpu(0)
-        model, _ = nlp.model.get_model(
-            name=model_name,
-            ctx=ctx,
-            pretrained=True,
-            dataset_name=dataset)
-
-        model.hybridize(static_alloc=True)
-
-        batch = 2
-        seq_length = 32
-        num_masked_positions = 1
-        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32', ctx=ctx)
-        valid_length = mx.nd.array([seq_length] * batch, dtype='float32', ctx=ctx)
-
-        sequence_outputs = model(inputs, valid_length)
-
-        prefix = "%s/distilbert" % tmp_path
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-        input_shapes = [(batch, seq_length), (batch,)]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
-                                                            [np.float32, np.float32],
-                                                            onnx_file, verbose=True)
-        sess_options = onnxruntime.SessionOptions()
-        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
-
-        in_tensors = [inputs, valid_length]
-        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
-        pred = sess.run(None, input_dict)
-
-        assert_almost_equal(sequence_outputs, pred[0])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_name', [('standard_lstm_lm_200', 200), ('standard_lstm_lm_650', 650),
-                                        ('standard_lstm_lm_1500', 1500)])
-@pytest.mark.parametrize('seq_length', [64, 128])
-def test_standard_rnn_lstm_pretrained_inference_onnxruntime(tmp_path, model_name, seq_length):
-    try:
-        import gluonnlp as nlp
-        ctx = mx.cpu()
-        dataset= 'wikitext-2'
-        model, _ = nlp.model.get_model(
-            name=model_name[0],
-            ctx=ctx,
-            pretrained=True,
-            dataset_name=dataset,
-            dropout=0)
-        model.hybridize()
-
-        batch = 2
-        num_hidden = model_name[1]
-        num_layers = 2
-        inputs = mx.nd.random.randint(0, 33278, shape=(seq_length, batch),
-                                      ctx=ctx).astype('float32')
-        begin_state = model.begin_state(func=mx.nd.random.uniform, low=0, high=1,
-                                        batch_size=batch, dtype='float32', ctx=ctx)
-        out, out_state= model(inputs, begin_state)
-
-        prefix = "%s/standard_rnn_lstm" % tmp_path
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-        input_shapes = [(seq_length, batch), np.shape(begin_state[0]), np.shape(begin_state[1])]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
-                                                            [np.float32, np.float32, np.float32],
-                                                            onnx_file, verbose=True)
-        sess_options = onnxruntime.SessionOptions()
-        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
-
-        in_tensors = [inputs, begin_state[0], begin_state[1]]
-        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
-        pred = sess.run(None, input_dict)
-
-        assert_almost_equal(out, pred[2])
-        assert_almost_equal(out_state[0], pred[0])
-        assert_almost_equal(out_state[1], pred[1])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_name', ['mobilenet1.0', 'inceptionv3', 'darknet53', 'resnest14'])
-def test_dynamic_shape_cv_inference_onnxruntime(tmp_path, model_name):
-    tmp_path = str(tmp_path)
-    try:
-        M = GluonModel(model_name, (1, 3, 512, 512), 'float32', tmp_path)
-        dynamic_input_shapes = [(None, 3, 512, 512)]
-        onnx_file = M.export_onnx_dynamic(dynamic_input_shapes)
-
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        sess = onnxruntime.InferenceSession(onnx_file, ses_opt)
-
-        # test on a different batch size
-        x = mx.random.uniform(0, 10, (5, 3, 512, 512))
-        in_tensors = [x]
-        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
-        pred_on = sess.run(None, input_dict)
-
-        pred_mx = M.predict(x)
-
-        assert_almost_equal(pred_mx, pred_on[0])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model', ['bert_12_768_12'])
-def test_dynamic_shape_bert_inference_onnxruntime(tmp_path, model):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        dataset = 'book_corpus_wiki_en_uncased'
-        ctx = mx.cpu(0)
-        model, vocab = nlp.model.get_model(
-            name=model,
-            ctx=ctx,
-            dataset_name=dataset,
-            pretrained=True,
-            use_pooler=True,
-            use_decoder=False,
-            num_layers = 3,
-            hparam_allow_override = True,
-            use_classifier=False)
-
-        model.hybridize(static_alloc=True)
-
-        batch = 5
-        seq_length = 16
-        # create synthetic test data
-        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32')
-        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
-        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
-
-        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
-
-        prefix = "%s/bert" % tmp_path
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-        dynamic_input_shapes = [(None, seq_length), (None, seq_length), (None,)]
-        input_shapes = [(batch, seq_length), (batch, seq_length), (batch,)]
-        input_types = [np.float32, np.float32, np.float32]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
-                                                            input_types, onnx_file,
-                                                            dynamic=True,
-                                                            dynamic_input_shapes=dynamic_input_shapes)
-
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-
-        # test on a different batch size
-        batch = 7
-        seq_length = 16
-        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32')
-        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
-        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
-
-        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
-
-        onnx_inputs = [inputs, token_types, valid_length]
-        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
-        pred_onx, cls_onx = session.run(None, input_dict)
-
-        assert_almost_equal(seq_encoding, pred_onx)
-        assert_almost_equal(cls_encoding, cls_onx)
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_name', [('awd_lstm_lm_600', 600), ('awd_lstm_lm_1150', 1150)])
-@pytest.mark.parametrize('seq_length', [16, 128, 256])
-def test_awd_rnn_lstm_pretrained_inference_onnxruntime(tmp_path, model_name, seq_length):
-    try:
-        import gluonnlp as nlp
-        ctx = mx.cpu()
-        dataset= 'wikitext-2'
-        model, _ = nlp.model.get_model(
-            name=model_name[0],
-            ctx=ctx,
-            pretrained=True,
-            dataset_name=dataset,
-            dropout=0)
-        model.hybridize()
-
-        batch = 2
-        num_hidden = model_name[1]
-        num_layers = 2
-        inputs = mx.nd.random.randint(0, 33278, shape=(seq_length, batch),
-                                      ctx=ctx).astype('float32')
-        begin_state = model.begin_state(func=mx.nd.random.uniform, low=0, high=1,
-                                        batch_size=batch, dtype='float32', ctx=ctx)
-        out, out_state= model(inputs, begin_state)
-
-        prefix = "%s/awd_lstm" % tmp_path
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-        input_shapes = [(seq_length, batch), 
-                        np.shape(begin_state[0][0]), np.shape(begin_state[0][1]),
-                        np.shape(begin_state[1][0]), np.shape(begin_state[1][1]),
-                        np.shape(begin_state[2][0]), np.shape(begin_state[2][1])]
-        input_types = [np.float32, np.float32, np.float32, np.float32, np.float32, np.float32,
-                       np.float32]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
-                                                            input_types, onnx_file, verbose=True)
-
-        sess_options = onnxruntime.SessionOptions()
-        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
-
-        in_tensors = [inputs, begin_state[0][0], begin_state[0][1],
-                      begin_state[1][0], begin_state[1][1],
-                      begin_state[2][0], begin_state[2][1]]
-        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
-        pred = sess.run(None, input_dict)
-
-        assert_almost_equal(out, pred[6])
-        assert_almost_equal(out_state[0][0], pred[0])
-        assert_almost_equal(out_state[0][1], pred[1])
-        assert_almost_equal(out_state[1][0], pred[2])
-        assert_almost_equal(out_state[1][1], pred[3])
-        assert_almost_equal(out_state[2][0], pred[4])
-        assert_almost_equal(out_state[2][1], pred[5])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_name', ['ernie_12_768_12'])
-def test_ernie_inference_onnxruntime(tmp_path, model_name):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        dataset = 'baidu_ernie_uncased'
-        ctx = mx.cpu(0)
-        model, vocab = nlp.model.get_model(
-            name=model_name,
-            ctx=ctx,
-            dataset_name=dataset,
-            pretrained=True,
-            use_pooler=True,
-            use_decoder=False,
-            num_layers = 3,
-            hparam_allow_override = True,
-            use_classifier=False)
-
-        model.hybridize(static_alloc=True)
-
-        batch = 5
-        seq_length = 16
-        # create synthetic test data
-        inputs = mx.nd.random.uniform(0, 17964, shape=(batch, seq_length), dtype='float32')
-        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
-        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
-
-        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
-
-        prefix = "%s/ernie" % tmp_path
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-        input_shapes = [(batch, seq_length), (batch, seq_length), (batch,)]
-        input_types = [np.float32, np.float32, np.float32]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
-                                                            input_types, onnx_file)
-
-        # create onnxruntime session using the generated onnx file
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-
-        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
-
-        onnx_inputs = [inputs, token_types, valid_length]
-        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
-        pred_onx, cls_onx = session.run(None, input_dict)
-
-        assert_almost_equal(seq_encoding, pred_onx)
-        assert_almost_equal(cls_encoding, cls_onx)
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_name', ['transformer_en_de_512'])
-def test_transformer_pretrained_inference_onnxruntime(tmp_path, model_name):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        dataset = 'WMT2014'
-        ctx = mx.cpu(0)
-        model, _, _ = nlp.model.get_model(
-            name=model_name,
-            ctx=ctx,
-            pretrained=True,
-            dataset_name=dataset)
-
-        model.hybridize(static_alloc=False)
-
-        batch = 7
-        seq_length = 16
-        C_in = 512
-        C_out = 512
-        src = mx.nd.random.uniform(0, 36794, shape=(batch, seq_length), dtype='float32')
-        step_input = mx.nd.random.uniform(0, 36794, shape=(batch,), dtype='float32')
-        src_valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
-
-        encoder_outputs, encoder_additional_outputs = model.encode(src,
-                                                                   valid_length=src_valid_length)
-
-        decoder_states = model.decoder.init_state_from_encoder(encoder_outputs, src_valid_length)
-
-        step_output, states, additional_outputs = model.decode_step(step_input, decoder_states)
-
-        # skip export of 'decoder' as it's used for training only
-        for component in ['encoder', 'one_step_ahead_decoder', 'src_embed', 'tgt_embed',
-                         'tgt_proj']:
-
-            prefix = "%s/%s" %(tmp_path, component)
-            component = getattr(model, component)
-            component.export(prefix)
-            sym_file = "%s-symbol.json" % prefix
-            params_file = "%s-0000.params" % prefix
-            onnx_file = "%s.onnx" % prefix
-
-        def export_to_onnx(prefix, input_shapes, input_types, **kwargs):
-            sym_file = "%s-symbol.json" % prefix
-            params_file = "%s-0000.params" % prefix
-            onnx_file = "%s.onnx" % prefix
-            return mx.contrib.onnx.export_model(sym_file, params_file, input_shapes, input_types,
-                                                onnx_file, **kwargs)
-
-        def onnx_runtime_predict(onnx_file, onnx_inputs):
-            ses_opt = onnxruntime.SessionOptions()
-            ses_opt.log_severity_level = 3
-            session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-            input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy())
-                            for i in range(len(onnx_inputs)))
-            return session.run(None, input_dict)
-
-        def verify_encoder():
-            inputs = mx.nd.random.uniform(-1, 1, shape=(batch, seq_length, C_in), dtype='float32')
-            valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
-            pred = model.encoder(inputs, valid_length=valid_length)
-
-            prefix = "%s/encoder" %tmp_path
-            input_shapes = [(batch, seq_length, C_in), (batch,)]
-            input_types = [np.float32, np.float32]
-            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
-            onnx_inputs = [inputs, valid_length]
-            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
-
-            assert_almost_equal(pred[0], pred_onx[0])
-
-        def verify_src_embed():
-            src = mx.nd.random.uniform(0, 36794, shape=(batch, seq_length), dtype='float32')
-            pred = model.src_embed(src)
-
-            prefix = "%s/src_embed" %tmp_path
-            input_shapes = [(batch, seq_length)]
-            input_types = [np.float32]
-            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
-            onnx_inputs = [src]
-            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
-
-            assert_almost_equal(pred, pred_onx[0])
-
-        def verify_tgt_embed():
-            tgt = mx.nd.random.uniform(0, 36794, shape=(batch, seq_length), dtype='float32')
-            pred = model.tgt_embed(tgt)
-
-            prefix = "%s/tgt_embed" %tmp_path
-            input_shapes = [(batch, seq_length)]
-            input_types = [np.float32]
-            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
-            onnx_inputs = [tgt]
-            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
-
-            assert_almost_equal(pred, pred_onx[0])
-
-        def verify_tgt_proj():
-            decoder_out = mx.nd.random.uniform(0, 512, shape=(batch, seq_length, C_out),
-                                               dtype='float32')
-            pred = model.tgt_proj(decoder_out)
-
-            prefix = "%s/tgt_proj" %tmp_path
-            input_shapes = [(batch, seq_length, C_out)]
-            input_types = [np.float32]
-            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
-            onnx_inputs = [decoder_out]
-            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
-
-            assert_almost_equal(pred, pred_onx[0], rtol=1.e-04, atol=1.5e-03)
-
-        def verify_one_step_ahead_decoder():
-            prefix = "%s/one_step_ahead_decoder" %tmp_path
-
-            # the input data order
-            perm = [2, 0, 1]
-            input_shapes = [(batch, seq_length, C_in), (batch, seq_length, C_out),
-                            (batch, seq_length)]
-            input_shapes = [input_shapes[i] for i in perm]
-            dynamic_input_shapes = [(batch, 'seq_length', C_in), (batch, 'seq_length', C_out),
-                                    (batch, 'seq_length')]
-            dynamic_input_shapes = [dynamic_input_shapes[i] for i in perm]
-            input_types = [np.float32, np.float32, np.float32]
-            # do a dynamic export
-            onnx_file = export_to_onnx(prefix, input_shapes, input_types, dynamic=True,
-                                       dynamic_input_shapes=dynamic_input_shapes)
-
-            # step 0
-            step_input = mx.nd.random.uniform(-1, 1, shape=(batch, C_in), dtype='float32')
-            # mxnet
-            pred, step_states, _ = model.one_step_ahead_decoder(step_input, decoder_states)
-            # onnx
-            # note that we need to expand the sequence axis just like in here:
-            # https://github.com/dmlc/gluon-nlp/blob/v0.10.x/src/gluonnlp/model/transformer.py#L831
-            input_onx = mx.nd.expand_dims(step_input, axis=1)
-            onnx_inputs = [input_onx, decoder_states[0], decoder_states[1]]
-            onnx_inputs = [onnx_inputs[i] for i in perm]
-            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
-
-            assert_almost_equal(pred, pred_onx[0])
-
-            # step >= 1
-            for i in range(20):
-                step_input = mx.nd.random.uniform(-10*i, 10*i, shape=(batch, C_in), dtype='float32')
-                # mxnet
-                pred, step_states, _ = model.one_step_ahead_decoder(step_input, step_states)
-                # onnx
-                # note that we need to concat the step_input with the previous inpus
-                # just like in here:
-                # https://github.com/dmlc/gluon-nlp/blob/v0.10.x/src/gluonnlp/model/transformer.py#L828
-                input_onx = mx.nd.concat(input_onx, mx.nd.expand_dims(step_input, axis=1), dim=1)
-                onnx_inputs = [input_onx, decoder_states[0], decoder_states[1]]
-                onnx_inputs = [onnx_inputs[i] for i in perm]
-                pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
-
-                assert_almost_equal(pred, pred_onx[0])
-
-        verify_encoder()
-        verify_src_embed()
-        verify_tgt_embed()
-        verify_tgt_proj()
-        verify_one_step_ahead_decoder()
-
-    finally:
-        shutil.rmtree(tmp_path)
-
-
-@with_seed()
-@pytest.mark.parametrize('model_params', [('gpt2_117m', 24), ('gpt2_345m', 48)])
-def test_gpt_pretrained_inference_onnxruntime(tmp_path, model_params):
-    tmp_path = str(tmp_path)
-    try:
-        import gluonnlp as nlp
-        import urllib.request
-        from zipfile import ZipFile
-        import importlib.util
-        import sys
-
-        url = 'https://nlp.gluon.ai/_downloads/77d227fbc8f1613e6802acc7253cc090/text_generation.zip'
-        urllib.request.urlretrieve(url, tmp_path + 'text_generation.zip')
-
-        with ZipFile(tmp_path + 'text_generation.zip', 'r') as zipObj:
-            zipObj.extractall(tmp_path)
-
-        # load in the text_generation module, refer to:
-        # https://github.com/dmlc/gluon-nlp/tree/v0.10.x/scripts/text_generation
-        spec = importlib.util.spec_from_file_location(
-            'text_generation',
-            tmp_path + '/text_generation/__init__.py')
-        mod = importlib.util.module_from_spec(spec)
-        sys.modules[spec.name] = mod
-        spec.loader.exec_module(mod)
-
-        ctx = mx.cpu(0)
-        model_name= model_params[0]
-        dataset= 'openai_webtext'
-        # get_model() is overridden in here:
-        # https://github.com/dmlc/gluon-nlp/blob/v0.10.x/scripts/text_generation/model/__init__.py#L23
-        model, _ = mod.model.get_model(
-            name=model_name,
-            ctx=ctx,
-            pretrained=True,
-            dataset_name=dataset)
-
-        model.hybridize()
-
-        batch = 4
-        seq_length = 64
-        inputs = mx.nd.random.uniform(0, 50257, shape=(batch, seq_length), dtype='float32',
-                                      ctx=ctx)
-
-        pred = model(inputs)
-
-        prefix = "%s/%s" % (tmp_path, model_name)
-        model.export(prefix)
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
-
-        input_shapes = [(batch, seq_length)]
-        input_types = [np.float32]
-        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
-                                                            input_types, onnx_file)
-
-        ses_opt = onnxruntime.SessionOptions()
-        ses_opt.log_severity_level = 3
-        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
-        onnx_inputs = [inputs]
-        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
-        pred_onx = session.run(None, input_dict)
-
-        # check output
-        assert_almost_equal(pred[0], pred_onx[0])
-        # check states
-        num_states = model_params[1]
-        for i in range(num_states):
-            assert_almost_equal(pred[1][i], pred_onx[i+1])
-
-    finally:
-        shutil.rmtree(tmp_path)
-
diff --git a/tests/python-pytest/onnx/test_onnxruntime_cv.py b/tests/python-pytest/onnx/test_onnxruntime_cv.py
new file mode 100644
index 000000000000..d2041d58e212
--- /dev/null
+++ b/tests/python-pytest/onnx/test_onnxruntime_cv.py
@@ -0,0 +1,534 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+import numpy as np
+import gluoncv
+import onnxruntime
+
+from mxnet.test_utils import assert_almost_equal
+from common import with_seed
+
+import json
+import os
+import pytest
+import shutil
+
+
+class GluonModel():
+    def __init__(self, model_name, input_shape, input_dtype, tmpdir):
+        self.model_name = model_name
+        self.input_shape = input_shape
+        self.input_dtype = input_dtype
+        self.modelpath = os.path.join(tmpdir, model_name)
+        self.ctx = mx.cpu(0)
+        self.get_model()
+        self.export()
+
+    def get_model(self):
+        self.model = gluoncv.model_zoo.get_model(self.model_name, pretrained=True, ctx=self.ctx)
+        self.model.hybridize()
+
+    def export(self):
+        data = mx.nd.zeros(self.input_shape, dtype=self.input_dtype, ctx=self.ctx)
+        self.model.forward(data)
+        self.model.export(self.modelpath, 0)
+
+    def export_onnx(self):
+        onnx_file = self.modelpath + ".onnx"
+        mx.contrib.onnx.export_model(self.modelpath + "-symbol.json", self.modelpath + "-0000.params",
+                                     [self.input_shape], self.input_dtype, onnx_file)
+        return onnx_file
+
+    def export_onnx_dynamic(self, dynamic_input_shapes):
+        onnx_file = self.modelpath + ".onnx"
+        mx.contrib.onnx.export_model(self.modelpath + "-symbol.json", self.modelpath + "-0000.params",
+                                     [self.input_shape], self.input_dtype, onnx_file, dynamic=True,
+                                     dynamic_input_shapes=dynamic_input_shapes)
+        return onnx_file
+
+    def predict(self, data):
+        return self.model(data)
+
+
+@pytest.fixture(scope="session")
+def obj_class_test_images(tmpdir_factory):
+    tmpdir = tmpdir_factory.mktemp("obj_class_data")
+    from urllib.parse import urlparse
+    test_image_urls = [
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/bikers.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/car.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/dancer.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/duck.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/flower.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/shark.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/tree.jpg',
+    ]
+    paths = []
+    for url in test_image_urls:
+        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
+        mx.test_utils.download(url, fname=fn)
+        paths.append(fn)
+    return paths
+
+@pytest.mark.parametrize('model', [
+    'alexnet',
+    'cifar_resnet20_v1',
+    'cifar_resnet56_v1',
+    'cifar_resnet110_v1',
+    'cifar_resnet20_v2',
+    'cifar_resnet56_v2',
+    'cifar_resnet110_v2',
+    'cifar_wideresnet16_10',
+    'cifar_wideresnet28_10',
+    'cifar_wideresnet40_8',
+    'cifar_resnext29_16x64d',
+    'darknet53',
+    'densenet121',
+    'densenet161',
+    'densenet169',
+    'densenet201',
+    'googlenet',
+    'mobilenet1.0',
+    'mobilenet0.75',
+    'mobilenet0.5',
+    'mobilenet0.25',
+    'mobilenetv2_1.0',
+    'mobilenetv2_0.75',
+    'mobilenetv2_0.5',
+    'mobilenetv2_0.25',
+    'mobilenetv3_large',
+    'mobilenetv3_small',
+    'resnest14',
+    'resnest26',
+    'resnest50',
+    'resnest101',
+    'resnest200',
+    'resnest269',
+    'resnet18_v1',
+    'resnet18_v1b_0.89',
+    'resnet18_v2',
+    'resnet34_v1',
+    'resnet34_v2',
+    'resnet50_v1',
+    'resnet50_v1d_0.86',
+    'resnet50_v1d_0.48',
+    'resnet50_v1d_0.37',
+    'resnet50_v1d_0.11',
+    'resnet50_v2',
+    'resnet101_v1',
+    'resnet101_v1d_0.76',
+    'resnet101_v1d_0.73',
+    'resnet101_v2',
+    'resnet152_v1',
+    'resnet152_v2',
+    'resnext50_32x4d',
+    'resnext101_32x4d',
+    'resnext101_64x4d',
+    'senet_154',
+    'se_resnext101_32x4d',
+    'se_resnext101_64x4d',
+    'se_resnext50_32x4d',
+    'squeezenet1.0',
+    'squeezenet1.1',
+    'vgg11',
+    'vgg11_bn',
+    'vgg13',
+    'vgg13_bn',
+    'vgg16',
+    'vgg16_bn',
+    'vgg19',
+    'vgg19_bn',
+    'xception',
+    'inceptionv3'
+])
+def test_obj_class_model_inference_onnxruntime(tmp_path, model, obj_class_test_images):
+    inlen = 299 if 'inceptionv3' == model else 224
+    def normalize_image(imgfile):
+        img_data = mx.image.imread(imgfile)
+        img_data = mx.image.imresize(img_data, inlen, inlen)
+        img_data = img_data.transpose([2, 0, 1]).astype('float32')
+        mean_vec = mx.nd.array([0.485, 0.456, 0.406])
+        stddev_vec = mx.nd.array([0.229, 0.224, 0.225])
+        norm_img_data = mx.nd.zeros(img_data.shape).astype('float32')
+        for i in range(img_data.shape[0]):
+            norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]
+        return norm_img_data.reshape(1, 3, inlen, inlen).astype('float32')
+
+    try:
+        tmp_path = str(tmp_path)
+        M = GluonModel(model, (1,3,inlen,inlen), 'float32', tmp_path)
+        onnx_file = M.export_onnx()
+
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+        input_name = session.get_inputs()[0].name
+
+        for img in obj_class_test_images:
+            img_data = normalize_image(img)
+            mx_result = M.predict(img_data)
+            onnx_result = session.run([], {input_name: img_data.asnumpy()})[0]
+            assert_almost_equal(mx_result, onnx_result)
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@pytest.fixture(scope="session")
+def obj_detection_test_images(tmpdir_factory):
+    tmpdir = tmpdir_factory.mktemp("obj_det_data")
+    from urllib.parse import urlparse
+    test_image_urls = [
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/car.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/duck.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/flower.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/shark.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/tree.jpg',
+    ]
+    paths = []
+    for url in test_image_urls:
+        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
+        mx.test_utils.download(url, fname=fn)
+        paths.append(fn)
+    return paths
+
+
+@pytest.mark.parametrize('model', [
+    'center_net_resnet18_v1b_voc',
+    'center_net_resnet50_v1b_voc',
+    'center_net_resnet101_v1b_voc',
+    'center_net_resnet18_v1b_coco',
+    'center_net_resnet50_v1b_coco',
+    'center_net_resnet101_v1b_coco',
+    'ssd_300_vgg16_atrous_voc',
+    'ssd_512_vgg16_atrous_voc',
+    'ssd_512_resnet50_v1_voc',
+    'ssd_512_mobilenet1.0_voc',
+    'faster_rcnn_resnet50_v1b_voc',
+    'yolo3_darknet53_voc',
+    'yolo3_mobilenet1.0_voc',
+    'ssd_300_vgg16_atrous_coco',
+    'ssd_512_vgg16_atrous_coco',
+    # 'ssd_300_resnet34_v1b_coco', #cannot import
+    'ssd_512_resnet50_v1_coco',
+    'ssd_512_mobilenet1.0_coco',
+    'faster_rcnn_resnet50_v1b_coco',
+    'faster_rcnn_resnet101_v1d_coco',
+    'yolo3_darknet53_coco',
+    'yolo3_mobilenet1.0_coco',
+])
+def test_obj_detection_model_inference_onnxruntime(tmp_path, model, obj_detection_test_images):
+    def assert_obj_detetion_result(mx_ids, mx_scores, mx_boxes,
+                                   onnx_ids, onnx_scores, onnx_boxes,
+                                   score_thresh=0.6, score_tol=1e-4):
+        def assert_bbox(mx_boxe, onnx_boxe, box_tol=1e-2):
+            def assert_scalar(a, b, tol=box_tol):
+                return np.abs(a-b) <= tol
+            return assert_scalar(mx_boxe[0], onnx_boxe[0]) and assert_scalar(mx_boxe[1], onnx_boxe[1]) \
+                      and assert_scalar(mx_boxe[2], onnx_boxe[2]) and assert_scalar(mx_boxe[3], onnx_boxe[3])
+
+        found_match = False
+        for i in range(len(onnx_ids)):
+            onnx_id = onnx_ids[i][0]
+            onnx_score = onnx_scores[i][0]
+            onnx_boxe = onnx_boxes[i]
+
+            if onnx_score < score_thresh:
+                break
+            for j in range(len(mx_ids)):
+                mx_id = mx_ids[j].asnumpy()[0]
+                mx_score = mx_scores[j].asnumpy()[0]
+                mx_boxe = mx_boxes[j].asnumpy()
+                # check score
+                if onnx_score < mx_score - score_tol:
+                    continue
+                if onnx_score > mx_score + score_tol:
+                    return False
+                # check id
+                if onnx_id != mx_id:
+                    continue
+                # check bounding box
+                if assert_bbox(mx_boxe, onnx_boxe):
+                    found_match = True
+                    break
+            if not found_match:
+                return False
+            found_match = False
+        return True
+
+    def normalize_image(imgfile):
+        img = mx.image.imread(imgfile)
+        img, _ = mx.image.center_crop(img, size=(512, 512))
+        img, _ = gluoncv.data.transforms.presets.center_net.transform_test(img, short=512)
+        return img
+
+    try:
+        tmp_path = str(tmp_path)
+        M = GluonModel(model, (1,3,512,512), 'float32', tmp_path)
+        onnx_file = M.export_onnx()
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+        input_name = session.get_inputs()[0].name
+
+        for img in obj_detection_test_images:
+            img_data = normalize_image(img)
+            mx_class_ids, mx_scores, mx_boxes = M.predict(img_data)
+            # center_net_resnet models have different output format
+            if 'center_net_resnet' in model:
+                onnx_scores, onnx_class_ids, onnx_boxes = session.run([], {input_name: img_data.asnumpy()})
+                assert_almost_equal(mx_class_ids, onnx_class_ids)
+                assert_almost_equal(mx_scores, onnx_scores)
+                assert_almost_equal(mx_boxes, onnx_boxes)
+            else:
+                onnx_class_ids, onnx_scores, onnx_boxes = session.run([], {input_name: img_data.asnumpy()})
+                if not assert_obj_detetion_result(mx_class_ids[0], mx_scores[0], mx_boxes[0], \
+                        onnx_class_ids[0], onnx_scores[0], onnx_boxes[0]):
+                    raise AssertionError("Assertion error on model: " + model)
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+@pytest.fixture(scope="session")
+def img_segmentation_test_images(tmpdir_factory):
+    tmpdir = tmpdir_factory.mktemp("img_seg_data")
+    from urllib.parse import urlparse
+    test_image_urls = [
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/bikers.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/car.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/dancer.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/duck.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/flower.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/shark.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/tree.jpg',
+    ]
+    paths = []
+    for url in test_image_urls:
+        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
+        mx.test_utils.download(url, fname=fn)
+        paths.append(fn)
+    return paths
+
+@pytest.mark.parametrize('model', [
+    'fcn_resnet50_ade',
+    'fcn_resnet101_ade',
+    'deeplab_resnet50_ade',
+    'deeplab_resnet101_ade',
+    'deeplab_resnest50_ade',
+    'deeplab_resnest101_ade',
+    'deeplab_resnest200_ade',
+    'deeplab_resnest269_ade',
+    'fcn_resnet101_coco',
+    'deeplab_resnet101_coco',
+    'fcn_resnet101_voc',
+    'deeplab_resnet101_voc',
+    'deeplab_resnet152_voc',
+    'deeplab_resnet50_citys',
+    'deeplab_resnet101_citys',
+    'deeplab_v3b_plus_wideresnet_citys'
+])
+def test_img_segmentation_model_inference_onnxruntime(tmp_path, model, img_segmentation_test_images):
+    def normalize_image(imgfile):
+        img = mx.image.imread(imgfile).astype('float32')
+        img, _ = mx.image.center_crop(img, size=(480, 480))
+        img = gluoncv.data.transforms.presets.segmentation.test_transform(img, mx.cpu(0))
+        return img
+
+
+    try:
+        tmp_path = str(tmp_path)
+        M = GluonModel(model, (1,3,480,480), 'float32', tmp_path)
+        onnx_file = M.export_onnx()
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+        input_name = session.get_inputs()[0].name
+
+        for img in img_segmentation_test_images:
+            img_data = normalize_image(img)
+            mx_result = M.predict(img_data)
+            onnx_result = session.run([], {input_name: img_data.asnumpy()})
+            assert(len(mx_result) == len(onnx_result))
+            for i in range(len(mx_result)):
+                assert_almost_equal(mx_result[i], onnx_result[i])
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@pytest.fixture(scope="session")
+def pose_estimation_test_images(tmpdir_factory):
+    tmpdir = tmpdir_factory.mktemp("pose_est_data")
+    from urllib.parse import urlparse
+    test_image_urls = [
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/bikers.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/dancer.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/fieldhockey.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/runners.jpg',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/images/soccer2.jpg',
+    ]
+    paths = []
+    for url in test_image_urls:
+        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
+        mx.test_utils.download(url, fname=fn)
+        paths.append(fn)
+    return paths
+
+@pytest.mark.parametrize('model', [
+    'simple_pose_resnet18_v1b',
+    'simple_pose_resnet50_v1b',
+    'simple_pose_resnet50_v1d',
+    'simple_pose_resnet101_v1b',
+    'simple_pose_resnet101_v1d',
+    'simple_pose_resnet152_v1b',
+    'simple_pose_resnet152_v1d',
+    'alpha_pose_resnet101_v1b_coco',
+    'mobile_pose_resnet18_v1b',
+    'mobile_pose_resnet50_v1b',
+    'mobile_pose_mobilenet1.0',
+    'mobile_pose_mobilenetv2_1.0',
+    'mobile_pose_mobilenetv3_large',
+    'mobile_pose_mobilenetv3_small',
+])
+def test_pose_estimation_model_inference_onnxruntime(tmp_path, model, pose_estimation_test_images):
+    def normalize_image(imgfile):
+        img = mx.image.imread(imgfile).astype('float32')
+        img, _ = mx.image.center_crop(img, size=(512, 512))
+        img = gluoncv.data.transforms.presets.segmentation.test_transform(img, mx.cpu(0))
+        return img
+
+    try:
+        tmp_path = str(tmp_path)
+        M = GluonModel(model, (1,3,512,512), 'float32', tmp_path)
+        onnx_file = M.export_onnx()
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+        input_name = session.get_inputs()[0].name
+
+        for img in pose_estimation_test_images:
+            img_data = normalize_image(img)
+            mx_result = M.predict(img_data)
+            onnx_result = session.run([], {input_name: img_data.asnumpy()})
+            assert(len(mx_result) == len(onnx_result))
+            for i in range(len(mx_result)):
+                assert_almost_equal(mx_result[i], onnx_result[i])
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+@pytest.fixture(scope="session")
+def act_recognition_test_data(tmpdir_factory):
+    tmpdir = tmpdir_factory.mktemp("act_rec_data")
+    from urllib.parse import urlparse
+    test_image_urls = [
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/biking.rec',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/diving.rec',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/golfing.rec',
+        'https://github.com/apache/incubator-mxnet-ci/raw/master/test-data/actions/sledding.rec',
+    ]
+    paths = []
+    for url in test_image_urls:
+        fn = os.path.join(tmpdir, os.path.basename(urlparse(url).path))
+        mx.test_utils.download(url, fname=fn)
+        paths.append(fn)
+    return paths
+
+@pytest.mark.parametrize('model', [
+    'inceptionv1_kinetics400',
+    'resnet18_v1b_kinetics400',
+    'resnet34_v1b_kinetics400',
+    'resnet50_v1b_kinetics400',
+    'resnet101_v1b_kinetics400',
+    'resnet152_v1b_kinetics400',
+    'resnet50_v1b_hmdb51',
+    'resnet50_v1b_sthsthv2',
+    'vgg16_ucf101',
+    'inceptionv3_kinetics400',
+    'inceptionv3_ucf101',
+])
+def test_action_recognition_model_inference_onnxruntime(tmp_path, model, act_recognition_test_data):
+    batch_size = 64
+    input_len = 224
+    if 'inceptionv3' in model:
+        input_len = 340
+
+    def load_video(filepath):
+        iterator = mx.image.ImageIter(batch_size=batch_size, data_shape=(3,input_len,input_len), path_imgrec=filepath)
+        for batch in iterator:
+            return batch.data[0]
+
+    try:
+        tmp_path = str(tmp_path)
+        M = GluonModel(model, (batch_size,3,input_len,input_len), 'float32', tmp_path)
+        onnx_file = M.export_onnx()
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+        input_name = session.get_inputs()[0].name
+
+        for video in act_recognition_test_data:
+            data = load_video(video)
+            mx_result = M.predict(data)
+            onnx_result = session.run([], {input_name: data.asnumpy()})[0]
+            assert_almost_equal(mx_result, onnx_result, rtol=0.001, atol=0.01)
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model_name', ['mobilenet1.0', 'inceptionv3', 'darknet53', 'resnest14'])
+def test_dynamic_shape_cv_inference_onnxruntime(tmp_path, model_name):
+    tmp_path = str(tmp_path)
+    try:
+        M = GluonModel(model_name, (1, 3, 512, 512), 'float32', tmp_path)
+        dynamic_input_shapes = [(None, 3, 512, 512)]
+        onnx_file = M.export_onnx_dynamic(dynamic_input_shapes)
+
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        sess = onnxruntime.InferenceSession(onnx_file, ses_opt)
+
+        # test on a different batch size
+        x = mx.random.uniform(0, 10, (5, 3, 512, 512))
+        in_tensors = [x]
+        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
+        pred_on = sess.run(None, input_dict)
+
+        pred_mx = M.predict(x)
+
+        assert_almost_equal(pred_mx, pred_on[0])
+
+    finally:
+        shutil.rmtree(tmp_path)
diff --git a/tests/python-pytest/onnx/test_onnxruntime_nlp.py b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
new file mode 100644
index 000000000000..ea47b51001cb
--- /dev/null
+++ b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
@@ -0,0 +1,427 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+import numpy as np
+import onnxruntime
+
+from mxnet.test_utils import assert_almost_equal
+from common import with_seed
+
+import json
+import os
+import pytest
+import shutil
+
+
+@with_seed()
+@pytest.mark.parametrize('model_name', ['roberta_24_1024_16', 'roberta_12_768_12'])
+def test_roberta_inference_onnxruntime(tmp_path, model_name):
+    tmp_path = str(tmp_path)
+    try:
+        import gluonnlp as nlp
+        ctx = mx.cpu(0)
+
+        dataset= 'openwebtext_ccnews_stories_books_cased'#'book_corpus_wiki_en_uncased'
+        model, _ = nlp.model.get_model(
+        name=model_name,
+        ctx=ctx,
+        pretrained=True,
+        use_decoder=True,
+        dataset_name=dataset)
+        
+        model.hybridize(static_alloc=False)
+
+        batch = 2
+        seq_length = 32
+        num_masked_positions = 1
+        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32', ctx=ctx)
+        valid_length = mx.nd.array([seq_length] * batch, dtype='float32', ctx=ctx)
+        masked_positions = mx.nd.random.uniform(0, 32, shape=(batch, num_masked_positions),
+            dtype='float32', ctx=ctx).astype('int32')
+
+        sequence_outputs, attention_outputs= model(inputs, valid_length, masked_positions)    
+
+        prefix = "%s/roberta" % tmp_path
+        model.export(prefix)
+
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+        input_shapes = [(batch, seq_length), (batch,), (batch, num_masked_positions)]
+        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
+                                                            [np.float32, np.float32, np.int32],
+                                                            onnx_file, verbose=True)
+
+        sess_options = onnxruntime.SessionOptions()
+        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
+
+        in_tensors = [inputs, valid_length, masked_positions]
+        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
+        pred = sess.run(None, input_dict)
+
+        assert_almost_equal(sequence_outputs, pred[0])
+        assert_almost_equal(attention_outputs, pred[1])
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model', ['bert_12_768_12', 'bert_24_1024_16'])
+def test_bert_inference_onnxruntime(tmp_path, model):
+    tmp_path = str(tmp_path)
+    try:
+        import gluonnlp as nlp
+        dataset = 'book_corpus_wiki_en_uncased'
+        ctx = mx.cpu(0)
+        model, vocab = nlp.model.get_model(
+            name=model,
+            ctx=ctx,
+            dataset_name=dataset,
+            pretrained=True,
+            use_pooler=True,
+            use_decoder=False,
+            use_classifier=False)
+
+        model.hybridize(static_alloc=True)
+
+        batch = 5
+        seq_length = 16
+        # create synthetic test data
+        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32')
+        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
+        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
+
+        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
+
+        prefix = "%s/bert" % tmp_path
+        model.export(prefix)
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+
+
+        input_shapes = [(batch, seq_length), (batch, seq_length), (batch,)]
+        input_types = [np.float32, np.float32, np.float32]
+        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes, input_types, onnx_file)
+
+
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+        onnx_inputs = [inputs, token_types, valid_length]
+        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
+        pred_onx, cls_onx = session.run(None, input_dict)
+
+        assert_almost_equal(seq_encoding, pred_onx)
+        assert_almost_equal(cls_encoding, cls_onx)
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model_name', ['distilbert_6_768_12'])
+def test_distilbert_inference_onnxruntime(tmp_path, model_name):
+    tmp_path = str(tmp_path)
+    try:
+        import gluonnlp as nlp
+        dataset = 'distilbert_book_corpus_wiki_en_uncased'
+        ctx = mx.cpu(0)
+        model, _ = nlp.model.get_model(
+            name=model_name,
+            ctx=ctx,
+            pretrained=True,
+            dataset_name=dataset)
+
+        model.hybridize(static_alloc=True)
+
+        batch = 2
+        seq_length = 32
+        num_masked_positions = 1
+        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32', ctx=ctx)
+        valid_length = mx.nd.array([seq_length] * batch, dtype='float32', ctx=ctx)
+
+        sequence_outputs = model(inputs, valid_length)
+
+        prefix = "%s/distilbert" % tmp_path
+        model.export(prefix)
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+
+        input_shapes = [(batch, seq_length), (batch,)]
+        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
+                                                            [np.float32, np.float32],
+                                                            onnx_file, verbose=True)
+        sess_options = onnxruntime.SessionOptions()
+        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
+
+        in_tensors = [inputs, valid_length]
+        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
+        pred = sess.run(None, input_dict)
+
+        assert_almost_equal(sequence_outputs, pred[0])
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model_name', [('standard_lstm_lm_200', 200), ('standard_lstm_lm_650', 650),
+                                        ('standard_lstm_lm_1500', 1500)])
+@pytest.mark.parametrize('seq_length', [16, 32])
+def test_standard_rnn_lstm_pretrained_inference_onnxruntime(tmp_path, model_name, seq_length):
+    try:
+        import gluonnlp as nlp
+        ctx = mx.cpu()
+        dataset= 'wikitext-2'
+        model, _ = nlp.model.get_model(
+            name=model_name[0],
+            ctx=ctx,
+            pretrained=True,
+            dataset_name=dataset,
+            dropout=0)
+        model.hybridize()
+
+        batch = 2
+        num_hidden = model_name[1]
+        num_layers = 2
+        inputs = mx.nd.random.randint(0, 33278, shape=(seq_length, batch),
+                                      ctx=ctx).astype('float32')
+        begin_state = model.begin_state(func=mx.nd.random.uniform, low=0, high=1,
+                                        batch_size=batch, dtype='float32', ctx=ctx)
+        out, out_state= model(inputs, begin_state)
+
+        prefix = "%s/standard_rnn_lstm" % tmp_path
+        model.export(prefix)
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+
+        input_shapes = [(seq_length, batch), np.shape(begin_state[0]), np.shape(begin_state[1])]
+        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
+                                                            [np.float32, np.float32, np.float32],
+                                                            onnx_file, verbose=True)
+        sess_options = onnxruntime.SessionOptions()
+        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
+
+        in_tensors = [inputs, begin_state[0], begin_state[1]]
+        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
+        pred = sess.run(None, input_dict)
+
+        assert_almost_equal(out, pred[2])
+        assert_almost_equal(out_state[0], pred[0])
+        assert_almost_equal(out_state[1], pred[1])
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model', ['bert_12_768_12'])
+def test_dynamic_shape_bert_inference_onnxruntime(tmp_path, model):
+    tmp_path = str(tmp_path)
+    try:
+        import gluonnlp as nlp
+        dataset = 'book_corpus_wiki_en_uncased'
+        ctx = mx.cpu(0)
+        model, vocab = nlp.model.get_model(
+            name=model,
+            ctx=ctx,
+            dataset_name=dataset,
+            pretrained=True,
+            use_pooler=True,
+            use_decoder=False,
+            num_layers = 3,
+            hparam_allow_override = True,
+            use_classifier=False)
+
+        model.hybridize(static_alloc=True)
+
+        batch = 5
+        seq_length = 16
+        # create synthetic test data
+        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32')
+        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
+        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
+
+        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
+
+        prefix = "%s/bert" % tmp_path
+        model.export(prefix)
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+
+        dynamic_input_shapes = [(None, seq_length), (None, seq_length), (None,)]
+        input_shapes = [(batch, seq_length), (batch, seq_length), (batch,)]
+        input_types = [np.float32, np.float32, np.float32]
+        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
+                                                            input_types, onnx_file,
+                                                            dynamic=True,
+                                                            dynamic_input_shapes=dynamic_input_shapes)
+
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+
+        # test on a different batch size
+        batch = 7
+        seq_length = 16
+        inputs = mx.nd.random.uniform(0, 30522, shape=(batch, seq_length), dtype='float32')
+        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
+        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
+
+        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
+
+        onnx_inputs = [inputs, token_types, valid_length]
+        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
+        pred_onx, cls_onx = session.run(None, input_dict)
+
+        assert_almost_equal(seq_encoding, pred_onx)
+        assert_almost_equal(cls_encoding, cls_onx)
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model_name', [('awd_lstm_lm_600', 600), ('awd_lstm_lm_1150', 1150)])
+@pytest.mark.parametrize('seq_length', [16, 128, 256])
+def test_awd_rnn_lstm_pretrained_inference_onnxruntime(tmp_path, model_name, seq_length):
+    try:
+        import gluonnlp as nlp
+        ctx = mx.cpu()
+        dataset= 'wikitext-2'
+        model, _ = nlp.model.get_model(
+            name=model_name[0],
+            ctx=ctx,
+            pretrained=True,
+            dataset_name=dataset,
+            dropout=0)
+        model.hybridize()
+
+        batch = 2
+        num_hidden = model_name[1]
+        num_layers = 2
+        inputs = mx.nd.random.randint(0, 33278, shape=(seq_length, batch),
+                                      ctx=ctx).astype('float32')
+        begin_state = model.begin_state(func=mx.nd.random.uniform, low=0, high=1,
+                                        batch_size=batch, dtype='float32', ctx=ctx)
+        out, out_state= model(inputs, begin_state)
+
+        prefix = "%s/awd_lstm" % tmp_path
+        model.export(prefix)
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+
+        input_shapes = [(seq_length, batch), 
+                        np.shape(begin_state[0][0]), np.shape(begin_state[0][1]),
+                        np.shape(begin_state[1][0]), np.shape(begin_state[1][1]),
+                        np.shape(begin_state[2][0]), np.shape(begin_state[2][1])]
+        input_types = [np.float32, np.float32, np.float32, np.float32, np.float32, np.float32,
+                       np.float32]
+        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
+                                                            input_types, onnx_file, verbose=True)
+
+        sess_options = onnxruntime.SessionOptions()
+        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+        sess = onnxruntime.InferenceSession(onnx_file, sess_options)
+
+        in_tensors = [inputs, begin_state[0][0], begin_state[0][1],
+                      begin_state[1][0], begin_state[1][1],
+                      begin_state[2][0], begin_state[2][1]]
+        input_dict = dict((sess.get_inputs()[i].name, in_tensors[i].asnumpy()) for i in range(len(in_tensors)))
+        pred = sess.run(None, input_dict)
+
+        assert_almost_equal(out, pred[6])
+        assert_almost_equal(out_state[0][0], pred[0])
+        assert_almost_equal(out_state[0][1], pred[1])
+        assert_almost_equal(out_state[1][0], pred[2])
+        assert_almost_equal(out_state[1][1], pred[3])
+        assert_almost_equal(out_state[2][0], pred[4])
+        assert_almost_equal(out_state[2][1], pred[5])
+
+    finally:
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model_name', ['ernie_12_768_12'])
+def test_ernie_inference_onnxruntime(tmp_path, model_name):
+    tmp_path = str(tmp_path)
+    try:
+        import gluonnlp as nlp
+        dataset = 'baidu_ernie_uncased'
+        ctx = mx.cpu(0)
+        model, vocab = nlp.model.get_model(
+            name=model_name,
+            ctx=ctx,
+            dataset_name=dataset,
+            pretrained=True,
+            use_pooler=True,
+            use_decoder=False,
+            num_layers = 3,
+            hparam_allow_override = True,
+            use_classifier=False)
+
+        model.hybridize(static_alloc=True)
+
+        batch = 5
+        seq_length = 16
+        # create synthetic test data
+        inputs = mx.nd.random.uniform(0, 17964, shape=(batch, seq_length), dtype='float32')
+        token_types = mx.nd.random.uniform(0, 2, shape=(batch, seq_length), dtype='float32')
+        valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
+
+        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
+
+        prefix = "%s/ernie" % tmp_path
+        model.export(prefix)
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+
+        input_shapes = [(batch, seq_length), (batch, seq_length), (batch,)]
+        input_types = [np.float32, np.float32, np.float32]
+        converted_model_path = mx.contrib.onnx.export_model(sym_file, params_file, input_shapes,
+                                                            input_types, onnx_file)
+
+        # create onnxruntime session using the generated onnx file
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+
+        seq_encoding, cls_encoding = model(inputs, token_types, valid_length)
+
+        onnx_inputs = [inputs, token_types, valid_length]
+        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
+        pred_onx, cls_onx = session.run(None, input_dict)
+
+        assert_almost_equal(seq_encoding, pred_onx)
+        assert_almost_equal(cls_encoding, cls_onx)
+
+    finally:
+        shutil.rmtree(tmp_path)

From 5da7bda94565182f18e3e196ad81adef7d625e62 Mon Sep 17 00:00:00 2001
From: Wei Chu <weichu@amazon.com>
Date: Tue, 23 Mar 2021 13:16:20 -0700
Subject: [PATCH 2/7] pytest with mark

---
 ci/docker/runtime_functions.sh                |  13 +-
 .../python-pytest/onnx/test_onnxruntime_cv.py |  17 +-
 .../onnx/test_onnxruntime_nlp.py              | 169 ++++++++++++++++++
 3 files changed, 180 insertions(+), 19 deletions(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 3b0270fd1311..e44cffcb81ef 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -1263,17 +1263,8 @@ integrationtest_ubuntu_cpu_onnx() {
     # Skip this as https://github.com/apache/incubator-mxnet/pull/19914 breaks import
     #pytest $COV_ARG --verbose tests/python-pytest/onnx/test_models.py
     #pytest $COV_ARG --verbose tests/python-pytest/onnx/test_node.py
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_nlp.py::test_bert_inference_onnxruntime
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[mobilenetv3_large]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[resnest200]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[resnet50_v2]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_class_model_inference_onnxruntime[vgg19_bn]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_obj_detection_model_inference_onnxruntime[center_net_resnet101_v1b_voc]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_img_segmentation_model_inference_onnxruntime[deeplab_resnet50_citys]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_pose_estimation_model_inference_onnxruntime[mobile_pose_mobilenet1.0]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_action_recognition_model_inference_onnxruntime[inceptionv3_kinetics400]
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_nlp.py::test_dynamic_shape_bert_inference_onnxruntime
-    pytest $COV_ARG --verbose tests/python-pytest/onnx/test_onnxruntime_cv.py::test_dynamic_shape_cv_inference_onnxruntime
+    pytest $COV_ARG -v -m integrationtest_onnx tests/python-pytest/onnx/test_onnxruntime_cv.py
+    pytest $COV_ARG -v -m integrationtest_onnx tests/python-pytest/onnx/test_onnxruntime_nlp.py
 }
 
 integrationtest_ubuntu_gpu_python() {
diff --git a/tests/python-pytest/onnx/test_onnxruntime_cv.py b/tests/python-pytest/onnx/test_onnxruntime_cv.py
index d2041d58e212..143923ff7ee4 100644
--- a/tests/python-pytest/onnx/test_onnxruntime_cv.py
+++ b/tests/python-pytest/onnx/test_onnxruntime_cv.py
@@ -114,13 +114,13 @@ def obj_class_test_images(tmpdir_factory):
     'mobilenetv2_0.75',
     'mobilenetv2_0.5',
     'mobilenetv2_0.25',
-    'mobilenetv3_large',
+    pytest.param('mobilenetv3_large', marks=pytest.mark.integrationtest_onnx),
     'mobilenetv3_small',
     'resnest14',
     'resnest26',
     'resnest50',
     'resnest101',
-    'resnest200',
+    pytest.param('resnest200', marks=pytest.mark.integrationtest_onnx),
     'resnest269',
     'resnet18_v1',
     'resnet18_v1b_0.89',
@@ -132,7 +132,7 @@ def obj_class_test_images(tmpdir_factory):
     'resnet50_v1d_0.48',
     'resnet50_v1d_0.37',
     'resnet50_v1d_0.11',
-    'resnet50_v2',
+    pytest.param('resnet50_v2', marks=pytest.mark.integrationtest_onnx),
     'resnet101_v1',
     'resnet101_v1d_0.76',
     'resnet101_v1d_0.73',
@@ -155,7 +155,7 @@ def obj_class_test_images(tmpdir_factory):
     'vgg16',
     'vgg16_bn',
     'vgg19',
-    'vgg19_bn',
+    pytest.param('vgg19_bn', marks=pytest.mark.integrationtest_onnx),
     'xception',
     'inceptionv3'
 ])
@@ -218,7 +218,7 @@ def obj_detection_test_images(tmpdir_factory):
 @pytest.mark.parametrize('model', [
     'center_net_resnet18_v1b_voc',
     'center_net_resnet50_v1b_voc',
-    'center_net_resnet101_v1b_voc',
+    pytest.param('center_net_resnet101_v1b_voc', marks=pytest.mark.integrationtest_onnx),
     'center_net_resnet18_v1b_coco',
     'center_net_resnet50_v1b_coco',
     'center_net_resnet101_v1b_coco',
@@ -349,7 +349,7 @@ def img_segmentation_test_images(tmpdir_factory):
     'fcn_resnet101_voc',
     'deeplab_resnet101_voc',
     'deeplab_resnet152_voc',
-    'deeplab_resnet50_citys',
+    pytest.param('deeplab_resnet50_citys', marks=pytest.mark.integrationtest_onnx),
     'deeplab_resnet101_citys',
     'deeplab_v3b_plus_wideresnet_citys'
 ])
@@ -412,7 +412,7 @@ def pose_estimation_test_images(tmpdir_factory):
     'alpha_pose_resnet101_v1b_coco',
     'mobile_pose_resnet18_v1b',
     'mobile_pose_resnet50_v1b',
-    'mobile_pose_mobilenet1.0',
+    pytest.param('mobile_pose_mobilenet1.0', marks=pytest.mark.integrationtest_onnx),
     'mobile_pose_mobilenetv2_1.0',
     'mobile_pose_mobilenetv3_large',
     'mobile_pose_mobilenetv3_small',
@@ -472,7 +472,7 @@ def act_recognition_test_data(tmpdir_factory):
     'resnet50_v1b_hmdb51',
     'resnet50_v1b_sthsthv2',
     'vgg16_ucf101',
-    'inceptionv3_kinetics400',
+    pytest.param('inceptionv3_kinetics400', marks=pytest.mark.integrationtest_onnx),
     'inceptionv3_ucf101',
 ])
 def test_action_recognition_model_inference_onnxruntime(tmp_path, model, act_recognition_test_data):
@@ -507,6 +507,7 @@ def load_video(filepath):
 
 
 @with_seed()
+@pytest.mark.integrationtest_onnx
 @pytest.mark.parametrize('model_name', ['mobilenet1.0', 'inceptionv3', 'darknet53', 'resnest14'])
 def test_dynamic_shape_cv_inference_onnxruntime(tmp_path, model_name):
     tmp_path = str(tmp_path)
diff --git a/tests/python-pytest/onnx/test_onnxruntime_nlp.py b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
index ea47b51001cb..566e64dd14eb 100644
--- a/tests/python-pytest/onnx/test_onnxruntime_nlp.py
+++ b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
@@ -83,6 +83,7 @@ def test_roberta_inference_onnxruntime(tmp_path, model_name):
 
 
 @with_seed()
+@pytest.mark.integrationtest_onnx
 @pytest.mark.parametrize('model', ['bert_12_768_12', 'bert_24_1024_16'])
 def test_bert_inference_onnxruntime(tmp_path, model):
     tmp_path = str(tmp_path)
@@ -238,6 +239,7 @@ def test_standard_rnn_lstm_pretrained_inference_onnxruntime(tmp_path, model_name
 
 
 @with_seed()
+@pytest.mark.integrationtest_onnx
 @pytest.mark.parametrize('model', ['bert_12_768_12'])
 def test_dynamic_shape_bert_inference_onnxruntime(tmp_path, model):
     tmp_path = str(tmp_path)
@@ -425,3 +427,170 @@ def test_ernie_inference_onnxruntime(tmp_path, model_name):
 
     finally:
         shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model_name', ['transformer_en_de_512'])
+def test_transformer_pretrained_inference_onnxruntime(tmp_path, model_name):
+    """Export the inference sub-blocks of a pretrained GluonNLP transformer to ONNX
+    and check that onnxruntime reproduces the MXNet output of each of them.
+    """
+    tmp_path = str(tmp_path)
+    try:
+        import gluonnlp as nlp
+        dataset = 'WMT2014'
+        ctx = mx.cpu(0)
+        model, _, _ = nlp.model.get_model(
+            name=model_name,
+            ctx=ctx,
+            pretrained=True,
+            dataset_name=dataset)
+
+        model.hybridize(static_alloc=False)
+
+        batch = 7
+        seq_length = 16
+        C_in = 512
+        C_out = 512
+        src = mx.nd.random.uniform(0, 36794, shape=(batch, seq_length), dtype='float32')
+        step_input = mx.nd.random.uniform(0, 36794, shape=(batch,), dtype='float32')
+        src_valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
+
+        encoder_outputs, encoder_additional_outputs = model.encode(src,
+                                                                   valid_length=src_valid_length)
+
+        decoder_states = model.decoder.init_state_from_encoder(encoder_outputs, src_valid_length)
+
+        step_output, states, additional_outputs = model.decode_step(step_input, decoder_states)
+
+        # skip export of 'decoder' as it's used for training only
+        for component in ['encoder', 'one_step_ahead_decoder', 'src_embed', 'tgt_embed',
+                         'tgt_proj']:
+            # export() writes <prefix>-symbol.json and <prefix>-0000.params; the
+            # verify_*() helpers below convert these files to ONNX by the same prefix
+            prefix = "%s/%s" %(tmp_path, component)
+            getattr(model, component).export(prefix)
+
+        def export_to_onnx(prefix, input_shapes, input_types, **kwargs):
+            sym_file = "%s-symbol.json" % prefix
+            params_file = "%s-0000.params" % prefix
+            onnx_file = "%s.onnx" % prefix
+            return mx.contrib.onnx.export_model(sym_file, params_file, input_shapes, input_types,
+                                                onnx_file, **kwargs)
+
+        def onnx_runtime_predict(onnx_file, onnx_inputs):
+            ses_opt = onnxruntime.SessionOptions()
+            ses_opt.log_severity_level = 3
+            session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+            input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy())
+                            for i in range(len(onnx_inputs)))
+            return session.run(None, input_dict)
+
+        def verify_encoder():
+            inputs = mx.nd.random.uniform(-1, 1, shape=(batch, seq_length, C_in), dtype='float32')
+            valid_length = mx.nd.array([seq_length] * batch, dtype='float32')
+            pred = model.encoder(inputs, valid_length=valid_length)
+
+            prefix = "%s/encoder" %tmp_path
+            input_shapes = [(batch, seq_length, C_in), (batch,)]
+            input_types = [np.float32, np.float32]
+            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
+            onnx_inputs = [inputs, valid_length]
+            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
+
+            assert_almost_equal(pred[0], pred_onx[0])
+
+        def verify_src_embed():
+            src = mx.nd.random.uniform(0, 36794, shape=(batch, seq_length), dtype='float32')
+            pred = model.src_embed(src)
+
+            prefix = "%s/src_embed" %tmp_path
+            input_shapes = [(batch, seq_length)]
+            input_types = [np.float32]
+            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
+            onnx_inputs = [src]
+            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
+
+            assert_almost_equal(pred, pred_onx[0])
+
+        def verify_tgt_embed():
+            tgt = mx.nd.random.uniform(0, 36794, shape=(batch, seq_length), dtype='float32')
+            pred = model.tgt_embed(tgt)
+
+            prefix = "%s/tgt_embed" %tmp_path
+            input_shapes = [(batch, seq_length)]
+            input_types = [np.float32]
+            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
+            onnx_inputs = [tgt]
+            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
+
+            assert_almost_equal(pred, pred_onx[0])
+
+        def verify_tgt_proj():
+            decoder_out = mx.nd.random.uniform(0, 512, shape=(batch, seq_length, C_out),
+                                               dtype='float32')
+            pred = model.tgt_proj(decoder_out)
+
+            prefix = "%s/tgt_proj" %tmp_path
+            input_shapes = [(batch, seq_length, C_out)]
+            input_types = [np.float32]
+            onnx_file = export_to_onnx(prefix, input_shapes, input_types)
+            onnx_inputs = [decoder_out]
+            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
+
+            assert_almost_equal(pred, pred_onx[0], rtol=1.e-04, atol=1.5e-03)
+
+        def verify_one_step_ahead_decoder():
+            prefix = "%s/one_step_ahead_decoder" %tmp_path
+
+            # the input data order
+            perm = [2, 0, 1]
+            input_shapes = [(batch, seq_length, C_in), (batch, seq_length, C_out),
+                            (batch, seq_length)]
+            input_shapes = [input_shapes[i] for i in perm]
+            dynamic_input_shapes = [(batch, 'seq_length', C_in), (batch, 'seq_length', C_out),
+                                    (batch, 'seq_length')]
+            dynamic_input_shapes = [dynamic_input_shapes[i] for i in perm]
+            input_types = [np.float32, np.float32, np.float32]
+            # do a dynamic export
+            onnx_file = export_to_onnx(prefix, input_shapes, input_types, dynamic=True,
+                                       dynamic_input_shapes=dynamic_input_shapes)
+
+            # step 0
+            step_input = mx.nd.random.uniform(-1, 1, shape=(batch, C_in), dtype='float32')
+            # mxnet
+            pred, step_states, _ = model.one_step_ahead_decoder(step_input, decoder_states)
+            # onnx
+            # note that we need to expand the sequence axis just like in here:
+            # https://github.com/dmlc/gluon-nlp/blob/v0.10.x/src/gluonnlp/model/transformer.py#L831
+            input_onx = mx.nd.expand_dims(step_input, axis=1)
+            onnx_inputs = [input_onx, decoder_states[0], decoder_states[1]]
+            onnx_inputs = [onnx_inputs[i] for i in perm]
+            pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
+
+            assert_almost_equal(pred, pred_onx[0])
+
+            # step >= 1
+            for i in range(20):
+                step_input = mx.nd.random.uniform(-10*i, 10*i, shape=(batch, C_in), dtype='float32')
+                # mxnet
+                pred, step_states, _ = model.one_step_ahead_decoder(step_input, step_states)
+                # onnx
+                # note that we need to concat the step_input with the previous inputs
+                # just like in here:
+                # https://github.com/dmlc/gluon-nlp/blob/v0.10.x/src/gluonnlp/model/transformer.py#L828
+                input_onx = mx.nd.concat(input_onx, mx.nd.expand_dims(step_input, axis=1), dim=1)
+                onnx_inputs = [input_onx, decoder_states[0], decoder_states[1]]
+                onnx_inputs = [onnx_inputs[i] for i in perm]
+                pred_onx = onnx_runtime_predict(onnx_file, onnx_inputs)
+
+                assert_almost_equal(pred, pred_onx[0])
+
+        verify_encoder()
+        verify_src_embed()
+        verify_tgt_embed()
+        verify_tgt_proj()
+        verify_one_step_ahead_decoder()
+
+    finally:
+        shutil.rmtree(tmp_path)
\ No newline at end of file

From 5fb103076c735fbb93b08a21274351610da8802e Mon Sep 17 00:00:00 2001
From: Wei Chu <weichu@amazon.com>
Date: Tue, 23 Mar 2021 15:34:05 -0700
Subject: [PATCH 3/7] add pytest.ini

---
 pytest.ini | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000000..a2481552b8e4
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[pytest]
+markers =
+    seed: set the python, numpy and mxnet random seeds to a specified value for test reproducibility
+    serial: mark a test that requires more resources to run that are thus only suitable for serial run.
+    remote_required: mark a test that requires internet access.
+    gpu: mark a test that requires GPU.
+    integration: mark an integration test
+    onnx_coverage: ONNX coverage test
+    garbage_expected: this test leaks ndarray references. The tested functionality is broken or there is a Python bug.
+    integrationtest_onnx: mark a test for onnx integration test in CI
+
+env =
+    MXNET_HOME=tests/data
+
+timeout = 1200
+faulthandler_timeout = 1200
\ No newline at end of file

From 005ef529b15707348dd7715d1129f28375b9af99 Mon Sep 17 00:00:00 2001
From: Wei Chu <weichu@amazon.com>
Date: Tue, 23 Mar 2021 20:11:37 -0700
Subject: [PATCH 4/7] use integration tag

---
 ci/docker/runtime_functions.sh                 |  4 ++--
 pytest.ini                                     |  1 -
 .../python-pytest/onnx/test_onnxruntime_cv.py  | 18 +++++++++---------
 .../python-pytest/onnx/test_onnxruntime_nlp.py |  4 ++--
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index e44cffcb81ef..7f11151bdca1 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -1263,8 +1263,8 @@ integrationtest_ubuntu_cpu_onnx() {
     # Skip this as https://github.com/apache/incubator-mxnet/pull/19914 breaks import
     #pytest $COV_ARG --verbose tests/python-pytest/onnx/test_models.py
     #pytest $COV_ARG --verbose tests/python-pytest/onnx/test_node.py
-    pytest $COV_ARG -v -m integrationtest_onnx tests/python-pytest/onnx/test_onnxruntime_cv.py
-    pytest $COV_ARG -v -m integrationtest_onnx tests/python-pytest/onnx/test_onnxruntime_nlp.py
+    pytest $COV_ARG -v -m integration tests/python-pytest/onnx/test_onnxruntime_cv.py
+    pytest $COV_ARG -v -m integration tests/python-pytest/onnx/test_onnxruntime_nlp.py
 }
 
 integrationtest_ubuntu_gpu_python() {
diff --git a/pytest.ini b/pytest.ini
index a2481552b8e4..55783a14ffa6 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -24,7 +24,6 @@ markers =
     integration: mark an integration test
     onnx_coverage: ONNX coverage test
     garbage_expected: this test leaks ndarray references. The tested functionality is broken or there is a Python bug.
-    integrationtest_onnx: mark a test for onnx integration test in CI
 
 env =
     MXNET_HOME=tests/data
diff --git a/tests/python-pytest/onnx/test_onnxruntime_cv.py b/tests/python-pytest/onnx/test_onnxruntime_cv.py
index 143923ff7ee4..4e455147dcdd 100644
--- a/tests/python-pytest/onnx/test_onnxruntime_cv.py
+++ b/tests/python-pytest/onnx/test_onnxruntime_cv.py
@@ -114,13 +114,13 @@ def obj_class_test_images(tmpdir_factory):
     'mobilenetv2_0.75',
     'mobilenetv2_0.5',
     'mobilenetv2_0.25',
-    pytest.param('mobilenetv3_large', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('mobilenetv3_large', marks=pytest.mark.integration),
     'mobilenetv3_small',
     'resnest14',
     'resnest26',
     'resnest50',
     'resnest101',
-    pytest.param('resnest200', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('resnest200', marks=pytest.mark.integration),
     'resnest269',
     'resnet18_v1',
     'resnet18_v1b_0.89',
@@ -132,7 +132,7 @@ def obj_class_test_images(tmpdir_factory):
     'resnet50_v1d_0.48',
     'resnet50_v1d_0.37',
     'resnet50_v1d_0.11',
-    pytest.param('resnet50_v2', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('resnet50_v2', marks=pytest.mark.integration),
     'resnet101_v1',
     'resnet101_v1d_0.76',
     'resnet101_v1d_0.73',
@@ -155,7 +155,7 @@ def obj_class_test_images(tmpdir_factory):
     'vgg16',
     'vgg16_bn',
     'vgg19',
-    pytest.param('vgg19_bn', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('vgg19_bn', marks=pytest.mark.integration),
     'xception',
     'inceptionv3'
 ])
@@ -218,7 +218,7 @@ def obj_detection_test_images(tmpdir_factory):
 @pytest.mark.parametrize('model', [
     'center_net_resnet18_v1b_voc',
     'center_net_resnet50_v1b_voc',
-    pytest.param('center_net_resnet101_v1b_voc', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('center_net_resnet101_v1b_voc', marks=pytest.mark.integration),
     'center_net_resnet18_v1b_coco',
     'center_net_resnet50_v1b_coco',
     'center_net_resnet101_v1b_coco',
@@ -349,7 +349,7 @@ def img_segmentation_test_images(tmpdir_factory):
     'fcn_resnet101_voc',
     'deeplab_resnet101_voc',
     'deeplab_resnet152_voc',
-    pytest.param('deeplab_resnet50_citys', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('deeplab_resnet50_citys', marks=pytest.mark.integration),
     'deeplab_resnet101_citys',
     'deeplab_v3b_plus_wideresnet_citys'
 ])
@@ -412,7 +412,7 @@ def pose_estimation_test_images(tmpdir_factory):
     'alpha_pose_resnet101_v1b_coco',
     'mobile_pose_resnet18_v1b',
     'mobile_pose_resnet50_v1b',
-    pytest.param('mobile_pose_mobilenet1.0', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('mobile_pose_mobilenet1.0', marks=pytest.mark.integration),
     'mobile_pose_mobilenetv2_1.0',
     'mobile_pose_mobilenetv3_large',
     'mobile_pose_mobilenetv3_small',
@@ -472,7 +472,7 @@ def act_recognition_test_data(tmpdir_factory):
     'resnet50_v1b_hmdb51',
     'resnet50_v1b_sthsthv2',
     'vgg16_ucf101',
-    pytest.param('inceptionv3_kinetics400', marks=pytest.mark.integrationtest_onnx),
+    pytest.param('inceptionv3_kinetics400', marks=pytest.mark.integration),
     'inceptionv3_ucf101',
 ])
 def test_action_recognition_model_inference_onnxruntime(tmp_path, model, act_recognition_test_data):
@@ -507,7 +507,7 @@ def load_video(filepath):
 
 
 @with_seed()
-@pytest.mark.integrationtest_onnx
+@pytest.mark.integration
 @pytest.mark.parametrize('model_name', ['mobilenet1.0', 'inceptionv3', 'darknet53', 'resnest14'])
 def test_dynamic_shape_cv_inference_onnxruntime(tmp_path, model_name):
     tmp_path = str(tmp_path)
diff --git a/tests/python-pytest/onnx/test_onnxruntime_nlp.py b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
index 566e64dd14eb..ac1ad8171dde 100644
--- a/tests/python-pytest/onnx/test_onnxruntime_nlp.py
+++ b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
@@ -83,7 +83,7 @@ def test_roberta_inference_onnxruntime(tmp_path, model_name):
 
 
 @with_seed()
-@pytest.mark.integrationtest_onnx
+@pytest.mark.integration
 @pytest.mark.parametrize('model', ['bert_12_768_12', 'bert_24_1024_16'])
 def test_bert_inference_onnxruntime(tmp_path, model):
     tmp_path = str(tmp_path)
@@ -239,7 +239,7 @@ def test_standard_rnn_lstm_pretrained_inference_onnxruntime(tmp_path, model_name
 
 
 @with_seed()
-@pytest.mark.integrationtest_onnx
+@pytest.mark.integration
 @pytest.mark.parametrize('model', ['bert_12_768_12'])
 def test_dynamic_shape_bert_inference_onnxruntime(tmp_path, model):
     tmp_path = str(tmp_path)

From f61f6ef3a4248bb541a7fbb32601db3c0039a0a5 Mon Sep 17 00:00:00 2001
From: Wei Chu <weichu@amazon.com>
Date: Fri, 26 Mar 2021 14:20:06 -0700
Subject: [PATCH 5/7] remove test_onnxruntime.py

---
 tests/python-pytest/onnx/test_onnxruntime.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 tests/python-pytest/onnx/test_onnxruntime.py

diff --git a/tests/python-pytest/onnx/test_onnxruntime.py b/tests/python-pytest/onnx/test_onnxruntime.py
deleted file mode 100644
index e69de29bb2d1..000000000000

From c97b141335f581b7690e44b7aa30ceb55d3cfff5 Mon Sep 17 00:00:00 2001
From: Wei Chu <weichu@amazon.com>
Date: Fri, 26 Mar 2021 14:24:05 -0700
Subject: [PATCH 6/7] add test gpt

---
 .../onnx/test_onnxruntime_nlp.py              | 77 ++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/tests/python-pytest/onnx/test_onnxruntime_nlp.py b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
index ac1ad8171dde..b566369d5233 100644
--- a/tests/python-pytest/onnx/test_onnxruntime_nlp.py
+++ b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
@@ -593,4 +593,79 @@ def verify_one_step_ahead_decoder():
         verify_one_step_ahead_decoder()
 
     finally:
-        shutil.rmtree(tmp_path)
\ No newline at end of file
+        shutil.rmtree(tmp_path)
+
+
+@with_seed()
+@pytest.mark.parametrize('model_params', [('gpt2_117m', 24), ('gpt2_345m', 48)])
+def test_gpt_pretrained_inference_onnxruntime(tmp_path, model_params):
+    """Compare MXNet and onnxruntime output and states of a pretrained GPT-2 model."""
+    tmp_path = str(tmp_path)
+    try:
+        import gluonnlp as nlp
+        import urllib.request
+        from zipfile import ZipFile
+        import importlib.util
+        import sys
+
+        url = 'https://nlp.gluon.ai/_downloads/77d227fbc8f1613e6802acc7253cc090/text_generation.zip'
+        # keep the archive inside tmp_path so that the cleanup in `finally` removes it as well
+        urllib.request.urlretrieve(url, tmp_path + '/text_generation.zip')
+
+        with ZipFile(tmp_path + '/text_generation.zip', 'r') as zipObj:
+            zipObj.extractall(tmp_path)
+
+        # load in the text_generation module, refer to:
+        # https://github.com/dmlc/gluon-nlp/tree/v0.10.x/scripts/text_generation
+        spec = importlib.util.spec_from_file_location(
+            'text_generation',
+            tmp_path + '/text_generation/__init__.py')
+        mod = importlib.util.module_from_spec(spec)
+        sys.modules[spec.name] = mod
+        spec.loader.exec_module(mod)
+
+        ctx = mx.cpu(0)
+        model_name, num_states = model_params
+        dataset = 'openai_webtext'
+        # get_model() is overridden in here:
+        # https://github.com/dmlc/gluon-nlp/blob/v0.10.x/scripts/text_generation/model/__init__.py#L23
+        model, _ = mod.model.get_model(
+            name=model_name,
+            ctx=ctx,
+            pretrained=True,
+            dataset_name=dataset)
+
+        model.hybridize()
+
+        batch = 4
+        seq_length = 64
+        inputs = mx.nd.random.uniform(0, 50257, shape=(batch, seq_length), dtype='float32',
+                                      ctx=ctx)
+
+        pred = model(inputs)
+
+        prefix = "%s/%s" % (tmp_path, model_name)
+        model.export(prefix)
+        sym_file = "%s-symbol.json" % prefix
+        params_file = "%s-0000.params" % prefix
+        onnx_file = "%s.onnx" % prefix
+
+        input_shapes = [(batch, seq_length)]
+        input_types = [np.float32]
+        mx.contrib.onnx.export_model(sym_file, params_file, input_shapes, input_types, onnx_file)
+
+        ses_opt = onnxruntime.SessionOptions()
+        ses_opt.log_severity_level = 3
+        session = onnxruntime.InferenceSession(onnx_file, ses_opt)
+        onnx_inputs = [inputs]
+        input_dict = dict((session.get_inputs()[i].name, onnx_inputs[i].asnumpy()) for i in range(len(onnx_inputs)))
+        pred_onx = session.run(None, input_dict)
+
+        # check output
+        assert_almost_equal(pred[0], pred_onx[0])
+        # check the num_states state tensors that follow the output
+        for i in range(num_states):
+            assert_almost_equal(pred[1][i], pred_onx[i+1])
+
+    finally:
+        shutil.rmtree(tmp_path)

From 8a071654f5ed77cf3ad25bfc31238c8051eacc8c Mon Sep 17 00:00:00 2001
From: Wei Chu <weichu@amazon.com>
Date: Fri, 26 Mar 2021 14:26:46 -0700
Subject: [PATCH 7/7] fix shape

---
 tests/python-pytest/onnx/test_onnxruntime_nlp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python-pytest/onnx/test_onnxruntime_nlp.py b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
index b566369d5233..ecd94df1e630 100644
--- a/tests/python-pytest/onnx/test_onnxruntime_nlp.py
+++ b/tests/python-pytest/onnx/test_onnxruntime_nlp.py
@@ -189,7 +189,7 @@ def test_distilbert_inference_onnxruntime(tmp_path, model_name):
 @with_seed()
 @pytest.mark.parametrize('model_name', [('standard_lstm_lm_200', 200), ('standard_lstm_lm_650', 650),
                                         ('standard_lstm_lm_1500', 1500)])
-@pytest.mark.parametrize('seq_length', [16, 32])
+@pytest.mark.parametrize('seq_length', [64, 128])
 def test_standard_rnn_lstm_pretrained_inference_onnxruntime(tmp_path, model_name, seq_length):
     try:
         import gluonnlp as nlp