diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..2688d24bc --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "third_party/models"] + path = tftrt/examples/object_detection/third_party/models + url = https://github.com/tensorflow/models +[submodule "third_party/cocoapi"] + path = tftrt/examples/object_detection/third_party/cocoapi + url = https://github.com/cocodataset/cocoapi diff --git a/README.md b/README.md index 57e3269b2..90394bad2 100644 --- a/README.md +++ b/README.md @@ -1 +1,10 @@ -Coming soon: Examples using [NVIDIA TensorRT](https://developer.nvidia.com/tensorrt) in TensorFlow. +# TensorRT Integration in TensorFlow + +This repository demonstrates TensorRT integration in TensorFlow. Currently +it contains examples for accelerated image classification and object +detection. + + +## Examples + +* [Object Detection](tftrt/examples/object_detection) diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..a727bc46e --- /dev/null +++ b/setup.py @@ -0,0 +1,27 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +from setuptools import find_packages, setup + +setup( + name='tftrt', + version='0.0', + description='NVIDIA TensorRT integration in TensorFlow', + author='NVIDIA', + packages=find_packages(), + install_requires=['tqdm'] +) diff --git a/tftrt/__init__.py b/tftrt/__init__.py new file mode 100644 index 000000000..04285a017 --- /dev/null +++ b/tftrt/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= diff --git a/tftrt/examples/__init__.py b/tftrt/examples/__init__.py new file mode 100644 index 000000000..04285a017 --- /dev/null +++ b/tftrt/examples/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= diff --git a/tftrt/examples/object_detection/README.md b/tftrt/examples/object_detection/README.md new file mode 100644 index 000000000..89dead0d2 --- /dev/null +++ b/tftrt/examples/object_detection/README.md @@ -0,0 +1,138 @@ +# TensorRT / TensorFlow Object Detection + +This package demonstrates object detection using TensorRT integration in TensorFlow. +It includes utilities for accuracy and performance benchmarking, along with +utilities for model construction and optimization. + +* [Setup](#setup) +* [Download](#od_download) +* [Optimize](#od_optimize) +* [Benchmark](#od_benchmark) +* [Test](#od_test) + + +## Setup + +1. Install object detection dependencies (from tftrt/examples/object_detection) + +```bash +git submodule update --init +./install_dependencies.sh +``` + +2. Ensure you've installed the tftrt package (from root folder of repository) + +```bash +python setup.py install --user +``` + + +## Object Detection + + +### Download +```python +from tftrt.examples.object_detection import download_model + +config_path, checkpoint_path = download_model('ssd_mobilenet_v1_coco', output_dir='models') +# help(download_model) for more +``` + + +### Optimize + +```python +from tftrt.examples.object_detection import optimize_model + +frozen_graph = optimize_model( + config_path=config_path, + checkpoint_path=checkpoint_path, + use_trt=True, + precision_mode='FP16' +) +# help(optimize_model) for other parameters +``` + + +### Benchmark + +First, we download the validation dataset + +```python +from tftrt.examples.object_detection import download_dataset + +images_dir, annotation_path = download_dataset('val2014', output_dir='dataset') +# help(download_dataset) for more +``` + +Next, we run inference over the dataset to benchmark the optimized model + +```python +from 
tftrt.examples.object_detection import benchmark_model + +statistics = benchmark_model( + frozen_graph=frozen_graph, + images_dir=images_dir, + annotation_path=annotation_path +) +# help(benchmark_model) for more parameters +``` + + +### Test +To simplify evaluation of different models with different optimization parameters +we include a ``test`` function that ingests a JSON file containing test arguments +and combines the model download, optimization, and benchmark steps. Below is an +example JSON file, call it ``my_test.json`` + +```json +{ + "source_model": { + "model_name": "ssd_inception_v2_coco", + "output_dir": "models" + }, + "optimization_config": { + "use_trt": true, + "precision_mode": "FP16", + "force_nms_cpu": true, + "replace_relu6": true, + "remove_assert": true, + "override_nms_score_threshold": 0.3, + "max_batch_size": 1 + }, + "benchmark_config": { + "images_dir": "coco/val2017", + "annotation_path": "coco/annotations/instances_val2017.json", + "batch_size": 1, + "image_shape": [600, 600], + "num_images": 4096, + "output_path": "stats/ssd_inception_v2_coco_trt_fp16.json" + }, + "assertions": [ + "statistics['map'] > (0.268 - 0.005)" + ] +} +``` + +We execute the test using the ``test`` python function + +```python +from tftrt.examples.object_detection import test + +test('my_test.json') +# help(test) for more details +``` + +Alternatively, we can directly call the object_detection.test module, which +is configured to execute this function by default. + +```shell +python -m tftrt.examples.object_detection.test my_test.json +``` + +For the example configuration shown above, the following steps will be performed + +1. Downloads ssd_inception_v2_coco +2. Optimizes with TensorRT and FP16 precision +3. Benchmarks against the MSCOCO 2017 validation dataset +4. 
Asserts that the MAP is greater than some reference value diff --git a/tftrt/examples/object_detection/__init__.py b/tftrt/examples/object_detection/__init__.py new file mode 100644 index 000000000..d7675e24e --- /dev/null +++ b/tftrt/examples/object_detection/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from .object_detection import download_model, download_dataset, optimize_model, benchmark_model +from .test import test diff --git a/tftrt/examples/object_detection/graph_utils.py b/tftrt/examples/object_detection/graph_utils.py new file mode 100644 index 000000000..775127abb --- /dev/null +++ b/tftrt/examples/object_detection/graph_utils.py @@ -0,0 +1,108 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import tensorflow as tf + + +def make_const6(const6_name='const6'): + graph = tf.Graph() + with graph.as_default(): + tf_6 = tf.constant(dtype=tf.float32, value=6.0, name=const6_name) + return graph.as_graph_def() + + +def make_relu6(output_name, input_name, const6_name='const6'): + graph = tf.Graph() + with graph.as_default(): + tf_x = tf.placeholder(tf.float32, [10, 10], name=input_name) + tf_6 = tf.constant(dtype=tf.float32, value=6.0, name=const6_name) + with tf.name_scope(output_name): + tf_y1 = tf.nn.relu(tf_x, name='relu1') + tf_y2 = tf.nn.relu(tf.subtract(tf_x, tf_6, name='sub1'), name='relu2') + + #tf_y = tf.nn.relu(tf.subtract(tf_6, tf.nn.relu(tf_x, name='relu1'), name='sub'), name='relu2') + #tf_y = tf.subtract(tf_6, tf_y, name=output_name) + tf_y = tf.subtract(tf_y1, tf_y2, name=output_name) + + graph_def = graph.as_graph_def() + graph_def.node[-1].name = output_name + + # remove unused nodes + for node in graph_def.node: + if node.name == input_name: + graph_def.node.remove(node) + for node in graph_def.node: + if node.name == const6_name: + graph_def.node.remove(node) + for node in graph_def.node: + if node.op == '_Neg': + node.op = 'Neg' + + return graph_def + + +def convert_relu6(graph_def, const6_name='const6'): + # add constant 6 + has_const6 = False + for node in graph_def.node: + if node.name == const6_name: + has_const6 = True + if not has_const6: + const6_graph_def = make_const6(const6_name=const6_name) + graph_def.node.extend(const6_graph_def.node) + + for node in graph_def.node: + if node.op == 'Relu6': + input_name = node.input[0] + output_name = node.name + relu6_graph_def = make_relu6(output_name, input_name, const6_name=const6_name) + graph_def.node.remove(node) + graph_def.node.extend(relu6_graph_def.node) + + return graph_def + + +def 
remove_node(graph_def, node): + for n in graph_def.node: + if node.name in n.input: + n.input.remove(node.name) + ctrl_name = '^' + node.name + if ctrl_name in n.input: + n.input.remove(ctrl_name) + graph_def.node.remove(node) + + +def remove_op(graph_def, op_name): + matches = [node for node in graph_def.node if node.op == op_name] + for match in matches: + remove_node(graph_def, match) + + +def force_nms_cpu(frozen_graph): + for node in frozen_graph.node: + if 'NonMaxSuppression' in node.name: + node.device = '/device:CPU:0' + return frozen_graph + + +def replace_relu6(frozen_graph): + return convert_relu6(frozen_graph) + + +def remove_assert(frozen_graph): + remove_op(frozen_graph, 'Assert') + return frozen_graph diff --git a/tftrt/examples/object_detection/install_dependencies.sh b/tftrt/examples/object_detection/install_dependencies.sh new file mode 100755 index 000000000..0f55d90db --- /dev/null +++ b/tftrt/examples/object_detection/install_dependencies.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +TF_MODELS_DIR=third_party/models +COCO_API_DIR=third_party/cocoapi + +python -V 2>&1 | grep "Python 3" || \ + ( export DEBIAN_FRONTEND=noninteractive && \ + apt-get update && \ + apt-get install -y --no-install-recommends python-tk ) + +RESEARCH_DIR=$TF_MODELS_DIR/research +SLIM_DIR=$RESEARCH_DIR/slim +PYCOCO_DIR=$COCO_API_DIR/PythonAPI + +pushd $RESEARCH_DIR + +# GET PROTOC 3.5 + +BASE_URL="https://github.com/google/protobuf/releases/download/v3.5.1/" +PROTOC_DIR=protoc +PROTOC_EXE=$PROTOC_DIR/bin/protoc + +mkdir -p $PROTOC_DIR +pushd $PROTOC_DIR +ARCH=$(uname -m) +if [ "$ARCH" == "aarch64" ] ; then + filename="protoc-3.5.1-linux-aarch_64.zip" +elif [ "$ARCH" == "x86_64" ] ; then + filename="protoc-3.5.1-linux-x86_64.zip" +else + echo ERROR: $ARCH not supported. + exit 1; +fi +wget --no-check-certificate ${BASE_URL}${filename} +unzip ${filename} +popd + +# BUILD PROTOBUF FILES +$PROTOC_EXE object_detection/protos/*.proto --python_out=. + +# INSTALL OBJECT DETECTION + +pip install -e . + +popd + +pushd $SLIM_DIR +pip install -e . +popd + +# INSTALL PYCOCOTOOLS + +pushd $PYCOCO_DIR +pip install -e . +popd diff --git a/tftrt/examples/object_detection/object_detection.py b/tftrt/examples/object_detection/object_detection.py new file mode 100644 index 000000000..38e86a300 --- /dev/null +++ b/tftrt/examples/object_detection/object_detection.py @@ -0,0 +1,632 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + + +from __future__ import absolute_import + +import tensorflow as tf +import tensorflow.contrib.tensorrt as trt +import tqdm +import pdb + +from collections import namedtuple +from PIL import Image +import numpy as np +import time +import json +import subprocess +import os +import glob + +from .graph_utils import force_nms_cpu as f_force_nms_cpu +from .graph_utils import replace_relu6 as f_replace_relu6 +from .graph_utils import remove_assert as f_remove_assert + +from google.protobuf import text_format +from object_detection.protos import pipeline_pb2, image_resizer_pb2 +from object_detection import exporter + +Model = namedtuple('Model', ['name', 'url', 'extract_dir']) + +INPUT_NAME = 'image_tensor' +BOXES_NAME = 'detection_boxes' +CLASSES_NAME = 'detection_classes' +SCORES_NAME = 'detection_scores' +MASKS_NAME = 'detection_masks' +NUM_DETECTIONS_NAME = 'num_detections' +FROZEN_GRAPH_NAME = 'frozen_inference_graph.pb' +PIPELINE_CONFIG_NAME = 'pipeline.config' +CHECKPOINT_PREFIX = 'model.ckpt' + +MODELS = { + 'ssd_mobilenet_v1_coco': + Model( + 'ssd_mobilenet_v1_coco', + 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz', + 'ssd_mobilenet_v1_coco_2018_01_28', + ), + 'ssd_mobilenet_v1_0p75_depth_quantized_coco': + Model( + 'ssd_mobilenet_v1_0p75_depth_quantized_coco', + 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_0.75_depth_quantized_300x300_coco14_sync_2018_07_18.tar.gz', + 
'ssd_mobilenet_v1_0.75_depth_quantized_300x300_coco14_sync_2018_07_18' + ), + 'ssd_mobilenet_v1_ppn_coco': + Model( + 'ssd_mobilenet_v1_ppn_coco', + 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03.tar.gz', + 'ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03' + ), + 'ssd_mobilenet_v1_fpn_coco': + Model( + 'ssd_mobilenet_v1_fpn_coco', + 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz', + 'ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03' + ), + 'ssd_mobilenet_v2_coco': + Model( + 'ssd_mobilenet_v2_coco', + 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz', + 'ssd_mobilenet_v2_coco_2018_03_29', + ), + 'ssdlite_mobilenet_v2_coco': + Model( + 'ssdlite_mobilenet_v2_coco', + 'http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz', + 'ssdlite_mobilenet_v2_coco_2018_05_09'), + 'ssd_inception_v2_coco': + Model( + 'ssd_inception_v2_coco', + 'http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz', + 'ssd_inception_v2_coco_2018_01_28', + ), + 'ssd_resnet_50_fpn_coco': + Model( + 'ssd_resnet_50_fpn_coco', + 'http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz', + 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03', + ), + 'faster_rcnn_resnet50_coco': + Model( + 'faster_rcnn_resnet50_coco', + 'http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz', + 'faster_rcnn_resnet50_coco_2018_01_28', + ), + 'faster_rcnn_nas': + Model( + 'faster_rcnn_nas', + 'http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2018_01_28.tar.gz', + 'faster_rcnn_nas_coco_2018_01_28', + ), + 
'mask_rcnn_resnet50_atrous_coco': + Model( + 'mask_rcnn_resnet50_atrous_coco', + 'http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz', + 'mask_rcnn_resnet50_atrous_coco_2018_01_28', + ), + 'facessd_mobilenet_v2_quantized_open_image_v4': + Model( + 'facessd_mobilenet_v2_quantized_open_image_v4', + 'http://download.tensorflow.org/models/object_detection/facessd_mobilenet_v2_quantized_320x320_open_image_v4.tar.gz', + 'facessd_mobilenet_v2_quantized_320x320_open_image_v4') +} + +Dataset = namedtuple( + 'Dataset', + ['images_url', 'images_dir', 'annotation_url', 'annotation_path']) + +DATASETS = { + 'val2014': + Dataset( + 'http://images.cocodataset.org/zips/val2014.zip', 'val2014', + 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip', + 'annotations/instances_val2014.json'), + 'train2014': + Dataset( + 'http://images.cocodataset.org/zips/train2014.zip', 'train2014', + 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip', + 'annotations/instances_train2014.json'), + 'val2017': + Dataset( + 'http://images.cocodataset.org/zips/val2017.zip', 'val2017', + 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip', + 'annotations/instances_val2017.json'), + 'train2017': + Dataset( + 'http://images.cocodataset.org/zips/train2017.zip', 'train2017', + 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip', + 'annotations/instances_train2017.json') +} + + +def download_model(model_name, output_dir='.'): + """Downloads a model from the TensorFlow Object Detection API + + Downloads a model from the TensorFlow Object Detection API to a specific + output directory. The download will be skipped if an existing directory + for the selected model already found under output_dir. + + Args + ---- + model_name: A string representing the model to download. This must be + one of the keys in the module variable + ``trt_samples.object_detection.MODELS``. 
+ output_dir: A string representing the directory to download the model + under. A directory for the specified model will be created at + ``output_dir/``. If output_dir/ + already exists, then the download will be skipped. + + Returns + ------- + config_path: A string representing the path to the object detection + pipeline configuration file of the downloaded model. + checkpoint_path: A string representing the path to the object detection + model checkpoint. + """ + global MODELS + + model_name + + model = MODELS[model_name] + + # make output directory if it doesn't exist + subprocess.call(['mkdir', '-p', output_dir]) + + tar_file = os.path.join(output_dir, os.path.basename(model.url)) + + config_path = os.path.join(output_dir, model.extract_dir, + PIPELINE_CONFIG_NAME) + checkpoint_path = os.path.join(output_dir, model.extract_dir, + CHECKPOINT_PREFIX) + + extract_dir = os.path.join(output_dir, model.extract_dir) + if os.path.exists(extract_dir): + print('Using cached model found at: %s' % extract_dir) + else: + subprocess.call(['wget', model.url, '-O', tar_file]) + subprocess.call(['tar', '-xzf', tar_file, '-C', output_dir]) + + # hack fix to handle mobilenet_v2 config bug + subprocess.call(['sed', '-i', '/batch_norm_trainable/d', config_path]) + + return config_path, checkpoint_path + + +def optimize_model(config_path, + checkpoint_path, + use_trt=True, + force_nms_cpu=True, + replace_relu6=True, + remove_assert=True, + override_nms_score_threshold=None, + override_resizer_shape=None, + max_batch_size=1, + precision_mode='FP32', + minimum_segment_size=50, + max_workspace_size_bytes=1 << 25, + calib_images_dir=None, + num_calib_images=None, + calib_image_shape=None, + tmp_dir='.optimize_model_tmp_dir', + remove_tmp_dir=True, + output_path=None): + """Optimizes an object detection model using TensorRT + + Optimizes an object detection model using TensorRT. 
This method also + performs pre-tensorrt optimizations specific to the TensorFlow object + detection API models. Please see the list of arguments for other + optimization parameters. + + Args + ---- + config_path: A string representing the path of the object detection + pipeline config file. + checkpoint_path: A string representing the path of the object + detection model checkpoint. + use_trt: A boolean representing whether to optimize with TensorRT. If + False, regular TensorFlow will be used but other optimizations + (like NMS device placement) will still be applied. + force_nms_cpu: A boolean indicating whether to place NMS operations on + the CPU. + replace_relu6: A boolean indicating whether to replace relu6(x) + operations with relu(x) - relu(x-6). + remove_assert: A boolean indicating whether to remove Assert + operations from the graph. + override_nms_score_threshold: An optional float representing + a NMS score threshold to override that specified in the object + detection configuration file. + override_resizer_shape: An optional list/tuple of integers + representing a fixed shape to override the default image resizer + specified in the object detection configuration file. + max_batch_size: An integer representing the max batch size to use for + TensorRT optimization. + precision_mode: A string representing the precision mode to use for + TensorRT optimization. Must be one of 'FP32', 'FP16', or 'INT8'. + minimum_segment_size: An integer representing the minimum segment size + to use for TensorRT graph segmentation. + max_workspace_size_bytes: An integer representing the max workspace + size for TensorRT optimization. + calib_images_dir: A string representing a directory containing images to + use for int8 calibration. + num_calib_images: An integer representing the number of calibration + images to use. If None, will use all images in directory. 
+ calib_image_shape: A tuple of integers representing the height, + width that images will be resized to for calibration. + tmp_dir: A string representing a directory for temporary files. This + directory will be created and removed by this function and should + not already exist. If the directory exists, an error will be + thrown. + remove_tmp_dir: A boolean indicating whether we should remove the + tmp_dir or throw error. + output_path: An optional string representing the path to save the + optimized GraphDef to. + + Returns + ------- + A GraphDef representing the optimized model. + """ + if os.path.exists(tmp_dir): + if not remove_tmp_dir: + raise RuntimeError( + 'Cannot create temporary directory, path exists: %s' % tmp_dir) + subprocess.call(['rm', '-rf', tmp_dir]) + + # load config from file + config = pipeline_pb2.TrainEvalPipelineConfig() + with open(config_path, 'r') as f: + text_format.Merge(f.read(), config, allow_unknown_extension=True) + + # override some config parameters + if config.model.HasField('ssd'): + config.model.ssd.feature_extractor.override_base_feature_extractor_hyperparams = True + if override_nms_score_threshold is not None: + config.model.ssd.post_processing.batch_non_max_suppression.score_threshold = override_nms_score_threshold + if override_resizer_shape is not None: + config.model.ssd.image_resizer.fixed_shape_resizer.height = override_resizer_shape[ + 0] + config.model.ssd.image_resizer.fixed_shape_resizer.width = override_resizer_shape[ + 1] + elif config.model.HasField('faster_rcnn'): + if override_nms_score_threshold is not None: + config.model.faster_rcnn.second_stage_post_processing.score_threshold = override_nms_score_threshold + if override_resizer_shape is not None: + config.model.faster_rcnn.image_resizer.fixed_shape_resizer.height = override_resizer_shape[ + 0] + config.model.faster_rcnn.image_resizer.fixed_shape_resizer.width = override_resizer_shape[ + 1] + + tf_config = tf.ConfigProto() + 
tf_config.gpu_options.allow_growth = True + + # export inference graph to file (initial), this will create tmp_dir + with tf.Session(config=tf_config): + with tf.Graph().as_default(): + exporter.export_inference_graph( + INPUT_NAME, + config, + checkpoint_path, + tmp_dir, + input_shape=[max_batch_size, None, None, 3]) + + # read frozen graph from file + frozen_graph_path = os.path.join(tmp_dir, FROZEN_GRAPH_NAME) + frozen_graph = tf.GraphDef() + with open(frozen_graph_path, 'rb') as f: + frozen_graph.ParseFromString(f.read()) + + # apply graph modifications + if force_nms_cpu: + frozen_graph = f_force_nms_cpu(frozen_graph) + if replace_relu6: + frozen_graph = f_replace_relu6(frozen_graph) + if remove_assert: + frozen_graph = f_remove_assert(frozen_graph) + + # get input names + output_names = [BOXES_NAME, CLASSES_NAME, SCORES_NAME, NUM_DETECTIONS_NAME] + + # optionally perform TensorRT optimization + if use_trt: + with tf.Graph().as_default() as tf_graph: + with tf.Session(config=tf_config) as tf_sess: + frozen_graph = trt.create_inference_graph( + input_graph_def=frozen_graph, + outputs=output_names, + max_batch_size=max_batch_size, + max_workspace_size_bytes=max_workspace_size_bytes, + precision_mode=precision_mode, + minimum_segment_size=minimum_segment_size) + + # perform calibration for int8 precision + if precision_mode == 'INT8': + + if calib_images_dir is None: + raise ValueError('calib_images_dir must be provided for int8 optimization.') + + tf.import_graph_def(frozen_graph, name='') + tf_input = tf_graph.get_tensor_by_name(INPUT_NAME + ':0') + tf_boxes = tf_graph.get_tensor_by_name(BOXES_NAME + ':0') + tf_classes = tf_graph.get_tensor_by_name(CLASSES_NAME + ':0') + tf_scores = tf_graph.get_tensor_by_name(SCORES_NAME + ':0') + tf_num_detections = tf_graph.get_tensor_by_name( + NUM_DETECTIONS_NAME + ':0') + + image_paths = glob.glob(os.path.join(calib_images_dir, '*.jpg')) + image_paths = image_paths[0:num_calib_images] + + for image_idx in 
tqdm.tqdm(range(0, len(image_paths), max_batch_size)): + + # read batch of images + batch_images = [] + for image_path in image_paths[image_idx:image_idx+max_batch_size]: + image = _read_image(image_path, calib_image_shape) + batch_images.append(image) + + # execute batch of images + boxes, classes, scores, num_detections = tf_sess.run( + [tf_boxes, tf_classes, tf_scores, tf_num_detections], + feed_dict={tf_input: batch_images}) + + pdb.set_trace() + frozen_graph = trt.calib_graph_to_infer_graph(frozen_graph) + + # re-enable variable batch size, this was forced to max + # batch size during export to enable TensorRT optimization + for node in frozen_graph.node: + if INPUT_NAME == node.name: + node.attr['shape'].shape.dim[0].size = -1 + + # write optimized model to disk + if output_path is not None: + with open(output_path, 'wb') as f: + f.write(frozen_graph.SerializeToString()) + + # remove temporary directory + subprocess.call(['rm', '-rf', tmp_dir]) + + return frozen_graph + + +def download_dataset(dataset_name, output_dir='.'): + """Downloads a COCO dataset + + Downloads a COCO dataset to the specified output directory. A new + directory corresponding to the specified dataset will be created under + output_dir. This directory will contain the images of the dataset. + + Args + ---- + dataset_name: A string representing the name of the dataset, it must + be one of the keys in trt_samples.object_detection.DATASETS. + + Returns + ------- + images_dir: A string representing the path of the directory containing + images of the dataset. + annotation_path: A string representing the path of the COCO annotation + file for the dataset. 
+ """ + global DATASETS + + dataset = DATASETS[dataset_name] + + subprocess.call(['mkdir', '-p', output_dir]) + + images_dir = os.path.join(output_dir, dataset.images_dir) + images_zip_file = os.path.join(output_dir, + os.path.basename(dataset.images_url)) + annotation_path = os.path.join(output_dir, dataset.annotation_path) + annotation_zip_file = os.path.join( + output_dir, os.path.basename(dataset.annotation_url)) + + # download or use cached annotation + if os.path.exists(annotation_path): + print('Using cached annotation_path; %s' % (annotation_path)) + else: + subprocess.call( + ['wget', dataset.annotation_url, '-O', annotation_zip_file]) + subprocess.call(['unzip', annotation_zip_file, '-d', output_dir]) + + # download or use cached images + if os.path.exists(images_dir): + print('Using cached images_dir; %s' % (images_dir)) + else: + subprocess.call(['wget', dataset.images_url, '-O', images_zip_file]) + subprocess.call(['unzip', images_zip_file, '-d', output_dir]) + + return images_dir, annotation_path + + +def benchmark_model(frozen_graph, + images_dir, + annotation_path, + batch_size=1, + image_shape=None, + num_images=4096, + tmp_dir='.benchmark_model_tmp_dir', + remove_tmp_dir=True, + output_path=None): + """Computes accuracy and performance statistics + + Computes accuracy and performance statistics by executing over many images + from the MSCOCO dataset defined by images_dir and annotation_path. + + Args + ---- + frozen_graph: A GraphDef representing the object detection model to + test. Alternatively, a string representing the path to the saved + frozen graph. + images_dir: A string representing the path of the COCO images + directory. + annotation_path: A string representing the path of the COCO annotation + file. + batch_size: An integer representing the batch size to use when feeding + images to the model. + image_shape: An optional tuple of integers representing a fixed shape + to resize all images before testing. 
+ num_images: An integer representing the number of images in the + dataset to evaluate with. + tmp_dir: A string representing the path where the function may create + a temporary directory to store intermediate files. + output_path: An optional string representing a path to store the + statistics in JSON format. + + Returns + ------- + statistics: A named dictionary of accuracy and performance statistics + computed for the model. + """ + if os.path.exists(tmp_dir): + if not remove_tmp_dir: + raise RuntimeError('Temporary directory exists; %s' % tmp_dir) + subprocess.call(['rm', '-rf', tmp_dir]) + if batch_size > 1 and image_shape is None: + raise RuntimeError( + 'Fixed image shape must be provided for batch size > 1') + + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + coco = COCO(annotation_file=annotation_path) + + # get list of image ids to use for evaluation + image_ids = coco.getImgIds() + if num_images > len(image_ids): + print( + 'Num images provided %d exceeds number in dataset %d, using %d images instead' + % (num_images, len(image_ids), len(image_ids))) + num_images = len(image_ids) + image_ids = image_ids[0:num_images] + + # load frozen graph from file if string, otherwise must be GraphDef + if isinstance(frozen_graph, str): + frozen_graph_path = frozen_graph + frozen_graph = tf.GraphDef() + with open(frozen_graph_path, 'rb') as f: + frozen_graph.ParseFromString(f.read()) + elif not isinstance(frozen_graph, tf.GraphDef): + raise TypeError('Expected frozen_graph to be GraphDef or str') + + tf_config = tf.ConfigProto() + tf_config.gpu_options.allow_growth = True + + coco_detections = [] # list of all bounding box detections in coco format + runtimes = [] # list of runtimes for each batch + image_counts = [] # list of number of images in each batch + + with tf.Graph().as_default() as tf_graph: + with tf.Session(config=tf_config) as tf_sess: + tf.import_graph_def(frozen_graph, name='') + tf_input = 
tf_graph.get_tensor_by_name(INPUT_NAME + ':0') + tf_boxes = tf_graph.get_tensor_by_name(BOXES_NAME + ':0') + tf_classes = tf_graph.get_tensor_by_name(CLASSES_NAME + ':0') + tf_scores = tf_graph.get_tensor_by_name(SCORES_NAME + ':0') + tf_num_detections = tf_graph.get_tensor_by_name( + NUM_DETECTIONS_NAME + ':0') + + # load batches from coco dataset + for image_idx in tqdm.tqdm(range(0, len(image_ids), batch_size)): + batch_image_ids = image_ids[image_idx:image_idx + batch_size] + batch_images = [] + batch_coco_images = [] + + # read images from file + for image_id in batch_image_ids: + coco_img = coco.imgs[image_id] + batch_coco_images.append(coco_img) + image_path = os.path.join(images_dir, + coco_img['file_name']) + image = _read_image(image_path, image_shape) + batch_images.append(image) + + # run once outside of timing to initialize + if image_idx == 0: + boxes, classes, scores, num_detections = tf_sess.run( + [tf_boxes, tf_classes, tf_scores, tf_num_detections], + feed_dict={tf_input: batch_images}) + + # execute model and compute time difference + t0 = time.time() + boxes, classes, scores, num_detections = tf_sess.run( + [tf_boxes, tf_classes, tf_scores, tf_num_detections], + feed_dict={tf_input: batch_images}) + t1 = time.time() + + # log runtime and image count + runtimes.append(float(t1 - t0)) + image_counts.append(len(batch_images)) + + # add coco detections for this batch to running list + batch_coco_detections = [] + for i, image_id in enumerate(batch_image_ids): + image_width = batch_coco_images[i]['width'] + image_height = batch_coco_images[i]['height'] + + for j in range(int(num_detections[i])): + bbox = boxes[i][j] + bbox_coco_fmt = [ + bbox[1] * image_width, # x0 + bbox[0] * image_height, # x1 + (bbox[3] - bbox[1]) * image_width, # width + (bbox[2] - bbox[0]) * image_height, # height + ] + + coco_detection = { + 'image_id': image_id, + 'category_id': int(classes[i][j]), + 'bbox': bbox_coco_fmt, + 'score': float(scores[i][j]) + } + + 
coco_detections.append(coco_detection) + + # write coco detections to file + subprocess.call(['mkdir', '-p', tmp_dir]) + coco_detections_path = os.path.join(tmp_dir, 'coco_detections.json') + with open(coco_detections_path, 'w') as f: + json.dump(coco_detections, f) + + # compute coco metrics + cocoDt = coco.loadRes(coco_detections_path) + eval = COCOeval(coco, cocoDt, 'bbox') + eval.params.imgIds = image_ids + + eval.evaluate() + eval.accumulate() + eval.summarize() + + statistics = { + 'map': eval.stats[0], + 'avg_latency_ms': 1000.0 * np.mean(runtimes), + 'avg_throughput_fps': np.sum(image_counts) / np.sum(runtimes) + } + + if output_path is not None: + subprocess.call(['mkdir', '-p', os.path.dirname(output_path)]) + with open(output_path, 'w') as f: + json.dump(statistics, f) + + subprocess.call(['rm', '-rf', tmp_dir]) + + return statistics + + +def _read_image(image_path, image_shape): + image = Image.open(image_path).convert('RGB') + if image_shape is not None: + image = image.resize(image_shape[::-1]) + return np.array(image) diff --git a/tftrt/examples/object_detection/test.py b/tftrt/examples/object_detection/test.py new file mode 100644 index 000000000..b7c2248c6 --- /dev/null +++ b/tftrt/examples/object_detection/test.py @@ -0,0 +1,101 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================
+
+import argparse
+import json
+from .object_detection import download_model, download_dataset, optimize_model, benchmark_model
+
+
+def test(test_config_path):
+    """Runs an object detection test configuration
+
+    This runs an object detection test configuration. This involves
+
+    1. Download a model architecture (or use cached).
+    2. Optimize the downloaded model architecture
+    3. Benchmark the optimized model against a dataset
+    4. (optional) Run assertions to check the benchmark output
+
+    The input to this function is a JSON file which specifies the test
+    configuration.
+
+    example_test_config.json:
+
+        {
+            "source_model": { ... },
+            "optimization_config": { ... },
+            "benchmark_config": { ... },
+            "assertions": [ ... ]
+        }
+
+    source_model: A dictionary of arguments passed to download_model, which
+        specify the pre-optimized model architecture. The model downloaded (or
+        the cached model if found) will be passed to optimize_model.
+    optimization_config: A dictionary of arguments passed to optimize_model.
+        Please see help(optimize_model) for more details.
+    benchmark_config: A dictionary of arguments passed to benchmark_model.
+        Please see help(benchmark_model) for more details.
+    assertions: A list of strings containing python code that will be
+        evaluated. If the code returns false, an error will be thrown. These
+        assertions can reference any variables local to this 'test' function.
+        Some useful values are
+
+            statistics['map']
+            statistics['avg_latency_ms']
+            statistics['avg_throughput_fps']
+
+    Args
+    ----
+    test_config_path: A string corresponding to the test configuration
+        JSON file.
+    """
+    with open(test_config_path, 'r') as f:
+        test_config = json.load(f)
+        print(json.dumps(test_config, sort_keys=True, indent=4))
+
+    # download model or use cached
+    config_path, checkpoint_path = download_model(**test_config['source_model'])
+
+    # optimize model using source model
+    frozen_graph = optimize_model(
+        config_path=config_path,
+        checkpoint_path=checkpoint_path,
+        **test_config['optimization_config'])
+
+    # benchmark optimized model
+    statistics = benchmark_model(
+        frozen_graph=frozen_graph,
+        **test_config['benchmark_config'])
+    print(json.dumps(statistics, sort_keys=True, indent=4))
+
+    # run assertions
+    if 'assertions' in test_config:
+        for a in test_config['assertions']:
+            if not eval(a):  # NOTE(review): eval runs config-supplied code; configs must be trusted
+                raise AssertionError('ASSERTION FAILED: %s' % a)
+            else:
+                print('ASSERTION PASSED: %s' % a)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'test_config_path',
+        help='Path of JSON file containing test configuration. Please '
+        'see help(tftrt.examples.object_detection.test) for more information')
+    args = parser.parse_args()
+    test(args.test_config_path)
diff --git a/tftrt/examples/object_detection/third_party/cocoapi b/tftrt/examples/object_detection/third_party/cocoapi
new file mode 160000
index 000000000..ed842bffd
--- /dev/null
+++ b/tftrt/examples/object_detection/third_party/cocoapi
@@ -0,0 +1 @@
+Subproject commit ed842bffd41f6ff38707c4f0968d2cfd91088688
diff --git a/tftrt/examples/object_detection/third_party/models b/tftrt/examples/object_detection/third_party/models
new file mode 160000
index 000000000..402b561b0
--- /dev/null
+++ b/tftrt/examples/object_detection/third_party/models
@@ -0,0 +1 @@
+Subproject commit 402b561b03857151f684ee00b3d997e5e6be9778