diff --git a/example/rcnn/README.md b/example/rcnn/README.md index 43cd054cb876..282a1aebe9a9 100644 --- a/example/rcnn/README.md +++ b/example/rcnn/README.md @@ -1,5 +1,7 @@ # Faster R-CNN in MXNet with distributed implementation and data parallelization +![example detections](https://cloud.githubusercontent.com/assets/13162287/22101032/92085dc0-de6c-11e6-9228-67e72606ddbc.png) + ## Why? There exist good implementations of Faster R-CNN yet they lack support for recent ConvNet architectures. The aim of reproducing it from scratch is to fully utilize @@ -43,9 +45,8 @@ MXNet engines and parallelization for object detection. | Faster R-CNN end-to-end | VGG16 | COCO train | COCO val | 21.2 | 22.8 | | Faster R-CNN end-to-end | ResNet-101 | COCO train | COCO val | 27.2 | 26.1 | -All reference results are from original publications. -All VOC experiments are conducted in MXNet-v0.9.1-nnvm. MXNet-v0.8 have similar results. -All COCO experiments are conducted in MXNet-v0.8. +The above experiments were conducted at [mx-rcnn](https://github.com/precedenceguo/mx-rcnn/tree/6a1ab0eec5035a10a1efb5fc8c9d6c54e101b4d0) +using [a MXNet fork, based on MXNet 0.9.1 nnvm pre-release](https://github.com/precedenceguo/mxnet/tree/simple). ## I'm Feeling Lucky * Prepare: `bash script/additional_deps.sh` @@ -56,9 +57,8 @@ All COCO experiments are conducted in MXNet-v0.8. ## Getting started See if `bash script/additional_deps.sh` will do the following for you. * Suppose `HOME` represents where this file is located. All commands, unless stated otherwise, should be started from `HOME`. - Executing scripts in `script` must also be from `HOME`. * Install python package `cython easydict matplotlib scikit-image`. -* Install MXNet Python Interface. Open `python` type `import mxnet` to confirm. +* Install MXNet version v0.9.5 or higher and MXNet Python Interface. Open `python` type `import mxnet` to confirm. * Run `make` in `HOME`. 
Command line arguments have the same meaning as in mxnet/example/image-classification. @@ -82,7 +82,7 @@ Refer to `script/vgg_voc07.sh` and other experiments for examples. ### Prepare Training Data See `bash script/get_voc.sh` and `bash script/get_coco.sh` will do the following for you. -* Make a folder `data` in `HOME`. `data` folder will be used to place the training data folder `VOCdevkit` and `coco`. +* Make a folder `data` in `HOME`. `data` folder will be used to place the training data folder `VOCdevkit` and `coco`. * Download and extract [Pascal VOC data](http://host.robots.ox.ac.uk/pascal/VOC/), place the `VOCdevkit` folder in `HOME/data`. * Download and extract [coco dataset](http://mscoco.org/dataset/), place all images to `coco/images` and annotation jsons to `data/annotations`. @@ -94,6 +94,7 @@ See `bash script/get_voc.sh` and `bash script/get_coco.sh` will do the following ### Prepare Pretrained Models See if `bash script/get_pretrained_model.sh` will do this for you. If not, * Make a folder `model` in `HOME`. `model` folder will be used to place model checkpoints along the training process. + It is recommended to set `model` as a symbolic link to somewhere else in hard disk. * Download VGG16 pretrained model `vgg16-0000.params` from [MXNet model gallery](https://github.com/dmlc/mxnet-model-gallery/blob/master/imagenet-1k-vgg.md) to `model` folder. * Download ResNet pretrained model `resnet-101-0000.params` from [ResNet](https://github.com/tornadomeet/ResNet) to `model` folder. @@ -174,7 +175,7 @@ History of this implementation is: * Faster R-CNN with end-to-end training and module testing (v4) * Faster R-CNN with accelerated training and resnet (v5) -mxnet/example/rcnn was v1, v2 and v3.5. +mxnet/example/rcnn was v1, v2, v3.5 and now v5. ## References 1. Tianqi Chen, Mu Li, Yutian Li, Min Lin, Naiyan Wang, Minjie Wang, Tianjun Xiao, Bing Xu, Chiyuan Zhang, and Zheng Zhang. 
MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. In Neural Information Processing Systems, Workshop on Machine Learning Systems, 2015 @@ -186,3 +187,4 @@ mxnet/example/rcnn was v1, v2 and v3.5. 7. Karen Simonyan, and Andrew Zisserman. "Very deep convolutional networks for large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014). 8. Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Deep Residual Learning for Image Recognition". In Computer Vision and Pattern Recognition, IEEE Conference on, 2016. 9. Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollár, and C. Lawrence Zitnick. "Microsoft COCO: Common Objects in Context" In European Conference on Computer Vision, pp. 740-755. Springer International Publishing, 2014. + diff --git a/example/rcnn/demo.py b/example/rcnn/demo.py index 9c01b48fd1bd..34ea327cffac 100644 --- a/example/rcnn/demo.py +++ b/example/rcnn/demo.py @@ -1,9 +1,9 @@ -from __future__ import print_function import argparse import os import cv2 import mxnet as mx import numpy as np +from rcnn.logger import logger from rcnn.config import config from rcnn.symbol import get_vgg_test, get_vgg_rpn_test from rcnn.io.image import resize, transform @@ -104,17 +104,18 @@ def demo_net(predictor, image_name, vis=False): boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))] # print results - print('class ---- [[x1, x2, y1, y2, confidence]]') + logger.info('---class---') + logger.info('[[x1, x2, y1, y2, confidence]]') for ind, boxes in enumerate(boxes_this_image): if len(boxes) > 0: - print('---------', CLASSES[ind], '---------') - print(boxes) + logger.info('---%s---' % CLASSES[ind]) + logger.info('%s' % boxes) if vis: vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale) else: result_file = image_name.replace('.', '_result.') - print('results saved to %s' % result_file) + logger.info('results saved to %s' % 
result_file) im = draw_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale) cv2.imwrite(result_file, im) diff --git a/example/rcnn/rcnn/core/tester.py b/example/rcnn/rcnn/core/tester.py index a99614b370b5..0ccc47df71eb 100644 --- a/example/rcnn/rcnn/core/tester.py +++ b/example/rcnn/rcnn/core/tester.py @@ -1,4 +1,3 @@ -from __future__ import print_function import cPickle import os import time @@ -6,6 +5,7 @@ import numpy as np from module import MutableModule +from rcnn.logger import logger from rcnn.config import config from rcnn.io import image from rcnn.processing.bbox_transform import bbox_pred, clip_boxes @@ -79,9 +79,9 @@ def generate_proposals(predictor, test_data, imdb, vis=False, thresh=0.): if vis: vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'], scale) - print('generating %d/%d' % (i + 1, imdb.num_images), - 'proposal %d' % (dets.shape[0]), - 'data %.4fs net %.4fs' % (t1, t2)) + logger.info('generating %d/%d ' % (i + 1, imdb.num_images) + + 'proposal %d ' % (dets.shape[0]) + + 'data %.4fs net %.4fs' % (t1, t2)) i += 1 assert len(imdb_boxes) == imdb.num_images, 'calculations not complete' @@ -100,7 +100,7 @@ def generate_proposals(predictor, test_data, imdb, vis=False, thresh=0.): with open(full_rpn_file, 'wb') as f: cPickle.dump(original_boxes, f, cPickle.HIGHEST_PROTOCOL) - print('wrote rpn proposals to {}'.format(rpn_file)) + logger.info('wrote rpn proposals to %s' % rpn_file) return imdb_boxes @@ -189,7 +189,7 @@ def pred_eval(predictor, test_data, imdb, vis=False, thresh=1e-3): t3 = time.time() - t t = time.time() - print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(i, imdb.num_images, t1, t2, t3)) + logger.info('testing %d/%d data %.4fs net %.4fs post %.4fs' % (i, imdb.num_images, t1, t2, t3)) i += 1 det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl') diff --git a/example/rcnn/rcnn/cython/setup.py b/example/rcnn/rcnn/cython/setup.py index 330373dddb72..786460798fd2 100644 
--- a/example/rcnn/rcnn/cython/setup.py +++ b/example/rcnn/rcnn/cython/setup.py @@ -55,7 +55,13 @@ def locate_cuda(): raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) return cudaconfig -CUDA = locate_cuda() + + +# Test if CUDA could be found +try: + CUDA = locate_cuda() +except EnvironmentError: + CUDA = None # Obtain the numpy include directory. This logic works across numpy versions. @@ -123,25 +129,32 @@ def build_extensions(self): extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, include_dirs = [numpy_include] ), - Extension('gpu_nms', - ['nms_kernel.cu', 'gpu_nms.pyx'], - library_dirs=[CUDA['lib64']], - libraries=['cudart'], - language='c++', - runtime_library_dirs=[CUDA['lib64']], - # this syntax is specific to this build system - # we're only going to use certain compiler args with nvcc and not with - # gcc the implementation of this trick is in customize_compiler() below - extra_compile_args={'gcc': ["-Wno-unused-function"], - 'nvcc': ['-arch=sm_35', - '--ptxas-options=-v', - '-c', - '--compiler-options', - "'-fPIC'"]}, - include_dirs = [numpy_include, CUDA['include']] - ), ] +if CUDA is not None: + ext_modules.append( + Extension('gpu_nms', + ['nms_kernel.cu', 'gpu_nms.pyx'], + library_dirs=[CUDA['lib64']], + libraries=['cudart'], + language='c++', + runtime_library_dirs=[CUDA['lib64']], + # this syntax is specific to this build system + # we're only going to use certain compiler args with nvcc and not with + # gcc the implementation of this trick is in customize_compiler() below + extra_compile_args={'gcc': ["-Wno-unused-function"], + 'nvcc': ['-arch=sm_35', + '--ptxas-options=-v', + '-c', + '--compiler-options', + "'-fPIC'"]}, + include_dirs = [numpy_include, CUDA['include']] + ) + ) +else: + print('Skipping GPU_NMS') + + setup( name='frcnn_cython', ext_modules=ext_modules, diff --git a/example/rcnn/rcnn/dataset/coco.py b/example/rcnn/rcnn/dataset/coco.py index 8026071a90c3..00c4c41cf3ce 100644 ---
a/example/rcnn/rcnn/dataset/coco.py +++ b/example/rcnn/rcnn/dataset/coco.py @@ -1,10 +1,10 @@ -from __future__ import print_function import cPickle import cv2 import os import json import numpy as np +from ..logger import logger from imdb import IMDB # coco api @@ -38,7 +38,7 @@ def __init__(self, image_set, root_path, data_path): # load image file names self.image_set_index = self._load_image_set_index() self.num_images = len(self.image_set_index) - print('num_images', self.num_images) + logger.info('%s num_images %d' % (self.name, self.num_images)) # deal with data name view_map = {'minival2014': 'val2014', @@ -68,13 +68,13 @@ def gt_roidb(self): if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: roidb = cPickle.load(fid) - print('{} gt roidb loaded from {}'.format(self.name, cache_file)) + logger.info('%s gt roidb loaded from %s' % (self.name, cache_file)) return roidb gt_roidb = [self._load_coco_annotation(index) for index in self.image_set_index] with open(cache_file, 'wb') as fid: cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL) - print('wrote gt roidb to {}'.format(cache_file)) + logger.info('%s wrote gt roidb to %s' % (self.name, cache_file)) return gt_roidb @@ -155,10 +155,10 @@ def _write_coco_results(self, detections, res_file): for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue - print('Collecting %s results (%d/%d)' % (cls, cls_ind, self.num_classes - 1)) + logger.info('collecting %s results (%d/%d)' % (cls, cls_ind, self.num_classes - 1)) coco_cat_id = self._class_to_coco_ind[cls] results.extend(self._coco_results_one_category(detections[cls_ind], coco_cat_id)) - print('Writing results json to %s' % res_file) + logger.info('writing results json to %s' % res_file) with open(res_file, 'w') as f: json.dump(results, f, sort_keys=True, indent=4) @@ -192,7 +192,7 @@ def _do_python_eval(self, res_file, res_folder): eval_file = os.path.join(res_folder, 'detections_%s_results.pkl' % self.image_set) with 
open(eval_file, 'wb') as f: cPickle.dump(coco_eval, f, cPickle.HIGHEST_PROTOCOL) - print('coco eval results saved to %s' % eval_file) + logger.info('eval results saved to %s' % eval_file) def _print_detection_metrics(self, coco_eval): IoU_lo_thresh = 0.5 @@ -214,15 +214,15 @@ def _get_thr_ind(coco_eval, thr): precision = \ coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2] ap_default = np.mean(precision[precision > -1]) - print('~~~~ Mean and per-category AP @ IoU=%.2f,%.2f] ~~~~' % (IoU_lo_thresh, IoU_hi_thresh)) - print('%-15s %5.1f' % ('all', 100 * ap_default)) + logger.info('~~~~ Mean and per-category AP @ IoU=%.2f,%.2f] ~~~~' % (IoU_lo_thresh, IoU_hi_thresh)) + logger.info('%-15s %5.1f' % ('all', 100 * ap_default)) for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue # minus 1 because of __background__ precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2] ap = np.mean(precision[precision > -1]) - print('%-15s %5.1f' % (cls, 100 * ap)) + logger.info('%-15s %5.1f' % (cls, 100 * ap)) - print('~~~~ Summary metrics ~~~~') + logger.info('~~~~ Summary metrics ~~~~') coco_eval.summarize() diff --git a/example/rcnn/rcnn/dataset/imdb.py b/example/rcnn/rcnn/dataset/imdb.py index 1ad18dbc29bc..acdcd50f8208 100644 --- a/example/rcnn/rcnn/dataset/imdb.py +++ b/example/rcnn/rcnn/dataset/imdb.py @@ -9,7 +9,7 @@ 'boxes', 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps', 'bbox_targets'] """ -from __future__ import print_function +from ..logger import logger import os import cPickle import numpy as np @@ -70,8 +70,8 @@ def load_rpn_data(self, full=False): rpn_file = os.path.join(self.root_path, 'rpn_data', self.name + '_full_rpn.pkl') else: rpn_file = os.path.join(self.root_path, 'rpn_data', self.name + '_rpn.pkl') - print('loading {}'.format(rpn_file)) - assert os.path.exists(rpn_file), 'rpn data not found at {}'.format(rpn_file) + assert os.path.exists(rpn_file), '%s rpn data not found at %s' % 
(self.name, rpn_file) + logger.info('%s loading rpn data from %s' % (self.name, rpn_file)) with open(rpn_file, 'rb') as f: box_list = cPickle.load(f) return box_list @@ -93,7 +93,7 @@ def rpn_roidb(self, gt_roidb, append_gt=False): :return: roidb of rpn """ if append_gt: - print('appending ground truth annotations') + logger.info('%s appending ground truth annotations' % self.name) rpn_roidb = self.load_rpn_roidb(gt_roidb) roidb = IMDB.merge_roidbs(gt_roidb, rpn_roidb) else: @@ -156,7 +156,7 @@ def append_flipped_images(self, roidb): :param roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] """ - print('append flipped images to roidb') + logger.info('%s append flipped images to roidb' % self.name) assert self.num_images == len(roidb) for i in range(self.num_images): roi_rec = roidb[i] @@ -211,8 +211,8 @@ def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None): area_counts.append(area_count) total_counts = float(sum(area_counts)) for area_name, area_count in zip(area_names[1:], area_counts): - print('percentage of', area_name, area_count / total_counts) - print('average number of proposal', total_counts / self.num_images) + logger.info('percentage of %s is %f' % (area_name, area_count / total_counts)) + logger.info('average number of proposal is %f' % (total_counts / self.num_images)) for area_name, area_range in zip(area_names, area_ranges): gt_overlaps = np.zeros(0) num_pos = 0 diff --git a/example/rcnn/rcnn/dataset/pascal_voc.py b/example/rcnn/rcnn/dataset/pascal_voc.py index 268399316162..2135971faadf 100644 --- a/example/rcnn/rcnn/dataset/pascal_voc.py +++ b/example/rcnn/rcnn/dataset/pascal_voc.py @@ -6,12 +6,12 @@ criterion. 
""" -from __future__ import print_function import cPickle import cv2 import os import numpy as np +from ..logger import logger from imdb import IMDB from pascal_voc_eval import voc_eval from ds_utils import unique_boxes, filter_small_boxes @@ -42,7 +42,7 @@ def __init__(self, image_set, root_path, devkit_path): self.num_classes = len(self.classes) self.image_set_index = self.load_image_set_index() self.num_images = len(self.image_set_index) - print('num_images', self.num_images) + logger.info('%s num_images %d' % (self.name, self.num_images)) self.config = {'comp_id': 'comp4', 'use_diff': False, @@ -78,13 +78,13 @@ def gt_roidb(self): if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: roidb = cPickle.load(fid) - print('{} gt roidb loaded from {}'.format(self.name, cache_file)) + logger.info('%s gt roidb loaded from %s' % (self.name, cache_file)) return roidb gt_roidb = [self.load_pascal_annotation(index) for index in self.image_set_index] with open(cache_file, 'wb') as fid: cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL) - print('wrote gt roidb to {}'.format(cache_file)) + logger.info('%s wrote gt roidb to %s' % (self.name, cache_file)) return gt_roidb @@ -168,18 +168,18 @@ def selective_search_roidb(self, gt_roidb, append_gt=False): if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: roidb = cPickle.load(fid) - print('{} ss roidb loaded from {}'.format(self.name, cache_file)) + logger.info('%s ss roidb loaded from %s' % (self.name, cache_file)) return roidb if append_gt: - print('appending ground truth annotations') + logger.info('%s appending ground truth annotations' % self.name) ss_roidb = self.load_selective_search_roidb(gt_roidb) roidb = IMDB.merge_roidbs(gt_roidb, ss_roidb) else: roidb = self.load_selective_search_roidb(gt_roidb) with open(cache_file, 'wb') as fid: cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) - print('wrote ss roidb to {}'.format(cache_file)) + logger.info('%s wrote ss roidb to %s' % (self.name, 
cache_file)) return roidb @@ -224,7 +224,7 @@ def write_pascal_results(self, all_boxes): for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue - print('Writing {} VOC results file'.format(cls)) + logger.info('Writing %s VOC results file' % cls) filename = self.get_result_file_template().format(cls) with open(filename, 'wt') as f: for im_ind, index in enumerate(self.image_set_index): @@ -248,7 +248,7 @@ def do_python_eval(self): aps = [] # The PASCAL VOC metric changed in 2010 use_07_metric = True if int(self.year) < 2010 else False - print('VOC07 metric? ' + ('Y' if use_07_metric else 'No')) + logger.info('VOC07 metric? ' + ('Y' if use_07_metric else 'No')) for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue @@ -256,5 +256,5 @@ def do_python_eval(self): rec, prec, ap = voc_eval(filename, annopath, imageset_file, cls, annocache, ovthresh=0.5, use_07_metric=use_07_metric) aps += [ap] - print('AP for {} = {:.4f}'.format(cls, ap)) - print('Mean AP = {:.4f}'.format(np.mean(aps))) + logger.info('AP for {} = {:.4f}'.format(cls, ap)) + logger.info('Mean AP = {:.4f}'.format(np.mean(aps))) diff --git a/example/rcnn/rcnn/dataset/pascal_voc_eval.py b/example/rcnn/rcnn/dataset/pascal_voc_eval.py index 295b866bb697..54fa12ddccd8 100644 --- a/example/rcnn/rcnn/dataset/pascal_voc_eval.py +++ b/example/rcnn/rcnn/dataset/pascal_voc_eval.py @@ -2,7 +2,7 @@ given a pascal voc imdb, compute mAP """ -from __future__ import print_function +from ..logger import logger import numpy as np import os import cPickle @@ -86,8 +86,8 @@ def voc_eval(detpath, annopath, imageset_file, classname, annocache, ovthresh=0. 
for ind, image_filename in enumerate(image_filenames): recs[image_filename] = parse_voc_rec(annopath.format(image_filename)) if ind % 100 == 0: - print('reading annotations for {:d}/{:d}'.format(ind + 1, len(image_filenames))) - print('saving annotations cache to {:s}'.format(annocache)) + logger.info('reading annotations for %d/%d' % (ind + 1, len(image_filenames))) + logger.info('saving annotations cache to %s' % annocache) with open(annocache, 'wb') as f: cPickle.dump(recs, f, protocol=cPickle.HIGHEST_PROTOCOL) else: diff --git a/example/rcnn/rcnn/io/rpn.py b/example/rcnn/rcnn/io/rpn.py index c813e4ab06f6..52fe1a50c276 100644 --- a/example/rcnn/rcnn/io/rpn.py +++ b/example/rcnn/rcnn/io/rpn.py @@ -10,10 +10,11 @@ 'bbox_weight': [batch_size, num_anchors, feat_height, feat_width]} """ -from __future__ import print_function +import logging import numpy as np import numpy.random as npr +from ..logger import logger from ..config import config from .image import get_image, tensor_vstack from ..processing.generate_anchor import generate_anchors @@ -94,23 +95,19 @@ def _unmap(data, count, inds, fill=0): ret[inds, :] = data return ret - DEBUG = False im_info = im_info[0] scales = np.array(scales, dtype=np.float32) base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales) num_anchors = base_anchors.shape[0] feat_height, feat_width = feat_shape[-2:] - if DEBUG: - print('anchors:') - print(base_anchors) - print('anchor shapes:') - print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4], - base_anchors[:, 3::4] - base_anchors[:, 1::4]))) - print('im_info', im_info) - print('height', feat_height, 'width', feat_width) - print('gt_boxes shape', gt_boxes.shape) - print('gt_boxes', gt_boxes) + logger.debug('anchors: %s' % base_anchors) + logger.debug('anchor shapes: %s' % np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4], + base_anchors[:, 3::4] - base_anchors[:, 1::4]))) + logger.debug('im_info %s' % im_info) + logger.debug('height 
%d width %d' % (feat_height, feat_width)) + logger.debug('gt_boxes shape %s' % np.array(gt_boxes.shape)) + logger.debug('gt_boxes %s' % gt_boxes) # 1. generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, feat_width) * feat_stride @@ -132,14 +129,12 @@ def _unmap(data, count, inds, fill=0): (all_anchors[:, 1] >= -allowed_border) & (all_anchors[:, 2] < im_info[1] + allowed_border) & (all_anchors[:, 3] < im_info[0] + allowed_border))[0] - if DEBUG: - print('total_anchors', total_anchors) - print('inds_inside', len(inds_inside)) + logger.debug('total_anchors %d' % total_anchors) + logger.debug('inds_inside %d' % len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] - if DEBUG: - print('anchors shape', anchors.shape) + logger.debug('anchors shape %s' % np.array(anchors.shape)) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside),), dtype=np.float32) @@ -176,7 +171,7 @@ def _unmap(data, count, inds, fill=0): fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) - if DEBUG: + if logger.level == logging.INFO: disable_inds = fg_inds[:(len(fg_inds) - num_fg)] labels[disable_inds] = -1 @@ -185,7 +180,7 @@ def _unmap(data, count, inds, fill=0): bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) - if DEBUG: + if logger.level == logging.INFO: disable_inds = bg_inds[:(len(bg_inds) - num_bg)] labels[disable_inds] = -1 @@ -196,29 +191,30 @@ def _unmap(data, count, inds, fill=0): bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_weights[labels == 1, :] = np.array(config.TRAIN.RPN_BBOX_WEIGHTS) - if DEBUG: + if logger.level == logging.DEBUG: _sums = bbox_targets[labels == 1, :].sum(axis=0) _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0) _counts = np.sum(labels == 1) means = _sums / 
(_counts + 1e-14) stds = np.sqrt(_squared_sums / _counts - means ** 2) - print('means', means) - print('stdevs', stds) + logger.debug('means %s' % means) + logger.debug('stdevs %s' % stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0) - if DEBUG: - print('rpn: max max_overlaps', np.max(max_overlaps)) - print('rpn: num_positives', np.sum(labels == 1)) - print('rpn: num_negatives', np.sum(labels == 0)) + if logger.level == logging.DEBUG: + if gt_boxes.size > 0: + logger.debug('rpn: max max_overlaps %f' % np.max(max_overlaps)) + logger.debug('rpn: num_positives %f' % np.sum(labels == 1)) + logger.debug('rpn: num_negatives %f' % np.sum(labels == 0)) _fg_sum = np.sum(labels == 1) _bg_sum = np.sum(labels == 0) _count = 1 - print('rpn: num_positive avg', _fg_sum / _count) - print('rpn: num_negative avg', _bg_sum / _count) + logger.debug('rpn: num_positive avg %f' % (_fg_sum / _count)) + logger.debug('rpn: num_negative avg %f' % (_bg_sum / _count)) labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, A * feat_height * feat_width)) diff --git a/example/rcnn/rcnn/logger.py b/example/rcnn/rcnn/logger.py new file mode 100644 index 000000000000..2806e1add180 --- /dev/null +++ b/example/rcnn/rcnn/logger.py @@ -0,0 +1,6 @@ +import logging + +# set up logger +logging.basicConfig() +logger = logging.getLogger() +logger.setLevel(logging.INFO) diff --git a/example/rcnn/rcnn/processing/bbox_regression.py b/example/rcnn/rcnn/processing/bbox_regression.py index 46969aa0ec5e..d5b48a71b754 100644 --- a/example/rcnn/rcnn/processing/bbox_regression.py +++ b/example/rcnn/rcnn/processing/bbox_regression.py @@ -2,9 +2,9 @@ This file has functions about generating bounding box regression targets """ -from __future__ import print_function import numpy 
as np +from ..logger import logger from bbox_transform import bbox_overlaps, bbox_transform from rcnn.config import config @@ -22,12 +22,13 @@ def compute_bbox_regression_targets(rois, overlaps, labels): # Sanity check if len(rois) != len(overlaps): - print('bbox regression: this should not happen') + logger.warning('bbox regression: len(rois) != len(overlaps)') # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] if len(gt_inds) == 0: - print('something wrong : zero ground truth rois') + logger.warning('bbox regression: len(gt_inds) == 0') + # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0] @@ -52,7 +53,7 @@ def add_bbox_regression_targets(roidb): :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb :return: means, std variances of targets """ - print('add bounding box regression targets') + logger.info('bbox regression: add bounding box regression targets') assert len(roidb) > 0 assert 'max_classes' in roidb[0] diff --git a/example/rcnn/rcnn/processing/image_processing.py b/example/rcnn/rcnn/processing/image_processing.py deleted file mode 100644 index dafca3c15850..000000000000 --- a/example/rcnn/rcnn/processing/image_processing.py +++ /dev/null @@ -1,83 +0,0 @@ -import numpy as np -import cv2 - - -def resize(im, target_size, max_size): - """ - only resize input image to target size and return scale - :param im: BGR image input by opencv - :param target_size: one dimensional size (the short side) - :param max_size: one dimensional max size (the long side) - :return: - """ - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - # prevent bigger axis from being more than max_size: - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 
interpolation=cv2.INTER_LINEAR) - return im, im_scale - - -def transform(im, pixel_means, need_mean=False): - """ - transform into mxnet tensor - subtract pixel size and transform to correct format - :param im: [height, width, channel] in BGR - :param pixel_means: [[[R, G, B pixel means]]] - :return: [batch, channel, height, width] - """ - im = im.copy() - im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)] - im = im.astype(float) - if need_mean: - im -= pixel_means - im_tensor = im[np.newaxis, :] - # put channel first - channel_swap = (0, 3, 1, 2) - im_tensor = im_tensor.transpose(channel_swap) - return im_tensor - - -def transform_inverse(im_tensor, pixel_means): - """ - transform from mxnet im_tensor to ordinary RGB image - im_tensor is limited to one image - :param im_tensor: [batch, channel, height, width] - :param pixel_means: [[[R, G, B pixel means]]] - :return: im [height, width, channel(RGB)] - """ - assert im_tensor.shape[0] == 1 - im_tensor = im_tensor.copy() - # put channel back - channel_swap = (0, 2, 3, 1) - im_tensor = im_tensor.transpose(channel_swap) - im = im_tensor[0] - assert im.shape[2] == 3 - im += pixel_means - im = im.astype(np.uint8) - return im - - -def tensor_vstack(tensor_list, pad=0): - """ - vertically stack tensors - :param tensor_list: list of tensor to be stacked vertically - :param pad: label to pad with - :return: tensor with max shape - """ - ndim = len(tensor_list[0].shape) - if ndim == 1: - return np.hstack(tensor_list) - dimensions = [0] - for dim in range(1, ndim): - dimensions.append(max([tensor.shape[dim] for tensor in tensor_list])) - for ind, tensor in enumerate(tensor_list): - pad_shape = [(0, 0)] - for dim in range(1, ndim): - pad_shape.append((0, dimensions[dim] - tensor.shape[dim])) - tensor_list[ind] = np.lib.pad(tensor, pad_shape, 'constant', constant_values=pad) - all_tensor = np.vstack(tensor_list) - return all_tensor diff --git a/example/rcnn/rcnn/processing/nms.py b/example/rcnn/rcnn/processing/nms.py index 
cab093c51152..230139c413ec 100644 --- a/example/rcnn/rcnn/processing/nms.py +++ b/example/rcnn/rcnn/processing/nms.py @@ -1,6 +1,9 @@ import numpy as np from ..cython.cpu_nms import cpu_nms -from ..cython.gpu_nms import gpu_nms +try: + from ..cython.gpu_nms import gpu_nms +except ImportError: + gpu_nms = None def py_nms_wrapper(thresh): @@ -18,7 +21,10 @@ def _nms(dets): def gpu_nms_wrapper(thresh, device_id): def _nms(dets): return gpu_nms(dets, thresh, device_id) - return _nms + if gpu_nms is not None: + return _nms + else: + return cpu_nms_wrapper(thresh) def nms(dets, thresh): diff --git a/example/rcnn/rcnn/processing/roidb.py b/example/rcnn/rcnn/processing/roidb.py deleted file mode 100644 index 8dddc27f60c9..000000000000 --- a/example/rcnn/rcnn/processing/roidb.py +++ /dev/null @@ -1,91 +0,0 @@ -""" -roidb -basic format [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] -extended ['image', 'max_classes', 'max_overlaps', 'bbox_targets'] -""" - -from __future__ import print_function -import cv2 -import numpy as np - -from bbox_regression import compute_bbox_regression_targets -from rcnn.config import config - - -def prepare_roidb(imdb, roidb): - """ - add image path, max_classes, max_overlaps to roidb - :param imdb: image database, provide path - :param roidb: roidb - :return: None - """ - print('prepare roidb') - for i in range(len(roidb)): # image_index - roidb[i]['image'] = imdb.image_path_from_index(imdb.image_set_index[i]) - if config.TRAIN.ASPECT_GROUPING: - size = cv2.imread(roidb[i]['image']).shape - roidb[i]['height'] = size[0] - roidb[i]['width'] = size[1] - gt_overlaps = roidb[i]['gt_overlaps'].toarray() - max_overlaps = gt_overlaps.max(axis=1) - max_classes = gt_overlaps.argmax(axis=1) - roidb[i]['max_overlaps'] = max_overlaps - roidb[i]['max_classes'] = max_classes - - # background roi => background class - zero_indexes = np.where(max_overlaps == 0)[0] - assert all(max_classes[zero_indexes] == 0) - # foreground roi => foreground class - 
nonzero_indexes = np.where(max_overlaps > 0)[0] - assert all(max_classes[nonzero_indexes] != 0) - - -def add_bbox_regression_targets(roidb): - """ - given roidb, add ['bbox_targets'] and normalize bounding box regression targets - :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb - :return: means, std variances of targets - """ - print('add bounding box regression targets') - assert len(roidb) > 0 - assert 'max_classes' in roidb[0] - - num_images = len(roidb) - num_classes = roidb[0]['gt_overlaps'].shape[1] - for im_i in range(num_images): - rois = roidb[im_i]['boxes'] - max_overlaps = roidb[im_i]['max_overlaps'] - max_classes = roidb[im_i]['max_classes'] - roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes) - - if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: - # use fixed / precomputed means and stds instead of empirical values - means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1)) - stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1)) - else: - # compute mean, std values - class_counts = np.zeros((num_classes, 1)) + config.EPS - sums = np.zeros((num_classes, 4)) - squared_sums = np.zeros((num_classes, 4)) - for im_i in range(num_images): - targets = roidb[im_i]['bbox_targets'] - for cls in range(1, num_classes): - cls_indexes = np.where(targets[:, 0] == cls)[0] - if cls_indexes.size > 0: - class_counts[cls] += cls_indexes.size - sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) - squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) - - means = sums / class_counts - # var(x) = E(x^2) - E(x)^2 - stds = np.sqrt(squared_sums / class_counts - means ** 2) - - # normalized targets - for im_i in range(num_images): - targets = roidb[im_i]['bbox_targets'] - for cls in range(1, num_classes): - cls_indexes = np.where(targets[:, 0] == cls)[0] - roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] - roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= 
stds[cls, :] - - return means.ravel(), stds.ravel() diff --git a/example/rcnn/rcnn/pycocotools/UPSTREAM_REV b/example/rcnn/rcnn/pycocotools/UPSTREAM_REV index 706219b77d90..9613b145b237 100644 --- a/example/rcnn/rcnn/pycocotools/UPSTREAM_REV +++ b/example/rcnn/rcnn/pycocotools/UPSTREAM_REV @@ -1 +1 @@ -https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 +https://github.com/pdollar/coco/commit/336d2a27c91e3c0663d2dcf0b13574674d30f88e diff --git a/example/rcnn/rcnn/pycocotools/_mask.pyx b/example/rcnn/rcnn/pycocotools/_mask.pyx index 4e9278af2a03..1c3e127a1c05 100644 --- a/example/rcnn/rcnn/pycocotools/_mask.pyx +++ b/example/rcnn/rcnn/pycocotools/_mask.pyx @@ -10,6 +10,9 @@ __author__ = 'tsungyi' +import sys +PYTHON_VERSION = sys.version_info[0] + # import both Python-level and C-level symbols of Numpy # the API uses Numpy to interface C and Python import numpy as np @@ -38,7 +41,7 @@ cdef extern from "maskApi.h": void rlesInit( RLE **R, siz n ) void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) void rleDecode( const RLE *R, byte *mask, siz n ) - void rleMerge( const RLE *R, RLE *M, siz n, bint intersect ) + void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) void rleArea( const RLE *R, siz n, uint *a ) void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) @@ -119,7 +122,12 @@ def _frString(rleObjs): cdef bytes py_string cdef char* c_string for i, obj in enumerate(rleObjs): - py_string = str(obj['counts']) + if PYTHON_VERSION == 2: + py_string = str(obj['counts']).encode('utf8') + elif PYTHON_VERSION == 3: + py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] + else: + raise Exception('Python version must be 2 or 3') c_string = py_string rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) return Rs @@ -138,10 +146,10 @@ def decode(rleObjs): cdef RLEs Rs = _frString(rleObjs) h, w, n = 
Rs._R[0].h, Rs._R[0].w, Rs._n masks = Masks(h, w, n) - rleDecode( Rs._R, masks._mask, n ); + rleDecode(Rs._R, masks._mask, n); return np.array(masks) -def merge(rleObjs, bint intersect=0): +def merge(rleObjs, intersect=0): cdef RLEs Rs = _frString(rleObjs) cdef RLEs R = RLEs(1) rleMerge(Rs._R, R._R, Rs._n, intersect) @@ -255,7 +263,7 @@ def frPoly( poly, siz h, siz w ): Rs = RLEs(n) for i, p in enumerate(poly): np_poly = np.array(p, dtype=np.double, order='F') - rleFrPoly( &Rs._R[i], np_poly.data, len(np_poly)/2, h, w ) + rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) objs = _toString(Rs) return objs @@ -277,15 +285,24 @@ def frUncompressedRLE(ucRles, siz h, siz w): objs.append(_toString(Rs)[0]) return objs -def frPyObjects(pyobj, siz h, w): +def frPyObjects(pyobj, h, w): + # encode rle from a list of python objects if type(pyobj) == np.ndarray: - objs = frBbox(pyobj, h, w ) + objs = frBbox(pyobj, h, w) elif type(pyobj) == list and len(pyobj[0]) == 4: - objs = frBbox(pyobj, h, w ) + objs = frBbox(pyobj, h, w) elif type(pyobj) == list and len(pyobj[0]) > 4: - objs = frPoly(pyobj, h, w ) - elif type(pyobj) == list and type(pyobj[0]) == dict: + objs = frPoly(pyobj, h, w) + elif type(pyobj) == list and type(pyobj[0]) == dict \ + and 'counts' in pyobj[0] and 'size' in pyobj[0]: objs = frUncompressedRLE(pyobj, h, w) + # encode rle from single python object + elif type(pyobj) == list and len(pyobj) == 4: + objs = frBbox([pyobj], h, w)[0] + elif type(pyobj) == list and len(pyobj) > 4: + objs = frPoly([pyobj], h, w)[0] + elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: + objs = frUncompressedRLE([pyobj], h, w)[0] else: raise Exception('input type is not supported.') return objs diff --git a/example/rcnn/rcnn/pycocotools/coco.py b/example/rcnn/rcnn/pycocotools/coco.py index 44158d21d5a4..ca35cc0b053b 100644 --- a/example/rcnn/rcnn/pycocotools/coco.py +++ b/example/rcnn/rcnn/pycocotools/coco.py @@ -1,5 +1,5 @@ __author__ = 'tylin' -__version__ = 
'1.0.1' +__version__ = '2.0' # Interface for accessing the Microsoft COCO dataset. # Microsoft COCO is a large image dataset designed for object detection, @@ -27,7 +27,7 @@ # loadAnns - Load anns with the specified ids. # loadCats - Load cats with the specified ids. # loadImgs - Load imgs with the specified ids. -# segToMask - Convert polygon segmentation to binary mask. +# annToMask - Convert segmentation in an annotation to binary mask. # showAnns - Display the specified annotations. # loadRes - Load algorithm results and create API for accessing them. # download - Download COCO images from mscoco.org server. @@ -37,27 +37,30 @@ # See also COCO>decodeMask, # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, # COCO>getImgIds, COCO>loadAnns, COCO>loadCats, -# COCO>loadImgs, COCO>segToMask, COCO>showAnns +# COCO>loadImgs, COCO>annToMask, COCO>showAnns # Microsoft COCO Toolbox. version 2.0 # Data, paper, and tutorials available at: http://mscoco.org/ # Code written by Piotr Dollar and Tsung-Yi Lin, 2014. # Licensed under the Simplified BSD License [see bsd.txt] -from __future__ import print_function import json -import datetime import time import matplotlib.pyplot as plt from matplotlib.collections import PatchCollection from matplotlib.patches import Polygon import numpy as np -from skimage.draw import polygon -import urllib import copy import itertools -import mask +from . 
import mask as maskUtils import os +from collections import defaultdict +import sys +PYTHON_VERSION = sys.version_info[0] +if PYTHON_VERSION == 2: + from urllib import urlretrieve +elif PYTHON_VERSION == 3: + from urllib.request import urlretrieve class COCO: def __init__(self, annotation_file=None): @@ -68,47 +71,38 @@ def __init__(self, annotation_file=None): :return: """ # load dataset - self.dataset = {} - self.anns = [] - self.imgToAnns = {} - self.catToImgs = {} - self.imgs = {} - self.cats = {} - if annotation_file is not None: + self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() + self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) + if not annotation_file == None: print('loading annotations into memory...') tic = time.time() dataset = json.load(open(annotation_file, 'r')) - print('Done (t=%0.2fs)'%(time.time()- tic)) + assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) + print('Done (t={:0.2f}s)'.format(time.time()- tic)) self.dataset = dataset self.createIndex() def createIndex(self): # create index print('creating index...') - anns = {} - imgToAnns = {} - catToImgs = {} - cats = {} - imgs = {} + anns, cats, imgs = {}, {}, {} + imgToAnns,catToImgs = defaultdict(list),defaultdict(list) if 'annotations' in self.dataset: - imgToAnns = {ann['image_id']: [] for ann in self.dataset['annotations']} - anns = {ann['id']: [] for ann in self.dataset['annotations']} for ann in self.dataset['annotations']: - imgToAnns[ann['image_id']] += [ann] + imgToAnns[ann['image_id']].append(ann) anns[ann['id']] = ann if 'images' in self.dataset: - imgs = {im['id']: {} for im in self.dataset['images']} for img in self.dataset['images']: imgs[img['id']] = img if 'categories' in self.dataset: - cats = {cat['id']: [] for cat in self.dataset['categories']} for cat in self.dataset['categories']: cats[cat['id']] = cat - catToImgs = {cat['id']: [] for cat in self.dataset['categories']} + + if 'annotations' 
in self.dataset and 'categories' in self.dataset: for ann in self.dataset['annotations']: - catToImgs[ann['category_id']] += [ann['image_id']] + catToImgs[ann['category_id']].append(ann['image_id']) print('index created!') @@ -125,7 +119,7 @@ def info(self): :return: """ for key, value in self.dataset['info'].items(): - print('%s: %s'%(key, value)) + print('{}: {}'.format(key, value)) def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): """ @@ -143,14 +137,13 @@ def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): anns = self.dataset['annotations'] else: if not len(imgIds) == 0: - # this can be changed by defaultdict lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] anns = list(itertools.chain.from_iterable(lists)) else: anns = self.dataset['annotations'] anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] - if iscrowd is not None: + if not iscrowd == None: ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] else: ids = [ann['id'] for ann in anns] @@ -240,39 +233,57 @@ def showAnns(self, anns): """ if len(anns) == 0: return 0 - if 'segmentation' in anns[0]: + if 'segmentation' in anns[0] or 'keypoints' in anns[0]: datasetType = 'instances' elif 'caption' in anns[0]: datasetType = 'captions' + else: + raise Exception('datasetType not supported') if datasetType == 'instances': ax = plt.gca() + ax.set_autoscale_on(False) polygons = [] color = [] for ann in anns: - c = np.random.random((1, 3)).tolist()[0] - if type(ann['segmentation']) == list: - # polygon - for seg in ann['segmentation']: - poly = np.array(seg).reshape((len(seg)/2, 2)) - polygons.append(Polygon(poly, True,alpha=0.4)) - color.append(c) - else: - # mask - t = self.imgs[ann['image_id']] - if type(ann['segmentation']['counts']) == list: - rle = 
mask.frPyObjects([ann['segmentation']], t['height'], t['width']) + c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] + if 'segmentation' in ann: + if type(ann['segmentation']) == list: + # polygon + for seg in ann['segmentation']: + poly = np.array(seg).reshape((int(len(seg)/2), 2)) + polygons.append(Polygon(poly)) + color.append(c) else: - rle = [ann['segmentation']] - m = mask.decode(rle) - img = np.ones( (m.shape[0], m.shape[1], 3) ) - if ann['iscrowd'] == 1: - color_mask = np.array([2.0,166.0,101.0])/255 - if ann['iscrowd'] == 0: - color_mask = np.random.random((1, 3)).tolist()[0] - for i in range(3): - img[:,:,i] = color_mask[i] - ax.imshow(np.dstack( (img, m*0.5) )) - p = PatchCollection(polygons, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4) + # mask + t = self.imgs[ann['image_id']] + if type(ann['segmentation']['counts']) == list: + rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) + else: + rle = [ann['segmentation']] + m = maskUtils.decode(rle) + img = np.ones( (m.shape[0], m.shape[1], 3) ) + if ann['iscrowd'] == 1: + color_mask = np.array([2.0,166.0,101.0])/255 + if ann['iscrowd'] == 0: + color_mask = np.random.random((1, 3)).tolist()[0] + for i in range(3): + img[:,:,i] = color_mask[i] + ax.imshow(np.dstack( (img, m*0.5) )) + if 'keypoints' in ann and type(ann['keypoints']) == list: + # turn skeleton into zero-based index + sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 + kp = np.array(ann['keypoints']) + x = kp[0::3] + y = kp[1::3] + v = kp[2::3] + for sk in sks: + if np.all(v[sk]>0): + plt.plot(x[sk],y[sk], linewidth=3, color=c) + plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) + plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) + p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) + ax.add_collection(p) + p = PatchCollection(polygons, facecolor='none', 
edgecolors=color, linewidths=2) ax.add_collection(p) elif datasetType == 'captions': for ann in anns: @@ -286,12 +297,15 @@ def loadRes(self, resFile): """ res = COCO() res.dataset['images'] = [img for img in self.dataset['images']] - # res.dataset['info'] = copy.deepcopy(self.dataset['info']) - # res.dataset['licenses'] = copy.deepcopy(self.dataset['licenses']) - print('Loading and preparing results... ') + print('Loading and preparing results...') tic = time.time() - anns = json.load(open(resFile)) + if type(resFile) == str or type(resFile) == unicode: + anns = json.load(open(resFile)) + elif type(resFile) == np.ndarray: + anns = self.loadNumpyAnnotations(resFile) + else: + anns = resFile assert type(anns) == list, 'results in not an array of objects' annsImgIds = [ann['image_id'] for ann in anns] assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ @@ -315,18 +329,28 @@ def loadRes(self, resFile): res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) for id, ann in enumerate(anns): # now only support compressed RLE format as segmentation results - ann['area'] = mask.area([ann['segmentation']])[0] + ann['area'] = maskUtils.area(ann['segmentation']) if not 'bbox' in ann: - ann['bbox'] = mask.toBbox([ann['segmentation']])[0] + ann['bbox'] = maskUtils.toBbox(ann['segmentation']) ann['id'] = id+1 ann['iscrowd'] = 0 - print('DONE (t=%0.2fs)'%(time.time()- tic)) + elif 'keypoints' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + s = ann['keypoints'] + x = s[0::3] + y = s[1::3] + x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x1-x0)*(y1-y0) + ann['id'] = id + 1 + ann['bbox'] = [x0,y0,x1-x0,y1-y0] + print('DONE (t={:0.2f}s)'.format(time.time()- tic)) res.dataset['annotations'] = anns res.createIndex() return res - def download(self, tarDir=None, imgIds=[]): + def download(self, tarDir = None, imgIds = [] ): ''' Download COCO images from 
mscoco.org server. :param tarDir (str): COCO results directory name @@ -347,5 +371,58 @@ def download(self, tarDir=None, imgIds=[]): tic = time.time() fname = os.path.join(tarDir, img['file_name']) if not os.path.exists(fname): - urllib.urlretrieve(img['coco_url'], fname) - print('downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic)) + urlretrieve(img['coco_url'], fname) + print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) + + def loadNumpyAnnotations(self, data): + """ + Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} + :param data (numpy.ndarray) + :return: annotations (python nested list) + """ + print('Converting ndarray to lists...') + assert(type(data) == np.ndarray) + print(data.shape) + assert(data.shape[1] == 7) + N = data.shape[0] + ann = [] + for i in range(N): + if i % 1000000 == 0: + print('{}/{}'.format(i,N)) + ann += [{ + 'image_id' : int(data[i, 0]), + 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], + 'score' : data[i, 5], + 'category_id': int(data[i, 6]), + }] + return ann + + def annToRLE(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE to RLE. + :return: binary mask (numpy 2D array) + """ + t = self.imgs[ann['image_id']] + h, w = t['height'], t['width'] + segm = ann['segmentation'] + if type(segm) == list: + # polygon -- a single object might consist of multiple parts + # we merge all parts into one mask rle code + rles = maskUtils.frPyObjects(segm, h, w) + rle = maskUtils.merge(rles) + elif type(segm['counts']) == list: + # uncompressed RLE + rle = maskUtils.frPyObjects(segm, h, w) + else: + # rle + rle = ann['segmentation'] + return rle + + def annToMask(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 
+ :return: binary mask (numpy 2D array) + """ + rle = self.annToRLE(ann) + m = maskUtils.decode(rle) + return m \ No newline at end of file diff --git a/example/rcnn/rcnn/pycocotools/cocoeval.py b/example/rcnn/rcnn/pycocotools/cocoeval.py index 015c9f4ff8cc..a5dd1852912d 100644 --- a/example/rcnn/rcnn/pycocotools/cocoeval.py +++ b/example/rcnn/rcnn/pycocotools/cocoeval.py @@ -1,11 +1,10 @@ __author__ = 'tsungyi' -from __future__ import print_function import numpy as np import datetime import time from collections import defaultdict -import mask +import mask as maskUtils import copy class COCOeval: @@ -27,8 +26,9 @@ class COCOeval: # recThrs - [0:.01:1] R=101 recall thresholds for evaluation # areaRng - [...] A=4 object area ranges for evaluation # maxDets - [1 10 100] M=3 thresholds on max detections per image - # useSegm - [1] if true evaluate against ground-truth segments - # useCats - [1] if true use category labels for evaluation # Note: if useSegm=0 the evaluation is run on bounding boxes. + # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' + # iouType replaced the now DEPRECATED useSegm parameter. + # useCats - [1] if true use category labels for evaluation # Note: if useCats=0 category labels are ignored as in proposal scoring. # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. # @@ -57,13 +57,15 @@ class COCOeval: # Data, paper, and tutorials available at: http://mscoco.org/ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. # Licensed under the Simplified BSD License [see coco/license.txt] - def __init__(self, cocoGt=None, cocoDt=None): + def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): ''' Initialize CocoEval using coco APIs for gt and dt :param cocoGt: coco object with ground truth annotations :param cocoDt: coco object with detection results :return: None ''' + if not iouType: + print('iouType not specified. 
use default iouType segm') self.cocoGt = cocoGt # ground truth COCO API self.cocoDt = cocoDt # detections COCO API self.params = {} # evaluation parameters @@ -71,7 +73,7 @@ def __init__(self, cocoGt=None, cocoDt=None): self.eval = {} # accumulated evaluation results self._gts = defaultdict(list) # gt for evaluation self._dts = defaultdict(list) # dt for evaluation - self.params = Params() # parameters + self.params = Params(iouType=iouType) # parameters self._paramsEval = {} # parameters for evaluation self.stats = [] # result summarization self.ious = {} # ious between all gts and dts @@ -85,28 +87,11 @@ def _prepare(self): Prepare ._gts and ._dts for evaluation based on params :return: None ''' - # - def _toMask(objs, coco): - # modify segmentation by reference - for obj in objs: - t = coco.imgs[obj['image_id']] - if type(obj['segmentation']) == list: - if type(obj['segmentation'][0]) == dict: - print('debug') - obj['segmentation'] = mask.frPyObjects(obj['segmentation'],t['height'],t['width']) - if len(obj['segmentation']) == 1: - obj['segmentation'] = obj['segmentation'][0] - else: - # an object can have multiple polygon regions - # merge them into one RLE mask - obj['segmentation'] = mask.merge(obj['segmentation']) - elif type(obj['segmentation']) == dict and type(obj['segmentation']['counts']) == list: - obj['segmentation'] = mask.frPyObjects([obj['segmentation']],t['height'],t['width'])[0] - elif type(obj['segmentation']) == dict and \ - type(obj['segmentation']['counts'] == unicode or type(obj['segmentation']['counts']) == str): - pass - else: - raise Exception('segmentation format not supported.') + def _toMask(anns, coco): + # modify ann['segmentation'] by reference + for ann in anns: + rle = coco.annToRLE(ann) + ann['segmentation'] = rle p = self.params if p.useCats: gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) @@ -115,9 +100,16 @@ def _toMask(objs, coco): 
gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) - if p.useSegm: + # convert ground truth to mask if iouType == 'segm' + if p.iouType == 'segm': _toMask(gts, self.cocoGt) _toMask(dts, self.cocoDt) + # set ignore flag + for gt in gts: + gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 + gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] + if p.iouType == 'keypoints': + gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] self._gts = defaultdict(list) # gt for evaluation self._dts = defaultdict(list) # dt for evaluation for gt in gts: @@ -133,8 +125,13 @@ def evaluate(self): :return: None ''' tic = time.time() - print('Running per image evaluation... ') + print('Running per image evaluation...') p = self.params + # add backward compatibility if useSegm is specified in params + if not p.useSegm is None: + p.iouType = 'segm' if p.useSegm == 1 else 'bbox' + print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) + print('Evaluate annotation type *{}*'.format(p.iouType)) p.imgIds = list(np.unique(p.imgIds)) if p.useCats: p.catIds = list(np.unique(p.catIds)) @@ -145,7 +142,10 @@ def evaluate(self): # loop through images, area range, max detection number catIds = p.catIds if p.useCats else [-1] - computeIoU = self.computeIoU + if p.iouType == 'segm' or p.iouType == 'bbox': + computeIoU = self.computeIoU + elif p.iouType == 'keypoints': + computeIoU = self.computeOks self.ious = {(imgId, catId): computeIoU(imgId, catId) \ for imgId in p.imgIds for catId in catIds} @@ -159,7 +159,7 @@ def evaluate(self): ] self._paramsEval = copy.deepcopy(self.params) toc = time.time() - print('DONE (t=%0.2fs).'%(toc-tic)) + print('DONE (t={:0.2f}s).'.format(toc-tic)) def computeIoU(self, imgId, catId): p = self.params @@ -171,20 +171,66 @@ def computeIoU(self, imgId, catId): dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] if len(gt) == 0 and len(dt) ==0: return [] - 
dt = sorted(dt, key=lambda x: -x['score']) + inds = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in inds] if len(dt) > p.maxDets[-1]: dt=dt[0:p.maxDets[-1]] - if p.useSegm: + if p.iouType == 'segm': g = [g['segmentation'] for g in gt] d = [d['segmentation'] for d in dt] - else: + elif p.iouType == 'bbox': g = [g['bbox'] for g in gt] d = [d['bbox'] for d in dt] + else: + raise Exception('unknown iouType for iou computation') # compute iou between each dt and gt region iscrowd = [int(o['iscrowd']) for o in gt] - ious = mask.iou(d,g,iscrowd) + ious = maskUtils.iou(d,g,iscrowd) + return ious + + def computeOks(self, imgId, catId): + p = self.params + # dimention here should be Nxm + gts = self._gts[imgId, catId] + dts = self._dts[imgId, catId] + inds = np.argsort([-d['score'] for d in dts], kind='mergesort') + dts = [dts[i] for i in inds] + if len(dts) > p.maxDets[-1]: + dts = dts[0:p.maxDets[-1]] + # if len(gts) == 0 and len(dts) == 0: + if len(gts) == 0 or len(dts) == 0: + return [] + ious = np.zeros((len(dts), len(gts))) + sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 + vars = (sigmas * 2)**2 + k = len(sigmas) + # compute oks between each detection and ground truth object + for j, gt in enumerate(gts): + # create bounds for ignore regions(double the gt bbox) + g = np.array(gt['keypoints']) + xg = g[0::3]; yg = g[1::3]; vg = g[2::3] + k1 = np.count_nonzero(vg > 0) + bb = gt['bbox'] + x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 + y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 + for i, dt in enumerate(dts): + d = np.array(dt['keypoints']) + xd = d[0::3]; yd = d[1::3] + if k1>0: + # measure the per-keypoint distance if keypoints visible + dx = xd - xg + dy = yd - yg + else: + # measure minimum distance to keypoints in (x0,y0) & (x1,y1) + z = np.zeros((k)) + dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0) + dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0) + e = 
(dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2 + if k1 > 0: + e=e[vg > 0] + ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] return ious def evaluateImg(self, imgId, catId, aRng, maxDet): @@ -192,7 +238,6 @@ def evaluateImg(self, imgId, catId, aRng, maxDet): perform evaluation for single category and image :return: dict (single image results) ''' - # p = self.params if p.useCats: gt = self._gts[imgId,catId] @@ -204,23 +249,19 @@ def evaluateImg(self, imgId, catId, aRng, maxDet): return None for g in gt: - if 'ignore' not in g: - g['ignore'] = 0 - if g['iscrowd'] == 1 or g['ignore'] or (g['area']aRng[1]): + if g['ignore'] or (g['area']aRng[1]): g['_ignore'] = 1 else: g['_ignore'] = 0 # sort dt highest score first, sort gt ignore last - # gt = sorted(gt, key=lambda x: x['_ignore']) - gtind = [ind for (ind, g) in sorted(enumerate(gt), key=lambda (ind, g): g['_ignore']) ] - - gt = [gt[ind] for ind in gtind] - dt = sorted(dt, key=lambda x: -x['score'])[0:maxDet] + gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort') + gt = [gt[i] for i in gtind] + dtind = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in dtind[0:maxDet]] iscrowd = [int(o['iscrowd']) for o in gt] # load computed ious - N_iou = len(self.ious[imgId, catId]) - ious = self.ious[imgId, catId][0:maxDet, np.array(gtind)] if N_iou >0 else self.ious[imgId, catId] + ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId] T = len(p.iouThrs) G = len(gt) @@ -245,7 +286,7 @@ def evaluateImg(self, imgId, catId, aRng, maxDet): # continue to next gt unless better match made if ious[dind,gind] < iou: continue - # match successful and best so far, store appropriately + # if match successful and best so far, store appropriately iou=ious[dind,gind] m=gind # if match made store id of match for both dt and gt @@ -278,7 +319,7 @@ def accumulate(self, p = None): :param p: input params for evaluation :return: None ''' - 
print('Accumulating evaluation results... ') + print('Accumulating evaluation results...') tic = time.time() if not self.evalImgs: print('Please run evaluate() first') @@ -306,7 +347,6 @@ def accumulate(self, p = None): m_list = [m for n, m in enumerate(p.maxDets) if m in setM] a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - # K0 = len(_pe.catIds) I0 = len(_pe.imgIds) A0 = len(_pe.areaRng) # retrieve E at each category, area range, and max number of detections @@ -315,8 +355,8 @@ def accumulate(self, p = None): for a, a0 in enumerate(a_list): Na = a0*I0 for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk+Na+i] for i in i_list] - E = filter(None, E) + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] if len(E) == 0: continue dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) @@ -327,8 +367,8 @@ def accumulate(self, p = None): dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = len([ig for ig in gtIg if ig == 0]) + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) if npig == 0: continue tps = np.logical_and( dtm, np.logical_not(dtIg) ) @@ -357,7 +397,7 @@ def accumulate(self, p = None): if pr[i] > pr[i-1]: pr[i-1] = pr[i] - inds = np.searchsorted(rc, p.recThrs) + inds = np.searchsorted(rc, p.recThrs, side='left') try: for ri, pi in enumerate(inds): q[ri] = pr[pi] @@ -367,12 +407,12 @@ def accumulate(self, p = None): self.eval = { 'params': p, 'counts': [T, R, K, A, M], - 'date': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'precision': precision, 'recall': recall, } toc = time.time() - print('DONE (t=%0.2fs).'%( toc-tic )) + print('DONE 
(t={:0.2f}s).'.format( toc-tic)) def summarize(self): ''' @@ -381,15 +421,14 @@ def summarize(self): ''' def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): p = self.params - iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6} | maxDets={:>3} ] = {}' - titleStr = 'Average Precision' if ap == 1 else 'Average Recall' - typeStr = '(AP)' if ap==1 else '(AR)' - iouStr = '%0.2f:%0.2f'%(p.iouThrs[0], p.iouThrs[-1]) if iouThr is None else '%0.2f'%(iouThr) - areaStr = areaRng - maxDetsStr = '%d'%(maxDets) - - aind = [i for i, aRng in enumerate(['all', 'small', 'medium', 'large']) if aRng == areaRng] - mind = [i for i, mDet in enumerate([1, 10, 100]) if mDet == maxDets] + iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if ap == 1 else 'Average Recall' + typeStr = '(AP)' if ap==1 else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] if ap == 1: # dimension of precision: [TxRxKxAxM] s = self.eval['precision'] @@ -397,34 +436,56 @@ def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): if iouThr is not None: t = np.where(iouThr == p.iouThrs)[0] s = s[t] - # areaRng s = s[:,:,:,aind,mind] else: # dimension of recall: [TxKxAxM] s = self.eval['recall'] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] s = s[:,:,aind,mind] if len(s[s>-1])==0: mean_s = -1 else: mean_s = np.mean(s[s>-1]) - print(iStr.format(titleStr, typeStr, iouStr, areaStr, maxDetsStr, '%.3f'%(float(mean_s)))) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) return mean_s - + def _summarizeDets(): + stats = np.zeros((12,)) + stats[0] = _summarize(1) + stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=.75, 
maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2]) + return stats + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=.5) + stats[2] = _summarize(1, maxDets=20, iouThr=.75) + stats[3] = _summarize(1, maxDets=20, areaRng='medium') + stats[4] = _summarize(1, maxDets=20, areaRng='large') + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=.5) + stats[7] = _summarize(0, maxDets=20, iouThr=.75) + stats[8] = _summarize(0, maxDets=20, areaRng='medium') + stats[9] = _summarize(0, maxDets=20, areaRng='large') + return stats if not self.eval: raise Exception('Please run accumulate() first') - self.stats = np.zeros((12,)) - self.stats[0] = _summarize(1) - self.stats[1] = _summarize(1,iouThr=.5) - self.stats[2] = _summarize(1,iouThr=.75) - self.stats[3] = _summarize(1,areaRng='small') - self.stats[4] = _summarize(1,areaRng='medium') - self.stats[5] = _summarize(1,areaRng='large') - self.stats[6] = _summarize(0,maxDets=1) - self.stats[7] = _summarize(0,maxDets=10) - self.stats[8] = _summarize(0,maxDets=100) - self.stats[9] = _summarize(0,areaRng='small') - self.stats[10] = _summarize(0,areaRng='medium') - self.stats[11] = _summarize(0,areaRng='large') + iouType = self.params.iouType + if iouType == 'segm' or iouType == 'bbox': + summarize = _summarizeDets + 
elif iouType == 'keypoints': + summarize = _summarizeKps + self.stats = summarize() def __str__(self): self.summarize() @@ -433,13 +494,35 @@ class Params: ''' Params for coco evaluation api ''' - def __init__(self): + def setDetParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True) + self.maxDets = [1, 10, 100] + self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'small', 'medium', 'large'] + self.useCats = 1 + + def setKpParams(self): self.imgIds = [] self.catIds = [] # np.arange causes trouble. the data point on arange is slightly larger than the true value - self.iouThrs = np.linspace(.5, 0.95, np.round((0.95-.5)/.05)+1, endpoint=True) - self.recThrs = np.linspace(.0, 1.00, np.round((1.00-.0)/.01)+1, endpoint=True) - self.maxDets = [1,10,100] - self.areaRng = [ [0**2,1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2] ] - self.useSegm = 0 - self.useCats = 1 \ No newline at end of file + self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True) + self.maxDets = [20] + self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'medium', 'large'] + self.useCats = 1 + + def __init__(self, iouType='segm'): + if iouType == 'segm' or iouType == 'bbox': + self.setDetParams() + elif iouType == 'keypoints': + self.setKpParams() + else: + raise Exception('iouType not supported') + self.iouType = iouType + # useSegm is deprecated + self.useSegm = None \ No newline at end of file diff --git a/example/rcnn/rcnn/pycocotools/mask.py b/example/rcnn/rcnn/pycocotools/mask.py index 
c00e09b6e46e..f49b8736b280 100644 --- a/example/rcnn/rcnn/pycocotools/mask.py +++ b/example/rcnn/rcnn/pycocotools/mask.py @@ -1,6 +1,6 @@ __author__ = 'tsungyi' -import _mask as _mask +import _mask # Interface for manipulating masks stored in RLE format. # @@ -73,10 +73,31 @@ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. # Licensed under the Simplified BSD License [see coco/license.txt] -encode = _mask.encode -decode = _mask.decode iou = _mask.iou merge = _mask.merge -area = _mask.area -toBbox = _mask.toBbox frPyObjects = _mask.frPyObjects + +def encode(bimask): + if len(bimask.shape) == 3: + return _mask.encode(bimask) + elif len(bimask.shape) == 2: + h, w = bimask.shape + return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] + +def decode(rleObjs): + if type(rleObjs) == list: + return _mask.decode(rleObjs) + else: + return _mask.decode([rleObjs])[:,:,0] + +def area(rleObjs): + if type(rleObjs) == list: + return _mask.area(rleObjs) + else: + return _mask.area([rleObjs])[0] + +def toBbox(rleObjs): + if type(rleObjs) == list: + return _mask.toBbox(rleObjs) + else: + return _mask.toBbox([rleObjs])[0] \ No newline at end of file diff --git a/example/rcnn/rcnn/pycocotools/maskApi.c b/example/rcnn/rcnn/pycocotools/maskApi.c index 2b2d89116574..85e397918278 100644 --- a/example/rcnn/rcnn/pycocotools/maskApi.c +++ b/example/rcnn/rcnn/pycocotools/maskApi.c @@ -13,7 +13,7 @@ uint umax( uint a, uint b ) { return (a>b) ? 
a : b; } void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); - if(cnts) for(siz j=0; jcnts[j]=cnts[j]; + siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; } void rleFree( RLE *R ) { @@ -21,12 +21,12 @@ void rleFree( RLE *R ) { } void rlesInit( RLE **R, siz n ) { - *R = (RLE*) malloc(sizeof(RLE)*n); - for(siz i=0; i0) { crowd=iscrowd!=NULL && iscrowd[g]; if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } - siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb; + siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; while( ct>0 ) { @@ -95,8 +95,19 @@ void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) { } } +void rleNms( RLE *dt, siz n, uint *keep, double thr ) { + siz i, j; double u; + for( i=0; ithr) keep[j]=0; + } + } +} + void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { - double h, w, i, u, ga, da; siz g, d; bool crowd; + double h, w, i, u, ga, da; siz g, d; int crowd; for( g=0; gthr) keep[j]=0; + } + } +} + void rleToBbox( const RLE *R, BB bb, siz n ) { - for( siz i=0; i=dy && xs>xe) || (dxye); if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } s = dx>=dy ? 
(double)(ye-ys)/dx : (double)(xe-xs)/dy; - if(dx>=dy) for( int d=0; d<=dx; d++ ) { + if(dx>=dy) for( d=0; d<=dx; d++ ) { t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; - } else for( int d=0; d<=dy; d++ ) { + } else for( d=0; d<=dy; d++ ) { t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; } } - // get points along y-boundary and downsample + /* get points along y-boundary and downsample */ free(x); free(y); k=m; m=0; double xd, yd; x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); for( j=1; jh) yd=h; yd=ceil(yd); x[m]=(int) xd; y[m]=(int) yd; m++; } - // compute rle encoding given y-boundary points + /* compute rle encoding given y-boundary points */ k=m; a=malloc(sizeof(uint)*(k+1)); for( j=0; jm, p=0; long x; bool more; + /* Similar to LEB128 but using 6 bits/char and ascii chars 48-111. */ + siz i, m=R->m, p=0; long x; int more; char *s=malloc(sizeof(char)*m*6); for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; @@ -193,7 +215,7 @@ char* rleToString( const RLE *R ) { } void rleFrString( RLE *R, char *s, siz h, siz w ) { - siz m=0, p=0, k; long x; bool more; uint *cnts; + siz m=0, p=0, k; long x; int more; uint *cnts; while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; while( s[p] ) { x=0; k=0; more=1; diff --git a/example/rcnn/rcnn/pycocotools/maskApi.h b/example/rcnn/rcnn/pycocotools/maskApi.h index ff16116c4781..ebc7892da382 100644 --- a/example/rcnn/rcnn/pycocotools/maskApi.h +++ b/example/rcnn/rcnn/pycocotools/maskApi.h @@ -5,7 +5,6 @@ * Licensed under the Simplified BSD License [see coco/license.txt] **************************************************************************/ #pragma once -#include typedef unsigned int uint; typedef unsigned long siz; @@ -13,43 +12,49 @@ typedef unsigned char byte; typedef double* BB; typedef struct { siz h, w, m; uint *cnts; } RLE; -// Initialize/destroy RLE. +/* Initialize/destroy RLE. */ void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); void rleFree( RLE *R ); -// Initialize/destroy RLE array. 
+/* Initialize/destroy RLE array. */ void rlesInit( RLE **R, siz n ); void rlesFree( RLE **R, siz n ); -// Encode binary masks using RLE. +/* Encode binary masks using RLE. */ void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); -// Decode binary masks encoded via RLE. +/* Decode binary masks encoded via RLE. */ void rleDecode( const RLE *R, byte *mask, siz n ); -// Compute union or intersection of encoded masks. -void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); +/* Compute union or intersection of encoded masks. */ +void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); -// Compute area of encoded masks. +/* Compute area of encoded masks. */ void rleArea( const RLE *R, siz n, uint *a ); -// Compute intersection over union between masks. +/* Compute intersection over union between masks. */ void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); -// Compute intersection over union between bounding boxes. +/* Compute non-maximum suppression between bounding masks */ +void rleNms( RLE *dt, siz n, uint *keep, double thr ); + +/* Compute intersection over union between bounding boxes. */ void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); -// Get bounding boxes surrounding encoded masks. +/* Compute non-maximum suppression between bounding boxes */ +void bbNms( BB dt, siz n, uint *keep, double thr ); + +/* Get bounding boxes surrounding encoded masks. */ void rleToBbox( const RLE *R, BB bb, siz n ); -// Convert bounding boxes to encoded masks. +/* Convert bounding boxes to encoded masks. */ void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); -// Convert polygon to encoded mask. +/* Convert polygon to encoded mask. */ void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); -// Get compressed string representation of encoded mask. +/* Get compressed string representation of encoded mask. */ char* rleToString( const RLE *R ); -// Convert from compressed string representation of encoded mask. 
+/* Convert from compressed string representation of encoded mask. */ void rleFrString( RLE *R, char *s, siz h, siz w ); diff --git a/example/rcnn/rcnn/symbol/proposal.py b/example/rcnn/rcnn/symbol/proposal.py index 397030db6d7c..dd0bb15f5168 100644 --- a/example/rcnn/rcnn/symbol/proposal.py +++ b/example/rcnn/rcnn/symbol/proposal.py @@ -3,18 +3,16 @@ classification probability and bounding box prediction results, and image size and scale information. """ -from __future__ import print_function import mxnet as mx import numpy as np import numpy.random as npr from distutils.util import strtobool +from rcnn.logger import logger from rcnn.processing.bbox_transform import bbox_pred, clip_boxes from rcnn.processing.generate_anchor import generate_anchors from rcnn.processing.nms import py_nms_wrapper, cpu_nms_wrapper, gpu_nms_wrapper -DEBUG = False - class ProposalOperator(mx.operator.CustomOp): def __init__(self, feat_stride, scales, ratios, output_score, @@ -31,10 +29,8 @@ def __init__(self, feat_stride, scales, ratios, output_score, self._threshold = threshold self._rpn_min_size = rpn_min_size - if DEBUG: - print('feat_stride: {}'.format(self._feat_stride)) - print('anchors:') - print(self._anchors) + logger.debug('feat_stride: %s' % self._feat_stride) + logger.debug('anchors:\n%s' % self._anchors) def forward(self, is_train, req, in_data, out_data, aux): nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id) @@ -64,17 +60,14 @@ def forward(self, is_train, req, in_data, out_data, aux): bbox_deltas = in_data[1].asnumpy() im_info = in_data[2].asnumpy()[0, :] - if DEBUG: - print('im_size: ({}, {})'.format(im_info[0], im_info[1])) - print('scale: {}'.format(im_info[2])) + logger.debug('im_info: %s' % im_info) # 1. 
Generate proposals from bbox_deltas and shifted anchors # use real image size instead of padded feature map sizes height, width = int(im_info[0] / self._feat_stride), int(im_info[1] / self._feat_stride) - if DEBUG: - print('score map size: {}'.format(scores.shape)) - print("resudial: {}".format((scores.shape[2] - height, scores.shape[3] - width))) + logger.debug('score map size: (%d, %d)' % (scores.shape[2], scores.shape[3])) + logger.debug('resudial: (%d, %d)' % (scores.shape[2] - height, scores.shape[3] - width)) # Enumerate all shifts shift_x = np.arange(0, width) * self._feat_stride diff --git a/example/rcnn/rcnn/symbol/proposal_target.py b/example/rcnn/rcnn/symbol/proposal_target.py index 3f28cb2cbebb..6f1a6ffbc440 100644 --- a/example/rcnn/rcnn/symbol/proposal_target.py +++ b/example/rcnn/rcnn/symbol/proposal_target.py @@ -2,15 +2,14 @@ Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them. """ -from __future__ import print_function +import logging import mxnet as mx import numpy as np from distutils.util import strtobool +from ..logger import logger from rcnn.io.rcnn import sample_rois -DEBUG = False - class ProposalTargetOperator(mx.operator.CustomOp): def __init__(self, num_classes, batch_images, batch_rois, fg_fraction): @@ -20,7 +19,7 @@ def __init__(self, num_classes, batch_images, batch_rois, fg_fraction): self._batch_rois = batch_rois self._fg_fraction = fg_fraction - if DEBUG: + if logger.level == logging.DEBUG: self._count = 0 self._fg_num = 0 self._bg_num = 0 @@ -43,17 +42,17 @@ def forward(self, is_train, req, in_data, out_data, aux): rois, labels, bbox_targets, bbox_weights = \ sample_rois(all_rois, fg_rois_per_image, rois_per_image, self._num_classes, gt_boxes=gt_boxes) - if DEBUG: - print("labels=", labels) - print('num fg: {}'.format((labels > 0).sum())) - print('num bg: {}'.format((labels == 0).sum())) + if logger.level == logging.DEBUG: + logger.debug("labels: %s" % labels) + 
logger.debug('num fg: {}'.format((labels > 0).sum())) + logger.debug('num bg: {}'.format((labels == 0).sum())) self._count += 1 self._fg_num += (labels > 0).sum() self._bg_num += (labels == 0).sum() - print("self._count=", self._count) - print('num fg avg: {}'.format(self._fg_num / self._count)) - print('num bg avg: {}'.format(self._bg_num / self._count)) - print('ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num))) + logger.debug("self._count: %d" % self._count) + logger.debug('num fg avg: %d' % (self._fg_num / self._count)) + logger.debug('num bg avg: %d' % (self._bg_num / self._count)) + logger.debug('ratio: %.3f' % (float(self._fg_num) / float(self._bg_num))) for ind, val in enumerate([rois, labels, bbox_targets, bbox_weights]): self.assign(out_data[ind], req[ind], val) diff --git a/example/rcnn/rcnn/tools/reeval.py b/example/rcnn/rcnn/tools/reeval.py index a2e6264942de..22e5e206f4d0 100644 --- a/example/rcnn/rcnn/tools/reeval.py +++ b/example/rcnn/rcnn/tools/reeval.py @@ -1,9 +1,9 @@ -from __future__ import print_function import argparse import cPickle import os import mxnet as mx +from ..logger import logger from ..config import config, default, generate_config from ..dataset import * @@ -39,7 +39,7 @@ def parse_args(): def main(): args = parse_args() - print('Called with argument:', args) + logger.info('Called with argument: %s' % args) reeval(args) diff --git a/example/rcnn/rcnn/tools/test_rcnn.py b/example/rcnn/rcnn/tools/test_rcnn.py index 65dca7a6d0f4..83a9fac03e67 100644 --- a/example/rcnn/rcnn/tools/test_rcnn.py +++ b/example/rcnn/rcnn/tools/test_rcnn.py @@ -1,8 +1,8 @@ -from __future__ import print_function import argparse import pprint import mxnet as mx +from ..logger import logger from ..config import config, default, generate_config from ..symbol import * from ..dataset import * @@ -99,8 +99,8 @@ def parse_args(): def main(): args = parse_args() + logger.info('Called with argument: %s' % args) ctx = mx.gpu(args.gpu) - print(args) 
test_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, ctx, args.prefix, args.epoch, args.vis, args.shuffle, args.has_rpn, args.proposal, args.thresh) diff --git a/example/rcnn/rcnn/tools/test_rpn.py b/example/rcnn/rcnn/tools/test_rpn.py index 9d0ff198e1b4..09f6af74368f 100644 --- a/example/rcnn/rcnn/tools/test_rpn.py +++ b/example/rcnn/rcnn/tools/test_rpn.py @@ -1,8 +1,8 @@ -from __future__ import print_function import argparse import pprint import mxnet as mx +from ..logger import logger from ..config import config, default, generate_config from ..symbol import * from ..dataset import * @@ -89,7 +89,7 @@ def parse_args(): def main(): args = parse_args() - print('Called with argument:', args) + logger.info('Called with argument: %s' % args) ctx = mx.gpu(args.gpu) test_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, ctx, args.prefix, args.epoch, diff --git a/example/rcnn/rcnn/tools/train_rcnn.py b/example/rcnn/rcnn/tools/train_rcnn.py index 0669af047819..3f1cde380e8c 100644 --- a/example/rcnn/rcnn/tools/train_rcnn.py +++ b/example/rcnn/rcnn/tools/train_rcnn.py @@ -1,8 +1,8 @@ import argparse -import logging import pprint import mxnet as mx +from ..logger import logger from ..config import config, default, generate_config from ..symbol import * from ..core import callback, metric @@ -17,11 +17,6 @@ def train_rcnn(network, dataset, image_set, root_path, dataset_path, frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, train_shared, lr, lr_step, proposal): - # set up logger - logging.basicConfig() - logger = logging.getLogger() - logger.setLevel(logging.INFO) - # set up config config.TRAIN.BATCH_IMAGES = 2 config.TRAIN.BATCH_ROIS = 128 @@ -36,7 +31,7 @@ def train_rcnn(network, dataset, image_set, root_path, dataset_path, input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config - pprint.pprint(config) + 
logger.info(pprint.pformat(config)) # load dataset and prepare imdb for training image_sets = [iset for iset in image_set.split('+')] @@ -53,6 +48,7 @@ def train_rcnn(network, dataset, image_set, root_path, dataset_path, # infer max shape max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] + logger.info('providing maximum shape %s' % max_data_shape) # infer shape data_shape_dict = dict(train_data.provide_data + train_data.provide_label) @@ -60,8 +56,7 @@ def train_rcnn(network, dataset, image_set, root_path, dataset_path, arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) - print('output shape') - pprint.pprint(out_shape_dict) + logger.info('output shape %s' % pprint.pformat(out_shape_dict)) # load and initialize params if resume: @@ -115,7 +110,7 @@ def train_rcnn(network, dataset, image_set, root_path, dataset_path, lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] - print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) + logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) # optimizer optimizer_params = {'momentum': 0.9, @@ -166,7 +161,7 @@ def parse_args(): def main(): args = parse_args() - print('Called with argument:', args) + logger.info('Called with argument: %s' % args) ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, diff --git a/example/rcnn/rcnn/tools/train_rpn.py 
b/example/rcnn/rcnn/tools/train_rpn.py index 2c7267ea36ef..87b92c8229ef 100644 --- a/example/rcnn/rcnn/tools/train_rpn.py +++ b/example/rcnn/rcnn/tools/train_rpn.py @@ -1,9 +1,8 @@ -from __future__ import print_function import argparse -import logging import pprint import mxnet as mx +from ..logger import logger from ..config import config, default, generate_config from ..symbol import * from ..core import callback, metric @@ -17,11 +16,6 @@ def train_rpn(network, dataset, image_set, root_path, dataset_path, frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, train_shared, lr, lr_step): - # set up logger - logging.basicConfig() - logger = logging.getLogger() - logger.setLevel(logging.INFO) - # setup config config.TRAIN.BATCH_IMAGES = 1 @@ -34,7 +28,7 @@ def train_rpn(network, dataset, image_set, root_path, dataset_path, input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config - pprint.pprint(config) + logger.info(pprint.pformat(config)) # load dataset and prepare imdb for training image_sets = [iset for iset in image_set.split('+')] @@ -53,7 +47,7 @@ def train_rpn(network, dataset, image_set, root_path, dataset_path, # infer max shape max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) - print('providing maximum shape', max_data_shape, max_label_shape) + logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape)) # infer shape data_shape_dict = dict(train_data.provide_data + train_data.provide_label) @@ -61,8 +55,7 @@ def train_rpn(network, dataset, image_set, root_path, dataset_path, arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) - print('output shape') - pprint.pprint(out_shape_dict) 
+ logger.info('output shape %s' % pprint.pformat(out_shape_dict)) # load and initialize params if resume: @@ -118,7 +111,7 @@ def train_rpn(network, dataset, image_set, root_path, dataset_path, lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] - print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) + logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) # optimizer optimizer_params = {'momentum': 0.9, @@ -168,7 +161,7 @@ def parse_args(): def main(): args = parse_args() - print('Called with argument:', args) + logger.info('Called with argument: %s' % args) ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, diff --git a/example/rcnn/rcnn/utils/caffe_convert.py b/example/rcnn/rcnn/utils/caffe_convert.py deleted file mode 100644 index b5f0fbe27d14..000000000000 --- a/example/rcnn/rcnn/utils/caffe_convert.py +++ /dev/null @@ -1,75 +0,0 @@ -# This script will not work unless all paths are set right - -from __future__ import print_function -import os -import sys -import mxnet as mx -import numpy as np -fast_rcnn_path = None -sys.path.insert(0, os.path.join(fast_rcnn_path, 'caffe-fast-rcnn', 'python')) -sys.path.insert(0, os.path.join(fast_rcnn_path, 'lib')) -import caffe -from rcnn.symbol import get_symbol_vgg_test - -def load_model(caffeproto, caffemodel, arg_shape_dic): - def get_caffe_iter(layer_names, layers): - for layer_idx, layer in enumerate(layers): - layer_name = layer_names[layer_idx].replace('/', '_') - layer_type = layer.type - layer_blobs = layer.blobs - yield (layer_name, layer_type, 
layer_blobs) - - net_caffe = caffe.Net(caffeproto, caffemodel, caffe.TEST) - layer_names = net_caffe._layer_names - layers = net_caffe.layers - iter = '' - iter = get_caffe_iter(layer_names, layers) - first_conv = True - - arg_params = {} - for layer_name, layer_type, layer_blobs in iter: - if layer_type == 'Convolution' or layer_type == 'InnerProduct' or layer_type == 4 or layer_type == 14: - assert(len(layer_blobs) == 2) - wmat = np.array(layer_blobs[0].data).reshape(layer_blobs[0].num, layer_blobs[0].channels, layer_blobs[0].height, layer_blobs[0].width) - bias = np.array(layer_blobs[1].data) - if first_conv: - print('Swapping BGR of caffe into RGB in mxnet') - wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :] - - assert(wmat.flags['C_CONTIGUOUS'] is True) - assert(bias.flags['C_CONTIGUOUS'] is True) - print('converting layer {0}, wmat shape = {1}, bias shape = {2}'.format(layer_name, wmat.shape, bias.shape)) - wmat = wmat.reshape((wmat.shape[0], -1)) - bias = bias.reshape((bias.shape[0], 1)) - weight_name = layer_name + "_weight" - bias_name = layer_name + "_bias" - - if weight_name not in arg_shape_dic: - print(weight_name + ' not found in arg_shape_dic.') - continue - wmat = wmat.reshape(arg_shape_dic[weight_name]) - arg_params[weight_name] = mx.nd.zeros(wmat.shape) - arg_params[weight_name][:] = wmat - - bias = bias.reshape(arg_shape_dic[bias_name]) - arg_params[bias_name] = mx.nd.zeros(bias.shape) - arg_params[bias_name][:] = bias - - if first_conv and (layer_type == 'Convolution' or layer_type == 4): - first_conv = False - - return arg_params - -proto_path = os.path.join(fast_rcnn_path, 'models', 'VGG16', 'test.prototxt') -model_path = os.path.join(fast_rcnn_path, 'data', 'fast_rcnn_models', 'vgg16_fast_rcnn_iter_40000.caffemodel') - -symbol = get_symbol_vgg_test() -arg_shapes, out_shapes, aux_shapes = symbol.infer_shape(**{'data': (1, 3, 224, 224), 'rois': (1, 5)}) -arg_shape_dic = { name: shape for name, shape in zip(symbol.list_arguments(), arg_shapes) } - 
-arg_params = load_model(proto_path, model_path, arg_shape_dic) - -model = mx.model.FeedForward(ctx=mx.cpu(), symbol=symbol, arg_params=arg_params, - aux_params={}, num_epoch=1, - learning_rate=0.01, momentum=0.9, wd=0.0001) -model.save('model/ref') diff --git a/example/rcnn/rcnn/utils/load_data.py b/example/rcnn/rcnn/utils/load_data.py index d56882a5c9d8..4700229e65af 100644 --- a/example/rcnn/rcnn/utils/load_data.py +++ b/example/rcnn/rcnn/utils/load_data.py @@ -1,5 +1,5 @@ -from __future__ import print_function import numpy as np +from ..logger import logger from ..config import config from ..dataset import * @@ -47,6 +47,6 @@ def is_valid(entry): num = len(roidb) filtered_roidb = [entry for entry in roidb if is_valid(entry)] num_after = len(filtered_roidb) - print('filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after)) + logger.info('load data: filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after)) return filtered_roidb diff --git a/example/rcnn/test.py b/example/rcnn/test.py index 708efc8c7ddb..12fe6973fbcf 100644 --- a/example/rcnn/test.py +++ b/example/rcnn/test.py @@ -1,6 +1,6 @@ -from __future__ import print_function import argparse import mxnet as mx +from rcnn.logger import logger from rcnn.config import config, default, generate_config from rcnn.tools.test_rcnn import test_rcnn @@ -31,8 +31,8 @@ def parse_args(): def main(): args = parse_args() + logger.info('Called with argument: %s' % args) ctx = mx.gpu(args.gpu) - print(args) test_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, ctx, args.prefix, args.epoch, args.vis, args.shuffle, args.has_rpn, args.proposal, args.thresh) diff --git a/example/rcnn/train_alternate.py b/example/rcnn/train_alternate.py index 991fb237d085..74f16b9980aa 100644 --- a/example/rcnn/train_alternate.py +++ b/example/rcnn/train_alternate.py @@ -1,9 +1,7 @@ -from __future__ import print_function import argparse -import logging - import mxnet as mx +from 
rcnn.logger import logger from rcnn.config import config, default, generate_config from rcnn.tools.train_rpn import train_rpn from rcnn.tools.test_rpn import test_rpn @@ -14,41 +12,36 @@ def alternate_train(args, ctx, pretrained, epoch, rpn_epoch, rpn_lr, rpn_lr_step, rcnn_epoch, rcnn_lr, rcnn_lr_step): - # set up logger - logging.basicConfig() - logger = logging.getLogger() - logger.setLevel(logging.INFO) - # basic config begin_epoch = 0 config.TRAIN.BG_THRESH_LO = 0.0 - logging.info('########## TRAIN RPN WITH IMAGENET INIT') + logger.info('########## TRAIN RPN WITH IMAGENET INIT') train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, ctx, pretrained, epoch, 'model/rpn1', begin_epoch, rpn_epoch, train_shared=False, lr=rpn_lr, lr_step=rpn_lr_step) - logging.info('########## GENERATE RPN DETECTION') + logger.info('########## GENERATE RPN DETECTION') image_sets = [iset for iset in args.image_set.split('+')] for image_set in image_sets: test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path, ctx[0], 'model/rpn1', rpn_epoch, vis=False, shuffle=False, thresh=0) - logging.info('########## TRAIN RCNN WITH IMAGENET INIT AND RPN DETECTION') + logger.info('########## TRAIN RCNN WITH IMAGENET INIT AND RPN DETECTION') train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, ctx, pretrained, epoch, 'model/rcnn1', begin_epoch, rcnn_epoch, train_shared=False, lr=rcnn_lr, lr_step=rcnn_lr_step, proposal='rpn') - logging.info('########## TRAIN RPN WITH RCNN INIT') + logger.info('########## TRAIN RPN WITH RCNN INIT') train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, ctx, 
'model/rcnn1', rcnn_epoch, 'model/rpn2', begin_epoch, rpn_epoch, train_shared=True, lr=rpn_lr, lr_step=rpn_lr_step) - logging.info('########## GENERATE RPN DETECTION') + logger.info('########## GENERATE RPN DETECTION') image_sets = [iset for iset in args.image_set.split('+')] for image_set in image_sets: test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path, @@ -101,7 +94,7 @@ def parse_args(): def main(): args = parse_args() - print('Called with argument:', args) + logger.info('Called with argument: %s' % args) ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] alternate_train(args, ctx, args.pretrained, args.pretrained_epoch, args.rpn_epoch, args.rpn_lr, args.rpn_lr_step, diff --git a/example/rcnn/train_end2end.py b/example/rcnn/train_end2end.py index ac00120131c9..b8b1c5c3a410 100644 --- a/example/rcnn/train_end2end.py +++ b/example/rcnn/train_end2end.py @@ -1,10 +1,9 @@ -from __future__ import print_function import argparse -import logging import pprint import mxnet as mx import numpy as np +from rcnn.logger import logger from rcnn.config import config, default, generate_config from rcnn.symbol import * from rcnn.core import callback, metric @@ -16,11 +15,6 @@ def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'): - # set up logger - logging.basicConfig() - logger = logging.getLogger() - logger.setLevel(logging.INFO) - # setup config config.TRAIN.BATCH_IMAGES = 1 config.TRAIN.BATCH_ROIS = 128 @@ -36,7 +30,7 @@ def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config - pprint.pprint(config) + logger.info(pprint.pformat(config)) # load dataset and prepare imdb for training image_sets = [iset for iset in args.image_set.split('+')] @@ -56,7 +50,7 @@ def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in 
config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5))) - print('providing maximum shape', max_data_shape, max_label_shape) + logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape)) # infer shape data_shape_dict = dict(train_data.provide_data + train_data.provide_label) @@ -64,8 +58,7 @@ def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) - print('output shape') - pprint.pprint(out_shape_dict) + logger.info('output shape %s' % pprint.pformat(out_shape_dict)) # load and initialize params if args.resume: @@ -127,7 +120,7 @@ def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] - print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) + logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) # optimizer optimizer_params = {'momentum': 0.9, @@ -176,7 +169,7 @@ def parse_args(): def main(): args = parse_args() - print('Called with argument:', args) + logger.info('Called with argument: %s' % args) ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] train_net(args, ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, lr=args.lr, lr_step=args.lr_step) diff --git a/src/operator/contrib/proposal-inl.h b/src/operator/contrib/proposal-inl.h index ed0ec826588f..686a8a354ff9 100644 --- 
a/src/operator/contrib/proposal-inl.h +++ b/src/operator/contrib/proposal-inl.h @@ -267,7 +267,7 @@ inline void _Transform(float scale, float ratio, const std::vector& base_anchor, std::vector *out_anchors) { - float w = base_anchor[2] - base_anchor[1] + 1.0f; + float w = base_anchor[2] - base_anchor[0] + 1.0f; float h = base_anchor[3] - base_anchor[1] + 1.0f; float x_ctr = base_anchor[0] + 0.5 * (w - 1.0f); float y_ctr = base_anchor[1] + 0.5 * (h - 1.0f);