Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/detection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"input": [
"!mkdir -p _temp\n",
"!echo `pwd`/images/fish-bike.jpg > _temp/det_input.txt\n",
"!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu _temp/det_input.txt _temp/det_output.h5"
"!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu --raw_scale=255 _temp/det_input.txt _temp/det_output.h5"
],
"language": "python",
"metadata": {},
Expand Down
92 changes: 38 additions & 54 deletions examples/filter_visualization.ipynb

Large diffs are not rendered by default.

79 changes: 36 additions & 43 deletions examples/imagenet_classification.ipynb

Large diffs are not rendered by default.

29 changes: 17 additions & 12 deletions examples/net_surgery.ipynb

Large diffs are not rendered by default.

36 changes: 23 additions & 13 deletions python/caffe/_caffe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@


using namespace caffe; // NOLINT(build/namespaces)
using boost::python::dict;
using boost::python::extract;
using boost::python::len;
using boost::python::list;
Expand Down Expand Up @@ -274,6 +275,11 @@ struct CaffeNet {

// The pointer to the internal caffe::Net instance.
shared_ptr<Net<float> > net_;
// Input preprocessing configuration attributes.
dict mean_;
dict input_scale_;
dict raw_scale_;
dict channel_swap_;
// if taking input from an ndarray, we need to hold references
object input_data_;
object input_labels_;
Expand Down Expand Up @@ -311,19 +317,23 @@ BOOST_PYTHON_MODULE(_caffe) {
boost::python::class_<CaffeNet, shared_ptr<CaffeNet> >(
"Net", boost::python::init<string, string>())
.def(boost::python::init<string>())
.def("_forward", &CaffeNet::Forward)
.def("_backward", &CaffeNet::Backward)
.def("set_mode_cpu", &CaffeNet::set_mode_cpu)
.def("set_mode_gpu", &CaffeNet::set_mode_gpu)
.def("set_phase_train", &CaffeNet::set_phase_train)
.def("set_phase_test", &CaffeNet::set_phase_test)
.def("set_device", &CaffeNet::set_device)
.add_property("_blobs", &CaffeNet::blobs)
.add_property("layers", &CaffeNet::layers)
.add_property("inputs", &CaffeNet::inputs)
.add_property("outputs", &CaffeNet::outputs)
.def("_set_input_arrays", &CaffeNet::set_input_arrays)
.def("save", &CaffeNet::save);
.def("_forward", &CaffeNet::Forward)
.def("_backward", &CaffeNet::Backward)
.def("set_mode_cpu", &CaffeNet::set_mode_cpu)
.def("set_mode_gpu", &CaffeNet::set_mode_gpu)
.def("set_phase_train", &CaffeNet::set_phase_train)
.def("set_phase_test", &CaffeNet::set_phase_test)
.def("set_device", &CaffeNet::set_device)
.add_property("_blobs", &CaffeNet::blobs)
.add_property("layers", &CaffeNet::layers)
.add_property("inputs", &CaffeNet::inputs)
.add_property("outputs", &CaffeNet::outputs)
.add_property("mean", &CaffeNet::mean_)
.add_property("input_scale", &CaffeNet::input_scale_)
.add_property("raw_scale", &CaffeNet::raw_scale_)
.add_property("channel_swap", &CaffeNet::channel_swap_)
.def("_set_input_arrays", &CaffeNet::set_input_arrays)
.def("save", &CaffeNet::save);

boost::python::class_<CaffeBlob, CaffeBlobWrap>(
"Blob", boost::python::no_init)
Expand Down
36 changes: 22 additions & 14 deletions python/caffe/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ class Classifier(caffe.Net):
by scaling, center cropping, or oversampling.
"""
def __init__(self, model_file, pretrained_file, image_dims=None,
gpu=False, mean_file=None, input_scale=None, channel_swap=None):
gpu=False, mean=None, input_scale=None, raw_scale=None,
channel_swap=None):
"""
Take
image_dims: dimensions to scale input for cropping/sampling.
Default is to scale to net input size for whole-image crop.
gpu, mean_file, input_scale, channel_swap: convenience params for
setting mode, mean, input scale, and channel order.
Default is to scale to net input size for whole-image crop.
gpu, mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
"""
caffe.Net.__init__(self, model_file, pretrained_file)
self.set_phase_test()
Expand All @@ -30,11 +31,13 @@ def __init__(self, model_file, pretrained_file, image_dims=None,
else:
self.set_mode_cpu()

if mean_file:
self.set_mean(self.inputs[0], mean_file)
if input_scale:
if mean is not None:
self.set_mean(self.inputs[0], mean)
if input_scale is not None:
self.set_input_scale(self.inputs[0], input_scale)
if channel_swap:
if raw_scale is not None:
self.set_raw_scale(self.inputs[0], raw_scale)
if channel_swap is not None:
self.set_channel_swap(self.inputs[0], channel_swap)

self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
Expand All @@ -57,24 +60,29 @@ def predict(self, inputs, oversample=True):
for N images and C classes.
"""
# Scale to standardize input dimensions.
inputs = np.asarray([caffe.io.resize_image(im, self.image_dims)
for im in inputs])
input_ = np.zeros((len(inputs),
self.image_dims[0], self.image_dims[1], inputs[0].shape[2]),
dtype=np.float32)
for ix, in_ in enumerate(inputs):
input_[ix] = caffe.io.resize_image(in_, self.image_dims)

if oversample:
# Generate center, corner, and mirrored crops.
inputs = caffe.io.oversample(inputs, self.crop_dims)
input_ = caffe.io.oversample(input_, self.crop_dims)
else:
# Take center crop.
center = np.array(self.image_dims) / 2.0
crop = np.tile(center, (1, 2))[0] + np.concatenate([
-self.crop_dims / 2.0,
self.crop_dims / 2.0
])
inputs = inputs[:, crop[0]:crop[2], crop[1]:crop[3], :]
input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]

# Classify
caffe_in = np.asarray([self.preprocess(self.inputs[0], in_)
for in_ in inputs])
caffe_in = np.zeros(np.array(input_.shape)[[0,3,1,2]],
dtype=np.float32)
for ix, in_ in enumerate(input_):
caffe_in[ix] = self.preprocess(self.inputs[0], in_)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]].squeeze(axis=(2,3))

Expand Down
47 changes: 30 additions & 17 deletions python/caffe/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@ class Detector(caffe.Net):
Detector extends Net for windowed detection by a list of crops or
selective search proposals.
"""
def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
input_scale=None, channel_swap=None, context_pad=None):
def __init__(self, model_file, pretrained_file, gpu=False, mean=None,
input_scale=None, raw_scale=None, channel_swap=None,
context_pad=None):
"""
Take
gpu, mean_file, input_scale, channel_swap: convenience params for
setting mode, mean, input scale, and channel order.
gpu, mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
context_pad: amount of surrounding context to take s.t. a `context_pad`
sized border of pixels in the network input image is context, as in
R-CNN feature extraction.
Expand All @@ -42,11 +43,13 @@ def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
else:
self.set_mode_cpu()

if mean_file:
self.set_mean(self.inputs[0], mean_file)
if input_scale:
if mean is not None:
self.set_mean(self.inputs[0], mean)
if input_scale is not None:
self.set_input_scale(self.inputs[0], input_scale)
if channel_swap:
if raw_scale is not None:
self.set_raw_scale(self.inputs[0], raw_scale)
if channel_swap is not None:
self.set_channel_swap(self.inputs[0], channel_swap)

self.configure_crop(context_pad)
Expand All @@ -73,8 +76,11 @@ def detect_windows(self, images_windows):
window_inputs.append(self.crop(image, window))

# Run through the net (warping windows to input dimensions).
caffe_in = np.asarray([self.preprocess(self.inputs[0], window_in)
for window_in in window_inputs])
caffe_in = np.zeros((len(window_inputs), window_inputs[0].shape[2])
+ self.blobs[self.inputs[0]].data.shape[2:],
dtype=np.float32)
for ix, window_in in enumerate(window_inputs):
caffe_in[ix] = self.preprocess(self.inputs[0], window_in)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]].squeeze(axis=(2,3))

Expand Down Expand Up @@ -180,12 +186,19 @@ def configure_crop(self, context_pad):
"""
self.context_pad = context_pad
if self.context_pad:
input_scale = self.input_scale.get(self.inputs[0])
raw_scale = self.raw_scale.get(self.inputs[0])
channel_order = self.channel_swap.get(self.inputs[0])
# Padding context crops needs the mean in unprocessed input space.
self.crop_mean = self.mean[self.inputs[0]].copy()
self.crop_mean = self.crop_mean.transpose((1,2,0))
channel_order_inverse = [channel_order.index(i)
for i in range(self.crop_mean.shape[2])]
self.crop_mean = self.crop_mean[:,:, channel_order_inverse]
self.crop_mean /= input_scale
mean = self.mean.get(self.inputs[0])
if mean is not None:
crop_mean = mean.copy().transpose((1,2,0))
if channel_order is not None:
channel_order_inverse = [channel_order.index(i)
for i in range(crop_mean.shape[2])]
crop_mean = crop_mean[:,:, channel_order_inverse]
if raw_scale is not None:
crop_mean /= raw_scale
self.crop_mean = crop_mean
else:
self.crop_mean = np.zeros(self.blobs[self.inputs[0]].data.shape,
dtype=np.float32)
18 changes: 15 additions & 3 deletions python/caffe/io.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import skimage.io
import skimage.transform
from scipy.ndimage import zoom
from skimage.transform import resize

from caffe.proto import caffe_pb2

Expand All @@ -15,7 +16,8 @@ def load_image(filename, color=True):
loads as intensity (if image is already grayscale).

Give
image: an image with type np.float32 of size (H x W x 3) in RGB or
image: an image with type np.float32 in range [0, 1]
of size (H x W x 3) in RGB or
of size (H x W x 1) in grayscale.
"""
img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32)
Expand All @@ -40,7 +42,17 @@ def resize_image(im, new_dims, interp_order=1):
Give
im: resized ndarray with shape (new_dims[0], new_dims[1], K)
"""
return skimage.transform.resize(im, new_dims, order=interp_order)
if im.shape[-1] == 1 or im.shape[-1] == 3:
# skimage is fast but only understands {1,3} channel images in [0, 1].
im_min, im_max = im.min(), im.max()
im_std = (im - im_min) / (im_max - im_min)
resized_std = resize(im_std, new_dims, order=interp_order)
resized_im = resized_std * (im_max - im_min) + im_min
else:
# ndimage interpolates anything but more slowly.
scale = tuple(np.array(new_dims) / np.array(im.shape[:2]))
resized_im = zoom(im, scale + (1,), order=interp_order)
return resized_im.astype(np.float32)


def oversample(images, crop_dims):
Expand Down
Loading