Skip to content

Commit 52d7a48

Browse files
committed
Merge pull request BVLC#816 from shelhamer/pycaffe-labels-grayscale-attrs-examples
Improve and polish pycaffe
2 parents d1d499d + 0db9478 commit 52d7a48

11 files changed

+257
-196
lines changed

examples/detection.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"input": [
3737
"!mkdir -p _temp\n",
3838
"!echo `pwd`/images/fish-bike.jpg > _temp/det_input.txt\n",
39-
"!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu _temp/det_input.txt _temp/det_output.h5"
39+
"!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu --raw_scale=255 _temp/det_input.txt _temp/det_output.h5"
4040
],
4141
"language": "python",
4242
"metadata": {},

examples/filter_visualization.ipynb

+38-54
Large diffs are not rendered by default.

examples/imagenet_classification.ipynb

+36-43
Large diffs are not rendered by default.

examples/net_surgery.ipynb

+17-12
Large diffs are not rendered by default.

python/caffe/_caffe.cpp

+23-13
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626

2727
using namespace caffe; // NOLINT(build/namespaces)
28+
using boost::python::dict;
2829
using boost::python::extract;
2930
using boost::python::len;
3031
using boost::python::list;
@@ -274,6 +275,11 @@ struct CaffeNet {
274275

275276
// The pointer to the internal caffe::Net instance.
276277
shared_ptr<Net<float> > net_;
278+
// Input preprocessing configuration attributes.
279+
dict mean_;
280+
dict input_scale_;
281+
dict raw_scale_;
282+
dict channel_swap_;
277283
// if taking input from an ndarray, we need to hold references
278284
object input_data_;
279285
object input_labels_;
@@ -311,19 +317,23 @@ BOOST_PYTHON_MODULE(_caffe) {
311317
boost::python::class_<CaffeNet, shared_ptr<CaffeNet> >(
312318
"Net", boost::python::init<string, string>())
313319
.def(boost::python::init<string>())
314-
.def("_forward", &CaffeNet::Forward)
315-
.def("_backward", &CaffeNet::Backward)
316-
.def("set_mode_cpu", &CaffeNet::set_mode_cpu)
317-
.def("set_mode_gpu", &CaffeNet::set_mode_gpu)
318-
.def("set_phase_train", &CaffeNet::set_phase_train)
319-
.def("set_phase_test", &CaffeNet::set_phase_test)
320-
.def("set_device", &CaffeNet::set_device)
321-
.add_property("_blobs", &CaffeNet::blobs)
322-
.add_property("layers", &CaffeNet::layers)
323-
.add_property("inputs", &CaffeNet::inputs)
324-
.add_property("outputs", &CaffeNet::outputs)
325-
.def("_set_input_arrays", &CaffeNet::set_input_arrays)
326-
.def("save", &CaffeNet::save);
320+
.def("_forward", &CaffeNet::Forward)
321+
.def("_backward", &CaffeNet::Backward)
322+
.def("set_mode_cpu", &CaffeNet::set_mode_cpu)
323+
.def("set_mode_gpu", &CaffeNet::set_mode_gpu)
324+
.def("set_phase_train", &CaffeNet::set_phase_train)
325+
.def("set_phase_test", &CaffeNet::set_phase_test)
326+
.def("set_device", &CaffeNet::set_device)
327+
.add_property("_blobs", &CaffeNet::blobs)
328+
.add_property("layers", &CaffeNet::layers)
329+
.add_property("inputs", &CaffeNet::inputs)
330+
.add_property("outputs", &CaffeNet::outputs)
331+
.add_property("mean", &CaffeNet::mean_)
332+
.add_property("input_scale", &CaffeNet::input_scale_)
333+
.add_property("raw_scale", &CaffeNet::raw_scale_)
334+
.add_property("channel_swap", &CaffeNet::channel_swap_)
335+
.def("_set_input_arrays", &CaffeNet::set_input_arrays)
336+
.def("save", &CaffeNet::save);
327337

328338
boost::python::class_<CaffeBlob, CaffeBlobWrap>(
329339
"Blob", boost::python::no_init)

python/caffe/classifier.py

+22-14
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@ class Classifier(caffe.Net):
1414
by scaling, center cropping, or oversampling.
1515
"""
1616
def __init__(self, model_file, pretrained_file, image_dims=None,
17-
gpu=False, mean_file=None, input_scale=None, channel_swap=None):
17+
gpu=False, mean=None, input_scale=None, raw_scale=None,
18+
channel_swap=None):
1819
"""
1920
Take
2021
image_dims: dimensions to scale input for cropping/sampling.
21-
Default is to scale to net input size for whole-image crop.
22-
gpu, mean_file, input_scale, channel_swap: convenience params for
23-
setting mode, mean, input scale, and channel order.
22+
Default is to scale to net input size for whole-image crop.
23+
gpu, mean, input_scale, raw_scale, channel_swap: params for
24+
preprocessing options.
2425
"""
2526
caffe.Net.__init__(self, model_file, pretrained_file)
2627
self.set_phase_test()
@@ -30,11 +31,13 @@ def __init__(self, model_file, pretrained_file, image_dims=None,
3031
else:
3132
self.set_mode_cpu()
3233

33-
if mean_file:
34-
self.set_mean(self.inputs[0], mean_file)
35-
if input_scale:
34+
if mean is not None:
35+
self.set_mean(self.inputs[0], mean)
36+
if input_scale is not None:
3637
self.set_input_scale(self.inputs[0], input_scale)
37-
if channel_swap:
38+
if raw_scale is not None:
39+
self.set_raw_scale(self.inputs[0], raw_scale)
40+
if channel_swap is not None:
3841
self.set_channel_swap(self.inputs[0], channel_swap)
3942

4043
self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
@@ -57,24 +60,29 @@ def predict(self, inputs, oversample=True):
5760
for N images and C classes.
5861
"""
5962
# Scale to standardize input dimensions.
60-
inputs = np.asarray([caffe.io.resize_image(im, self.image_dims)
61-
for im in inputs])
63+
input_ = np.zeros((len(inputs),
64+
self.image_dims[0], self.image_dims[1], inputs[0].shape[2]),
65+
dtype=np.float32)
66+
for ix, in_ in enumerate(inputs):
67+
input_[ix] = caffe.io.resize_image(in_, self.image_dims)
6268

6369
if oversample:
6470
# Generate center, corner, and mirrored crops.
65-
inputs = caffe.io.oversample(inputs, self.crop_dims)
71+
input_ = caffe.io.oversample(input_, self.crop_dims)
6672
else:
6773
# Take center crop.
6874
center = np.array(self.image_dims) / 2.0
6975
crop = np.tile(center, (1, 2))[0] + np.concatenate([
7076
-self.crop_dims / 2.0,
7177
self.crop_dims / 2.0
7278
])
73-
inputs = inputs[:, crop[0]:crop[2], crop[1]:crop[3], :]
79+
input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]
7480

7581
# Classify
76-
caffe_in = np.asarray([self.preprocess(self.inputs[0], in_)
77-
for in_ in inputs])
82+
caffe_in = np.zeros(np.array(input_.shape)[[0,3,1,2]],
83+
dtype=np.float32)
84+
for ix, in_ in enumerate(input_):
85+
caffe_in[ix] = self.preprocess(self.inputs[0], in_)
7886
out = self.forward_all(**{self.inputs[0]: caffe_in})
7987
predictions = out[self.outputs[0]].squeeze(axis=(2,3))
8088

python/caffe/detector.py

+30-17
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@ class Detector(caffe.Net):
2424
Detector extends Net for windowed detection by a list of crops or
2525
selective search proposals.
2626
"""
27-
def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
28-
input_scale=None, channel_swap=None, context_pad=None):
27+
def __init__(self, model_file, pretrained_file, gpu=False, mean=None,
28+
input_scale=None, raw_scale=None, channel_swap=None,
29+
context_pad=None):
2930
"""
3031
Take
31-
gpu, mean_file, input_scale, channel_swap: convenience params for
32-
setting mode, mean, input scale, and channel order.
32+
gpu, mean, input_scale, raw_scale, channel_swap: params for
33+
preprocessing options.
3334
context_pad: amount of surrounding context to take s.t. a `context_pad`
3435
sized border of pixels in the network input image is context, as in
3536
R-CNN feature extraction.
@@ -42,11 +43,13 @@ def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
4243
else:
4344
self.set_mode_cpu()
4445

45-
if mean_file:
46-
self.set_mean(self.inputs[0], mean_file)
47-
if input_scale:
46+
if mean is not None:
47+
self.set_mean(self.inputs[0], mean)
48+
if input_scale is not None:
4849
self.set_input_scale(self.inputs[0], input_scale)
49-
if channel_swap:
50+
if raw_scale is not None:
51+
self.set_raw_scale(self.inputs[0], raw_scale)
52+
if channel_swap is not None:
5053
self.set_channel_swap(self.inputs[0], channel_swap)
5154

5255
self.configure_crop(context_pad)
@@ -73,8 +76,11 @@ def detect_windows(self, images_windows):
7376
window_inputs.append(self.crop(image, window))
7477

7578
# Run through the net (warping windows to input dimensions).
76-
caffe_in = np.asarray([self.preprocess(self.inputs[0], window_in)
77-
for window_in in window_inputs])
79+
caffe_in = np.zeros((len(window_inputs), window_inputs[0].shape[2])
80+
+ self.blobs[self.inputs[0]].data.shape[2:],
81+
dtype=np.float32)
82+
for ix, window_in in enumerate(window_inputs):
83+
caffe_in[ix] = self.preprocess(self.inputs[0], window_in)
7884
out = self.forward_all(**{self.inputs[0]: caffe_in})
7985
predictions = out[self.outputs[0]].squeeze(axis=(2,3))
8086

@@ -180,12 +186,19 @@ def configure_crop(self, context_pad):
180186
"""
181187
self.context_pad = context_pad
182188
if self.context_pad:
183-
input_scale = self.input_scale.get(self.inputs[0])
189+
raw_scale = self.raw_scale.get(self.inputs[0])
184190
channel_order = self.channel_swap.get(self.inputs[0])
185191
# Padding context crops needs the mean in unprocessed input space.
186-
self.crop_mean = self.mean[self.inputs[0]].copy()
187-
self.crop_mean = self.crop_mean.transpose((1,2,0))
188-
channel_order_inverse = [channel_order.index(i)
189-
for i in range(self.crop_mean.shape[2])]
190-
self.crop_mean = self.crop_mean[:,:, channel_order_inverse]
191-
self.crop_mean /= input_scale
192+
mean = self.mean.get(self.inputs[0])
193+
if mean is not None:
194+
crop_mean = mean.copy().transpose((1,2,0))
195+
if channel_order is not None:
196+
channel_order_inverse = [channel_order.index(i)
197+
for i in range(crop_mean.shape[2])]
198+
crop_mean = crop_mean[:,:, channel_order_inverse]
199+
if raw_scale is not None:
200+
crop_mean /= raw_scale
201+
self.crop_mean = crop_mean
202+
else:
203+
self.crop_mean = np.zeros(self.blobs[self.inputs[0]].data.shape,
204+
dtype=np.float32)

python/caffe/io.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import skimage.io
3-
import skimage.transform
3+
from scipy.ndimage import zoom
4+
from skimage.transform import resize
45

56
from caffe.proto import caffe_pb2
67

@@ -15,7 +16,8 @@ def load_image(filename, color=True):
1516
loads as intensity (if image is already grayscale).
1617
1718
Give
18-
image: an image with type np.float32 of size (H x W x 3) in RGB or
19+
image: an image with type np.float32 in range [0, 1]
20+
of size (H x W x 3) in RGB or
1921
of size (H x W x 1) in grayscale.
2022
"""
2123
img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32)
@@ -40,7 +42,17 @@ def resize_image(im, new_dims, interp_order=1):
4042
Give
4143
im: resized ndarray with shape (new_dims[0], new_dims[1], K)
4244
"""
43-
return skimage.transform.resize(im, new_dims, order=interp_order)
45+
if im.shape[-1] == 1 or im.shape[-1] == 3:
46+
# skimage is fast but only understands {1,3} channel images in [0, 1].
47+
im_min, im_max = im.min(), im.max()
48+
im_std = (im - im_min) / (im_max - im_min)
49+
resized_std = resize(im_std, new_dims, order=interp_order)
50+
resized_im = resized_std * (im_max - im_min) + im_min
51+
else:
52+
# ndimage interpolates anything but more slowly.
53+
scale = tuple(np.array(new_dims) / np.array(im.shape[:2]))
54+
resized_im = zoom(im, scale + (1,), order=interp_order)
55+
return resized_im.astype(np.float32)
4456

4557

4658
def oversample(images, crop_dims):

0 commit comments

Comments (0)