diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp
index 6cf80a37bc1..79cdc975f63 100644
--- a/include/caffe/common.hpp
+++ b/include/caffe/common.hpp
@@ -147,6 +147,8 @@ class Caffe {
   // Sets the device. Since we have cublas and curand stuff, set device also
   // requires us to reset those values.
   static void SetDevice(const int device_id);
+  // Get the device.
+  static int GetDevice();
   // Prints the current GPU status.
   static void DeviceQuery();
 
diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py
index 37e8956da4f..75fa524c3b9 100644
--- a/python/caffe/__init__.py
+++ b/python/caffe/__init__.py
@@ -1,5 +1,12 @@
-from .pycaffe import Net, SGDSolver
-from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver
+from .pycaffe import Net, SGDSolver, LayerParameter
+from ._caffe import (
+    set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver,
+    get_device,
+    check_mode_cpu, check_mode_gpu,
+    set_random_seed,
+    Blob,
+    create_layer,
+)
 from .proto.caffe_pb2 import TRAIN, TEST
 from .classifier import Classifier
 from .detector import Detector
diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp
index dff7f627016..2e1e584250c 100644
--- a/python/caffe/_caffe.cpp
+++ b/python/caffe/_caffe.cpp
@@ -3,6 +3,8 @@
 // Produce deprecation warnings (needs to come before arrayobject.h inclusion).
 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 
+#include <google/protobuf/text_format.h>
+
 #include <boost/make_shared.hpp>
 #include <boost/python.hpp>
 #include <boost/python/suite/indexing/vector_indexing_suite.hpp>
@@ -15,6 +17,8 @@
 #include <fstream>  // NOLINT(readability/streams)
 
 #include "caffe/caffe.hpp"
+#include "caffe/layer_factory.hpp"
+#include "caffe/proto/caffe.pb.h"
 #include "caffe/python_layer.hpp"
 
 // Temporary solution for numpy < 1.7 versions: old macro, no promises.
@@ -35,6 +39,9 @@ const int NPY_DTYPE = NPY_FLOAT32;
 // Selecting mode.
 void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); }
 void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); }
+// Checking current mode.
+bool check_mode_cpu() { return Caffe::mode() == Caffe::CPU; }
+bool check_mode_gpu() { return Caffe::mode() == Caffe::GPU; }
 
 // For convenience, check that input files can be opened, and raise an
 // exception that boost will send to Python if not (caffe could still crash
@@ -176,6 +183,34 @@ struct NdarrayCallPolicies : public bp::default_call_policies {
   }
 };
 
+// Blob constructor with shape iterable
+shared_ptr<Blob<Dtype> > Blob_Init(bp::object shape_object) {
+  size_t ndim;
+  try {
+    ndim = bp::len(shape_object);
+  } catch(...) {
+    throw std::runtime_error("1st arg must be iterable.");
+  }
+  vector<int> shape(ndim);
+  try {
+    for (size_t i = 0; i < ndim; ++i) {
+      shape[i] = bp::extract<int>(shape_object[i]);
+    }
+  } catch(...) {
+    throw std::runtime_error("All elements in shape iterable must be integers.");
+  }
+  return shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape));
+}
+
+bp::tuple Blob_Shape(const Blob<Dtype>* self) {
+  const vector<int>& shape = self->shape();
+  bp::list shape_list;
+  BOOST_FOREACH(int s, shape) {
+    shape_list.append(s);
+  }
+  return bp::tuple(shape_list);
+}
+
 bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) {
   if (bp::len(kwargs) > 0) {
     throw std::runtime_error("Blob.reshape takes no kwargs");
@@ -190,6 +225,85 @@ bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) {
   return bp::object();
 }
 
+// Layer
+template <typename T>
+vector<T> py_to_vector(bp::object pyiter) {
+  vector<T> vec;
+  for (int i = 0; i < bp::len(pyiter); ++i) {
+    vec.push_back(bp::extract<T>(pyiter[i]));
+  }
+  return vec;
+}
+void Layer_SetUp(Layer<Dtype>* layer, bp::object py_bottom, bp::object py_top) {
+  vector<Blob<Dtype>*> bottom = py_to_vector<Blob<Dtype>*>(py_bottom);
+  vector<Blob<Dtype>*> top = py_to_vector<Blob<Dtype>*>(py_top);
+  layer->SetUp(bottom, top);
+}
+void Layer_Reshape(
+    Layer<Dtype>* layer, bp::object py_bottom, bp::object py_top) {
+  vector<Blob<Dtype>*> bottom = py_to_vector<Blob<Dtype>*>(py_bottom);
+  vector<Blob<Dtype>*> top = py_to_vector<Blob<Dtype>*>(py_top);
+  layer->Reshape(bottom, top);
+}
+Dtype Layer_Forward(
+    Layer<Dtype>* layer, bp::object py_bottom, bp::object py_top) {
+  vector<Blob<Dtype>*> bottom = py_to_vector<Blob<Dtype>*>(py_bottom);
+  vector<Blob<Dtype>*> top = py_to_vector<Blob<Dtype>*>(py_top);
+  Dtype loss;
+  loss = layer->Forward(bottom, top);
+  return loss;
+}
+void Layer_Backward(
+    Layer<Dtype>* layer, bp::object py_top, bp::object py_propagate_down,
+    bp::object py_bottom) {
+  vector<Blob<Dtype>*> top = py_to_vector<Blob<Dtype>*>(py_top);
+  vector<bool> propagate_down = py_to_vector<bool>(py_propagate_down);
+  vector<Blob<Dtype>*> bottom = py_to_vector<Blob<Dtype>*>(py_bottom);
+  layer->Backward(top, propagate_down, bottom);
+}
+
+// LayerParameter
+shared_ptr<LayerParameter> LayerParameter_Init(bp::object py_layer_param) {
+  shared_ptr<LayerParameter> layer_param(new LayerParameter);
+  if (PyObject_HasAttrString(py_layer_param.ptr(), "SerializeToString")) {
+    string dump = bp::extract<string>(
+        py_layer_param.attr("SerializeToString")());
+    layer_param->ParseFromString(dump);
+  } else {
+    try {
+      string dump = bp::extract<string>(py_layer_param);
+      google::protobuf::TextFormat::ParseFromString(dump, layer_param.get());
+    } catch(...) {
+      throw std::runtime_error("1st arg must be LayerParameter or string.");
+    }
+  }
+  if (!layer_param->IsInitialized()) {
+    throw std::runtime_error(
+        "LayerParameter not initialized: Missing required fields.");
+  }
+  return layer_param;
+}
+void LayerParameter_FromPython(
+    LayerParameter* layer_param, bp::object py_layer_param) {
+  shared_ptr<LayerParameter> copy =
+      LayerParameter_Init(py_layer_param);
+  layer_param->Clear();
+  layer_param->CopyFrom(*copy);
+}
+bp::object LayerParameter_ToPython(
+    const LayerParameter* layer_param, bp::object py_layer_param) {
+  string dump;
+  layer_param->SerializeToString(&dump);
+  py_layer_param.attr("ParseFromString")(bp::object(dump));
+  return py_layer_param;
+}
+
+// Create layer from caffe_pb2.LayerParameter in Python
+shared_ptr<Layer<Dtype> > create_layer(bp::object py_layer_param) {
+  shared_ptr<LayerParameter> layer_param(LayerParameter_Init(py_layer_param));
+  return LayerRegistry<Dtype>::CreateLayer(*layer_param);
+}
+
 BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1);
 
 BOOST_PYTHON_MODULE(_caffe) {
@@ -198,7 +312,11 @@ BOOST_PYTHON_MODULE(_caffe) {
   // Caffe utility functions
   bp::def("set_mode_cpu", &set_mode_cpu);
   bp::def("set_mode_gpu", &set_mode_gpu);
+  bp::def("check_mode_cpu", &check_mode_cpu);
+  bp::def("check_mode_gpu", &check_mode_gpu);
   bp::def("set_device", &Caffe::SetDevice);
+  bp::def("get_device", &Caffe::GetDevice);
+  bp::def("set_random_seed", &Caffe::set_random_seed);
 
   bp::class_<Net<Dtype>, shared_ptr<Net<Dtype> >, boost::noncopyable >("Net",
       bp::no_init)
@@ -229,13 +347,15 @@ BOOST_PYTHON_MODULE(_caffe) {
     .def("save", &Net_Save);
 
   bp::class_<Blob<Dtype>, shared_ptr<Blob<Dtype> >, boost::noncopyable>(
-    "Blob", bp::no_init)
+    "Blob", bp::no_init)
+    .def("__init__", bp::make_constructor(&Blob_Init))
    .add_property("num", &Blob<Dtype>::num)
    .add_property("channels", &Blob<Dtype>::channels)
    .add_property("height", &Blob<Dtype>::height)
    .add_property("width", &Blob<Dtype>::width)
    .add_property("count", static_cast<int (Blob<Dtype>::*)() const>(
        &Blob<Dtype>::count))
+    .add_property("shape", &Blob_Shape)
    .def("reshape", bp::raw_function(&Blob_Reshape))
    .add_property("data", bp::make_function(&Blob<Dtype>::mutable_cpu_data,
          NdarrayCallPolicies()))
@@ -243,15 +363,26 @@ BOOST_PYTHON_MODULE(_caffe) {
          NdarrayCallPolicies()));
 
   bp::class_<Layer<Dtype>, shared_ptr<PythonLayer<Dtype> >,
-    boost::noncopyable>("Layer", bp::init<const LayerParameter&>())
+    boost::noncopyable>(
+      "Layer", bp::init<const LayerParameter&>())
    .add_property("blobs", bp::make_function(&Layer<Dtype>::blobs,
          bp::return_internal_reference<>()))
    .def("setup", &Layer<Dtype>::LayerSetUp)
+    .def("SetUp", &Layer_SetUp)
    .def("reshape", &Layer<Dtype>::Reshape)
+    .def("Reshape", &Layer_Reshape)
+    .def("Forward", &Layer_Forward)
+    .def("Backward", &Layer_Backward)
    .add_property("type", bp::make_function(&Layer<Dtype>::type));
   bp::register_ptr_to_python<shared_ptr<Layer<Dtype> > >();
 
-  bp::class_<LayerParameter>("LayerParameter", bp::no_init);
+  bp::class_<LayerParameter, shared_ptr<LayerParameter> >(
+      "LayerParameter", bp::no_init)
+    .def("__init__", bp::make_constructor(&LayerParameter_Init))
+    .def("from_python", &LayerParameter_FromPython)
+    .def("_to_python", &LayerParameter_ToPython);
+
+  bp::def("create_layer", &create_layer);
 
   bp::class_<Solver<Dtype>, shared_ptr<Solver<Dtype> >, boost::noncopyable>(
     "Solver", bp::no_init)
diff --git a/python/caffe/gradient_check_util.py b/python/caffe/gradient_check_util.py
new file mode 100644
index 00000000000..0ecb264ca10
--- /dev/null
+++ b/python/caffe/gradient_check_util.py
@@ -0,0 +1,99 @@
+import numpy as np
+import caffe
+
+
+class GradientChecker:
+    """Numerically check a layer's analytical gradients against centered
+    finite differences (Python port of the C++ GradientChecker)."""
+
+    def __init__(self, stepsize, threshold, seed=1701, kink=0., kink_range=-1):
+        # Store every ctor arg as an attribute with a trailing underscore
+        # (stepsize_, threshold_, ...), mirroring the C++ naming convention.
+        for k, v in locals().iteritems():
+            if k == 'self':
+                continue
+            self.__dict__[k + '_'] = v
+
+    def get_obj_and_gradient(self, layer, top, top_id, top_data_id):
+        # Objective is loss_weight * top[top_id].data[top_data_id]; seed the
+        # top diff accordingly so Backward computes that objective's gradient.
+        for b in top:
+            b.diff[...] = 0
+        loss_weight = 2
+        loss = top[top_id].data.flat[top_data_id] * loss_weight
+        top[top_id].diff.flat[top_data_id] = loss_weight
+        return loss
+
+    def check_gradient_single(
+            self, layer, bottom, top, check_bottom='all', top_id=0,
+            top_data_id=0):
+        """Check the gradient of one top element w.r.t. the layer's param
+        blobs and the selected bottom blobs."""
+        # Retrieve Blobs to check
+        propagate_down = [False for i in xrange(len(bottom))]
+        blobs_to_check = []
+        for blob in layer.blobs:
+            blobs_to_check += [blob]
+        if check_bottom == 'all':
+            check_bottom = range(len(bottom))
+        assert len(check_bottom) <= len(bottom)
+        for cb in check_bottom:
+            blobs_to_check += [bottom[cb]]
+            propagate_down[cb] = True
+
+        # Compute the gradient analytically using Backward
+        caffe.set_random_seed(self.seed_)
+        layer.Reshape(bottom, top)
+        layer.Forward(bottom, top)
+        self.get_obj_and_gradient(layer, top, top_id, top_data_id)
+        layer.Backward(top, propagate_down, bottom)
+
+        # Store computed diff
+        ana_grads = [b.diff.copy() for b in blobs_to_check]
+
+        # Compute finite diff
+        for bi, (ana_grad, blob) in enumerate(zip(ana_grads, blobs_to_check)):
+            for fi in xrange(blob.count):
+                step = self.stepsize_
+                # L(fi <-- fi+step)
+                blob.data.flat[fi] += step
+                caffe.set_random_seed(self.seed_)
+                layer.Reshape(bottom, top)
+                layer.Forward(bottom, top)
+                ploss = self.get_obj_and_gradient(
+                    layer, top, top_id, top_data_id)
+                # L(fi <-- fi-step)
+                blob.data.flat[fi] -= 2 * step
+                caffe.set_random_seed(self.seed_)
+                layer.Reshape(bottom, top)
+                layer.Forward(bottom, top)
+                nloss = self.get_obj_and_gradient(
+                    layer, top, top_id, top_data_id)
+                # Restore the original value so later features are checked
+                # against unperturbed data (matches the C++ checker).
+                blob.data.flat[fi] += step
+                grad = (ploss - nloss) / (2. * step)
+                agrad = ana_grad.flat[fi]
+                feat = blob.data.flat[fi]
+                if self.kink_ - self.kink_range_ > np.abs(feat) \
+                        or np.abs(feat) > self.kink_ + self.kink_range_:
+                    scale = max(
+                        max(np.abs(agrad), np.abs(grad)), 1.0)
+                    assert np.isclose(
+                        agrad, grad, rtol=0, atol=self.threshold_ * scale), (
+                        "(top_id, top_data_id, blob_id, feat_id)"
+                        "=(%d, %d, %d, %d); feat=%g; "
+                        "objective+ = %g; objective- = %g; "
+                        "analytical_grad=%g; finite_grad=%g" % (
+                            top_id, top_data_id, bi, fi, feat, ploss, nloss,
+                            agrad, grad)
+                    )
+
+    def check_gradient_exhaustive(
+            self, layer, bottom, top, check_bottom='all'):
+        """Run check_gradient_single for every element of every top blob."""
+        layer.SetUp(bottom, top)
+        assert len(top) > 0
+        for i in xrange(len(top)):
+            for j in xrange(top[i].count):
+                self.check_gradient_single(
+                    layer, bottom, top, check_bottom, i, j)
diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py
index 3c19261f690..5e3db0c44f9 100644
--- a/python/caffe/pycaffe.py
+++ b/python/caffe/pycaffe.py
@@ -10,7 +10,7 @@ from itertools import zip_longest as izip_longest
 
 import numpy as np
 
-from ._caffe import Net, SGDSolver
+from ._caffe import Net, SGDSolver, LayerParameter
 import caffe.io
 
@@ -267,3 +267,12 @@ def _Net_batch(self, blobs):
 Net._batch = _Net_batch
 Net.inputs = _Net_inputs
 Net.outputs = _Net_outputs
+
+
+# LayerParameter
+def _LayerParameter_to_python(self):
+    """Return a caffe_pb2.LayerParameter copy of this C++ LayerParameter."""
+    from caffe.proto import caffe_pb2
+    layer_param = caffe_pb2.LayerParameter()
+    return self._to_python(layer_param)
+# Attach method
+LayerParameter.to_python = _LayerParameter_to_python
diff --git a/python/caffe/test/test_blob.py b/python/caffe/test/test_blob.py
new file mode 100644
index 00000000000..25c079b4057
--- /dev/null
+++ b/python/caffe/test/test_blob.py
@@ -0,0 +1,25 @@
+import unittest
+
+import numpy as np
+
+import caffe
+
+
+class TestBlob(unittest.TestCase):
+
+    def test_constructor(self):
+        # empty shape blob
+        b = caffe.Blob([])
+        self.assertEqual(b.shape, ())
+        # init with list
+        b = caffe.Blob([1, 2, 3])
+        self.assertEqual(b.shape, (1, 2, 3))
+        a = np.random.randn(1, 2, 3)
+        b.data[...] = a
+        self.assertTrue(np.all(a.astype('float32') == b.data))
+        # init with tuple
+        b = caffe.Blob((1, 2, 3))
+        self.assertEqual(b.shape, (1, 2, 3))
+        # init with generator
+        b = caffe.Blob(xrange(2, 6))
+        self.assertEqual(b.shape, tuple(xrange(2, 6)))
diff --git a/python/caffe/test/test_create_layer.py b/python/caffe/test/test_create_layer.py
new file mode 100644
index 00000000000..e9a9f24a784
--- /dev/null
+++ b/python/caffe/test/test_create_layer.py
@@ -0,0 +1,62 @@
+import os
+import tempfile
+import unittest
+
+import numpy as np
+
+import caffe
+from caffe.proto import caffe_pb2
+
+
+def create_blob(shape):
+    """Create a Blob of the given shape, backed by a throwaway one-input Net."""
+    net_file = None
+    with tempfile.NamedTemporaryFile(delete=False) as f:
+        f.write(
+            "input: 'data' input_shape { %s }" % (
+                ' '.join(['dim: %d' % i for i in shape])))
+        net_file = f.name
+    net = caffe.Net(net_file, caffe.TRAIN)
+    os.remove(net_file)
+    return net.blobs['data']
+
+
+class TestCreateLayer(unittest.TestCase):
+
+    def setUp(self):
+        self.shapei = [2, 2, 4, 4]
+        self.blobi = create_blob(self.shapei)
+        self.blobo = create_blob([1])
+
+    def test_create_conv_layer(self):
+        # Setting layer parameter for convolution
+        layer_param = caffe_pb2.LayerParameter()
+        layer_param.type = 'Convolution'
+        layer_param.name = 'conv1'
+        cparam = layer_param.convolution_param
+        cparam.num_output = 3
+        cparam.kernel_size = 2
+        wfiller = cparam.weight_filler
+        wfiller.type = "uniform"
+        wfiller.max = 3
+        wfiller.min = 1.5
+        # Create layer
+        conv_layer = caffe.create_layer(layer_param)
+        self.assertEqual(conv_layer.type, 'Convolution')
+        # Set up layer
+        conv_layer.SetUp([self.blobi], [self.blobo])
+        weights = conv_layer.blobs[0]
+        self.assertTrue(np.all(weights.data >= 1.5))
+        self.assertTrue(np.all(weights.data <= 3.0))
+        # Reshape out blobs
+        conv_layer.Reshape([self.blobi], [self.blobo])
+        shapei = self.shapei
+        shapeo = self.blobo.data.shape
+        self.assertEqual(
+            shapeo,
+            (shapei[0], cparam.num_output,
+             shapei[2] - cparam.kernel_size + 1,
+             shapei[3] - cparam.kernel_size + 1))
+        # Forward, Backward
+        conv_layer.Forward([self.blobi], [self.blobo])
+        conv_layer.Backward([self.blobo], [True], [self.blobi])
diff --git a/python/caffe/test/test_gradient_checker.py b/python/caffe/test/test_gradient_checker.py
new file mode 100644
index 00000000000..f02338aead2
--- /dev/null
+++ b/python/caffe/test/test_gradient_checker.py
@@ -0,0 +1,56 @@
+import unittest
+
+import numpy as np
+
+import caffe
+from caffe.proto import caffe_pb2
+from caffe.gradient_check_util import GradientChecker
+
+
+class TestGradientChecker(unittest.TestCase):
+
+    def setUp(self):
+        shape = [10, 5, 1, 1]
+        pred = caffe.Blob(shape)
+        label = caffe.Blob(shape)
+        self.rng = np.random.RandomState(313)
+        pred.data[...] = self.rng.randn(*shape)
+        label.data[...] = self.rng.randn(*shape)
+        self.bottom = [pred, label]
+        self.top = [caffe.Blob([])]
+
+    def test_euclidean(self):
+        lp = caffe_pb2.LayerParameter()
+        lp.type = "EuclideanLoss"
+        layer = caffe.create_layer(lp)
+        layer.SetUp(self.bottom, self.top)
+        layer.Reshape(self.bottom, self.top)
+        layer.Forward(self.bottom, self.top)
+        # manual computation
+        loss = np.sum((self.bottom[0].data - self.bottom[1].data) ** 2) \
+            / self.bottom[0].shape[0] / 2.0
+        self.assertAlmostEqual(float(self.top[0].data), loss, 5)
+        checker = GradientChecker(1e-2, 1e-2)
+        checker.check_gradient_exhaustive(
+            layer, self.bottom, self.top, check_bottom='all')
+
+    def test_inner_product(self):
+        lp = caffe_pb2.LayerParameter()
+        lp.type = "InnerProduct"
+        lp.inner_product_param.num_output = 3
+        layer = caffe.create_layer(lp)
+        layer.SetUp([self.bottom[0]], self.top)
+        w = self.rng.randn(*layer.blobs[0].shape)
+        b = self.rng.randn(*layer.blobs[1].shape)
+        layer.blobs[0].data[...] = w
+        layer.blobs[1].data[...] = b
+        layer.Reshape([self.bottom[0]], self.top)
+        layer.Forward([self.bottom[0]], self.top)
+        self.assertTrue(np.allclose(
+            self.top[0].data,
+            np.dot(
+                self.bottom[0].data.reshape(self.bottom[0].shape[0], -1), w.T
+            ) + b))
+        checker = GradientChecker(1e-2, 1e-1)
+        checker.check_gradient_exhaustive(
+            layer, [self.bottom[0]], self.top, check_bottom=[0])
diff --git a/python/caffe/test/test_layer_parameter.py b/python/caffe/test/test_layer_parameter.py
new file mode 100644
index 00000000000..1d38accab38
--- /dev/null
+++ b/python/caffe/test/test_layer_parameter.py
@@ -0,0 +1,28 @@
+import unittest
+
+import caffe
+from caffe.proto import caffe_pb2
+
+
+class TestLayerParameter(unittest.TestCase):
+
+    def test_create_from_string(self):
+        # Construction from a prototxt string must not raise.
+        caffe.LayerParameter("type: 'Convolution' name: 'conv1'")
+
+    def test_create_from_py_layer_parameter(self):
+        plp = caffe_pb2.LayerParameter()
+        plp.type = 'Convolution'
+        plp.name = 'conv1'
+        lp = caffe.LayerParameter(plp)
+        plp2 = lp.to_python()
+        self.assertEqual(plp, plp2)
+
+    def test_from_python(self):
+        plp = caffe_pb2.LayerParameter()
+        plp.type = 'Convolution'
+        plp.name = 'conv1'
+        lp = caffe.LayerParameter("")
+        lp.from_python(plp)
+        plp2 = lp.to_python()
+        self.assertEqual(plp, plp2)
diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp
index af96cac40aa..1c252d572c3 100644
--- a/src/caffe/common.cpp
+++ b/src/caffe/common.cpp
@@ -55,6 +55,11 @@ void Caffe::SetDevice(const int device_id) {
   NO_GPU;
 }
 
+int Caffe::GetDevice() {
+  NO_GPU;
+  return -1;  // not reached; silences "no return" warning in CPU-only builds
+}
+
 void Caffe::DeviceQuery() {
   NO_GPU;
 }
@@ -146,6 +151,12 @@ void Caffe::SetDevice(const int device_id) {
       cluster_seedgen()));
 }
 
+int Caffe::GetDevice() {
+  int current_device;
+  CUDA_CHECK(cudaGetDevice(&current_device));
+  return current_device;
+}
+
 void Caffe::DeviceQuery() {
   cudaDeviceProp prop;
   int device;