diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py
index 14d9bd1b8971..10a7c40eea03 100644
--- a/example/cifar10/cifar10.py
+++ b/example/cifar10/cifar10.py
@@ -2,6 +2,11 @@
 import numpy as np
 import mxnet as mx
 import copy
+import sys
+sys.path.append("../../tests/python")
+import get_data
+
+
 """
 CXXNET Result:
 step1: wmat_lr = 0.05, bias_lr = 0.1, mom = 0.9
@@ -49,6 +54,10 @@
 [39] train-error:0.00125879 val-error:0.0833
 [40] train-error:0.000699329 val-error:0.0842
 """
+def CalAcc(out, label):
+    pred = np.argmax(out, axis=1)
+    return np.sum(pred == label) * 1.0 / out.shape[0]
+
 np.random.seed(1812)
@@ -62,6 +71,7 @@ def ConvFactory(**kwargs):
     act = param["act_type"]
     del param["act_type"]
     param["name"] = "conv%d" % conv_cnt
+    param["nstep"] = 64
     conv = mx.symbol.Convolution(**param)
     bn = mx.symbol.BatchNorm(data = conv, name="bn%d" % conv_cnt)
     relu = mx.symbol.Activation(data = bn, name = "%s%d" % (act, conv_cnt), act_type=act)
@@ -96,6 +106,7 @@ def DownsampleFactory(data, ch_3x3, stride = 2):
     concat_cnt += 1
     return concat
 
+
 def SimpleFactory(data, ch_1x1, ch_3x3):
     global concat_cnt
     param = {}
@@ -106,7 +117,7 @@ def SimpleFactory(data, ch_1x1, ch_3x3):
     param["stride"] = (1, 1)
     param["act_type"] = "relu"
     param["data"] = data
-    param["nstep"] = 100
+    param["nstep"] = 128
     conv1x1 = ConvFactory(**param)
 
     # 3x3
@@ -121,12 +132,11 @@ def SimpleFactory(data, ch_1x1, ch_3x3):
     return concat
 
 def RandomInit(narray):
-    in_num = narray.numpy.shape[1]
-    out_num = narray.numpy.shape[0]
+    in_num = narray.shape[1]
+    out_num = narray.shape[0]
     a = np.sqrt(3.0 / (in_num + out_num))
-    tmp = mx.narray.create((narray.numpy.shape))
-    tmp.numpy[:] = np.random.uniform(-a, a, narray.numpy.shape)
-    tmp.copyto(narray)
+    tmp = mx.narray.array(np.random.uniform(-a, a, narray.shape))
+    narray[:] = tmp
 
 data = mx.symbol.Variable(name="data")
 conv1 = ConvFactory(data=data, kernel=(3,3), pad=(1,1), num_filter=96, act_type="relu")
@@ -143,110 +153,129 @@ def RandomInit(narray):
 pool = mx.symbol.Pooling(data=in5b, pool_type="avg", kernel=(7,7), name="pool%d" % pool_cnt)
 flatten = mx.symbol.Flatten(data=pool, name="flatten1")
 fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1")
-loss = mx.symbol.Softmax(data=fc, name="softmax")
+loss = mx.symbol.Softmax(data=fc, name="sm")
+
 args_list = loss.list_arguments()
-data_shape = (128, 3, 28, 28)
+
+batch_size = 128
+data_shape = (batch_size, 3, 28, 28)
 arg_shapes, out_shapes, aux_shapes = loss.infer_shape(data=data_shape)
-arg_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
-grad_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
+arg_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
+grad_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
+mom_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
+aux_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in aux_shapes]
 inputs = dict(zip(args_list, arg_narrays))
 name2shape = dict(zip(args_list, arg_shapes))
-pred = mx.narray.create(out_shapes[0])
+pred = mx.narray.zeros(out_shapes[0])
 
 np.random.seed(0)
 # set random weight
+
 for name, narray in inputs.items():
     if "weight" in name:
-        tmp = mx.narray.create(name2shape[name])
-        tmp.numpy[:] = np.random.uniform(-0.07, 0.07, name2shape[name])
-        tmp.copyto(narray)
+        narray[:] = np.random.uniform(-0.1, 0.1, narray.shape)
     if "bias" in name:
         narray[:] = 0.0
+    if "gamma" in name:
+        narray[:] = 1.0
+    if "beta" in name:
+        narray[:] = 0.0
 
 # bind executer
 # TODO(bing): think of a better bind interface
-executor = loss.bind(mx.Context('gpu'), arg_narrays, grad_narrays)
+executor = loss.bind(mx.Context('gpu'), arg_narrays, grad_narrays, 'write', aux_narrays)
 
 # update
 out_narray = executor.heads()[0]
-grad_narray = mx.narray.create(out_narray.shape)
 
 epoch = 9
-lr = 0.1
-wd = 0.0004
+lr = 0.05
+wd = 0.0001
+momentum = 0.9
 
-def Update(grad, weight):
-    weight[:] -= lr * grad / batch_size
+def Update(grad, weight, mom):
+    mom[:] *= momentum
+    mom[:] += -lr * (grad / batch_size + wd * weight)
+    weight[:] += mom
 
-block = list(zip(grad_narrays, arg_narrays))
+block = list(zip(grad_narrays, arg_narrays, mom_narrays))
 
 #check data
 get_data.GetCifar10()
+
 train_dataiter = mx.io.ImageRecordIter(
         path_imgrec="data/cifar/train.rec",
         mean_img="data/cifar/cifar_mean.bin",
         rand_crop=True,
         rand_mirror=True,
         input_shape=(3,28,28),
-        batch_size=128,
+        batch_size=batch_size,
         nthread=1)
 test_dataiter = mx.io.ImageRecordIter(
         path_imgrec="data/cifar/test.rec",
         mean_img="data/cifar/cifar_mean.bin",
-        rand_crop=True,
-        rand_mirror=True,
+        rand_crop=False,
+        rand_mirror=False,
         input_shape=(3,28,28),
-        batch_size=100,
+        batch_size=batch_size,
         nthread=1)
 
-tmp_label = mx.narray.create(name2shape["sm_label"])
+tmp_label = mx.narray.zeros(name2shape["sm_label"])
+
+def progress(count, total, suffix=''):
+    bar_len = 80
+    filled_len = int(round(bar_len * count / float(total)))
+
+    percents = round(100.0 * count / float(total), 1)
+    bar = '=' * filled_len + '-' * (bar_len - filled_len)
+
+    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
 
 def test_cifar():
     acc_train = 0.
     acc_val = 0.
+    print("Start training...")
     for i in range(epoch):
         # train
-        print("Epoch %d" % i)
         train_acc = 0.0
         val_acc = 0.0
         train_nbatch = 0
         val_nbatch = 0
+        all_train_batch = 50000 / float(batch_size)
         for data, label in train_dataiter:
-            data = data
-            tmp_label.numpy[:] = label.numpy.reshape(tmp_label.shape)
-            data.copyto(inputs["data"])
-            tmp_label.copyto(inputs["sm_label"])
+            progress(train_nbatch, all_train_batch, "Epoch %d" % i)
+            label = label.asnumpy().flatten()
+            tmp_label[:] = label
+            inputs["data"][:] = data
+            inputs["sm_label"][:] = tmp_label
             executor.forward()
-            out_narray.copyto(pred)
-            train_acc += CalAcc(pred.numpy, label.numpy.flatten())
+            pred[:] = out_narray
+            train_acc += CalAcc(pred.asnumpy(), label)
             train_nbatch += 1
-            out_narray.copyto(grad_narray)
-            executor.backward([grad_narray])
+            executor.backward([out_narray])
 
-            for grad, weight in block:
-                Update(grad, weight)
+            for grad, weight, mom in block:
+                Update(grad, weight, mom)
 
         # evaluate
-        for data, label in val_dataiter:
-            data = data
-            label = label.numpy.flatten()
-            data.copyto(inputs["data"])
+        for data, label in test_dataiter:
+            label = label.asnumpy().flatten()
+            inputs["data"][:] = data
             executor.forward()
-            out_narray.copyto(pred)
-            val_acc += CalAcc(pred.numpy, label)
+            pred[:] = out_narray
+            val_acc += CalAcc(pred.asnumpy(), label)
             val_nbatch += 1
         acc_train = train_acc / train_nbatch
         acc_val = val_acc / val_nbatch
+        sys.stdout.write('\n')
        print("Train Acc: ", train_acc / train_nbatch)
         print("Valid Acc: ", val_acc / val_nbatch)
         train_dataiter.reset()
-        val_dataiter.reset()
-    assert(acc_train > 0.98)
-    assert(acc_val > 0.97)
+        test_dataiter.reset()
 
 if __name__ == "__main__":
     test_cifar()
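Note: the rewritten `Update` above replaces plain SGD with momentum SGD plus weight decay. A minimal NumPy sketch of the same rule, outside the NArray machinery (the array shapes and values are illustrative; `lr`, `wd`, `momentum`, and `batch_size` mirror the script):

```python
import numpy as np

lr, wd, momentum, batch_size = 0.05, 0.0001, 0.9, 128

weight = np.random.uniform(-0.1, 0.1, (10, 5))
grad = np.random.randn(10, 5)    # stand-in for a summed minibatch gradient
mom = np.zeros_like(weight)      # one momentum buffer per weight array

# same three steps as cifar10.py's Update(grad, weight, mom)
mom *= momentum
mom += -lr * (grad / batch_size + wd * weight)
weight += mom
```

Folding the weight-decay term `wd * weight` into the momentum buffer, rather than applying it to the weights directly, matches the CXXNET settings quoted at the top of the file (wmat_lr = 0.05, mom = 0.9).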
diff --git a/example/mnist/mlp_gpu.py b/example/mnist/mlp_gpu.py
index 8fc44d0f5af4..010d09912523 100644
--- a/example/mnist/mlp_gpu.py
+++ b/example/mnist/mlp_gpu.py
@@ -1,11 +1,15 @@
 # pylint: skip-file
+
 import mxnet as mx
 import numpy as np
 import os, gzip
 import pickle as pickle
 import sys
+sys.path.append("../../tests/python")
 import get_data
+
+
 
 def CalAcc(out, label):
     pred = np.argmax(out, axis=1)
     return np.sum(pred == label) * 1.0 / out.shape[0]
@@ -20,40 +24,42 @@ def CalAcc(out, label):
 fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
 softmax = mx.symbol.Softmax(data = fc3, name = 'sm')
 args_list = softmax.list_arguments()
+
 # infer shape
 data_shape = (batch_size, 784)
 arg_shapes, out_shapes, aux_shapes = softmax.infer_shape(data=data_shape)
-arg_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
-grad_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
-
+# create GPU NArray for data
+arg_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
+grad_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
 inputs = dict(zip(args_list, arg_narrays))
+# create CPU NArray for result stat
 name2shape = dict(zip(args_list, arg_shapes))
-pred = mx.narray.create(out_shapes[0])
+pred = mx.narray.zeros(out_shapes[0])
+
 
-np.random.seed(0)
 # set random weight
+np.random.seed(0)
 for name, narray in inputs.items():
     if "weight" in name:
-        tmp = mx.narray.create(name2shape[name])
-        tmp.numpy[:] = np.random.uniform(-0.07, 0.07, name2shape[name])
+        tmp = mx.narray.array(np.random.uniform(-0.07, 0.07, name2shape[name]))
         tmp.copyto(narray)
-    if "bias" in name:
-        narray[:] = 0.0
 
 # bind executer
 # TODO(bing): think of a better bind interface
 executor = softmax.bind(mx.Context('gpu'), arg_narrays, grad_narrays)
 
-# update
-
+# create gradient NArray
 out_narray = executor.heads()[0]
-grad_narray = mx.narray.create(out_narray.shape)
+grad_narray = mx.narray.zeros(out_narray.shape, ctx=mx.Context("gpu"))
+
+# update
 epoch = 9
 lr = 0.1
 wd = 0.0004
 
+# SGD Update rule
 def Update(grad, weight):
     weight[:] -= lr * grad / batch_size
@@ -71,7 +77,7 @@ def Update(grad, weight):
         label="data/t10k-labels-idx1-ubyte",
         batch_size=batch_size, shuffle=True, flat=True, silent=False)
 
-tmp_label = mx.narray.create(name2shape["sm_label"])
+tmp_label = mx.narray.zeros(name2shape["sm_label"])
 
 def test_mlp():
     acc_train = 0.
@@ -84,15 +90,15 @@ def test_mlp():
         train_nbatch = 0
         val_nbatch = 0
         for data, label in train_dataiter:
-            data = data
-            tmp_label.numpy[:] = label.numpy.reshape(tmp_label.shape)
-            data.copyto(inputs["data"])
-            tmp_label.copyto(inputs["sm_label"])
+            label = label.asnumpy().reshape(tmp_label.shape)
+            tmp_label[:] = label
+            inputs["data"][:] = data
+            inputs["sm_label"][:] = tmp_label
             executor.forward()
-            out_narray.copyto(pred)
-            train_acc += CalAcc(pred.numpy, label.numpy.flatten())
+            pred[:] = out_narray
+            train_acc += CalAcc(pred.asnumpy(), label)
             train_nbatch += 1
-            out_narray.copyto(grad_narray)
+            grad_narray[:] = out_narray
             executor.backward([grad_narray])
 
             for grad, weight in block:
@@ -100,12 +106,11 @@ def test_mlp():
 
         # evaluate
         for data, label in val_dataiter:
-            data = data
-            label = label.numpy.flatten()
-            data.copyto(inputs["data"])
+            label = label.asnumpy().flatten()
+            inputs["data"][:] = data
             executor.forward()
-            out_narray.copyto(pred)
-            val_acc += CalAcc(pred.numpy, label)
+            pred[:] = out_narray
+            val_acc += CalAcc(pred.asnumpy(), label)
             val_nbatch += 1
         acc_train = train_acc / train_nbatch
         acc_val = val_acc / val_nbatch
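Note: both examples report accuracy as the running mean of per-batch `CalAcc` values over the epoch. A self-contained NumPy illustration of that bookkeeping (the batches here are synthetic):

```python
import numpy as np

def CalAcc(out, label):
    pred = np.argmax(out, axis=1)
    return np.sum(pred == label) * 1.0 / out.shape[0]

acc, nbatch = 0.0, 0
for _ in range(4):                        # four synthetic batches
    out = np.random.rand(128, 10)         # softmax outputs: (batch, classes)
    label = np.random.randint(0, 10, 128) # integer class labels
    acc += CalAcc(out, label)
    nbatch += 1
print("epoch accuracy: %f" % (acc / nbatch))
```

This mean-of-means is exact only when every batch has the same size, which holds here because both iterators are configured with a fixed `batch_size`.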
diff --git a/python/mxnet/narray.py b/python/mxnet/narray.py
index 6706bd4fecc1..acc05d08d546 100644
--- a/python/mxnet/narray.py
+++ b/python/mxnet/narray.py
@@ -333,6 +333,52 @@ def empty(shape, ctx=None):
         ctx = Context.default_ctx
     return NArray(handle=_new_alloc_handle(shape, ctx, False))
 
+def zeros(shape, ctx=None):
+    """Create a new NArray filled with 0, with specified shape.
+
+    Parameters
+    ----------
+    shape : tuple
+        shape of the NArray.
+
+    ctx : Context, optional
+        The context of the NArray, default to current default context.
+
+    Returns
+    -------
+    out: Array
+        The created NArray.
+    """
+    if ctx is None:
+        ctx = Context.default_ctx
+    arr = NArray(handle=_new_alloc_handle(shape, ctx, False))
+    arr[:] = 0.0
+    return arr
+
+def ones(shape, ctx=None):
+    """Create a new NArray filled with 1, with specified shape.
+
+    Parameters
+    ----------
+    shape : tuple
+        shape of the NArray.
+
+    ctx : Context, optional
+        The context of the NArray, default to current default context.
+
+    Returns
+    -------
+    out: Array
+        The created NArray.
+    """
+    if ctx is None:
+        ctx = Context.default_ctx
+    arr = NArray(handle=_new_alloc_handle(shape, ctx, False))
+    arr[:] = 1.0
+    return arr
+
+
+
 def array(source_array, ctx=None):
     """Create a new NArray that copies content from source_array.
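Note: a short usage sketch of the two helpers added above, following their docstrings and the NArray API used elsewhere in this patch (`mx.Context`, slice assignment, `asnumpy`); shapes are arbitrary:

```python
import mxnet as mx

a = mx.narray.zeros((2, 3))                        # allocated on the default context
b = mx.narray.ones((2, 3), ctx=mx.Context("gpu"))  # explicit device placement
print(a.asnumpy())                                 # a 2x3 array of zeros
```

Unlike `empty`, which leaves memory uninitialized, both helpers pay one fill pass (`arr[:] = 0.0` or `1.0`); that is what lets the examples replace the old create-then-copyto idiom with a single call.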
diff --git a/src/c_api.cc b/src/c_api.cc
index 2e59613829bc..a01ba5cd75ee 100644
--- a/src/c_api.cc
+++ b/src/c_api.cc
@@ -756,9 +756,7 @@ int MXExecutorBind(SymbolHandle symbol_handle,
     if (arg_grad_ptr[i] == nullptr) {
       arg_grad_vec.push_back(NArray());
       grad_req_vec.push_back(kNullOp);
-      LOG(INFO) << "nop";
     } else {
-      LOG(INFO) << "grad=" << grad_req_type[i];
       arg_grad_vec.push_back(*(arg_grad_ptr[i]));
       grad_req_vec.push_back(static_cast<OpReqType>(grad_req_type[i]));
     }
diff --git a/src/operator/convolution-inl.h b/src/operator/convolution-inl.h
index e04935fd106e..b313aab14f94 100644
--- a/src/operator/convolution-inl.h
+++ b/src/operator/convolution-inl.h
@@ -178,7 +178,7 @@ class ConvolutionOp : public Operator {
         gwmat[gid] += dot(temp_dst_[gid], tmpc.T());
       }
     }
-    if (req[kData] == kWriteTo) {
+    if (req[kData] == kWriteTo || req[kData] == kWriteInplace) {
       for (uint32_t gid = 0; gid < param_.num_group; ++gid) {
         Tensor<xpu, 2> tmpc = temp_col_.Slice(gstride * gid, gstride * (gid + 1));
         tmpc = dot(wmat[gid].T(), temp_dst_[gid]);
@@ -283,6 +283,8 @@ class ConvolutionProp : public OperatorProperty {
     const index_t ksize_x = static_cast<index_t>(param_.kernel[1]);
     const index_t kstride = static_cast<index_t>(param_.stride[0]);
     // TODO(bing) : support dual stride
+    CHECK_EQ(param_.stride[0], param_.stride[1])
+        << "Only support same stride now";
     CHECK_EQ(dshape[1] % param_.num_group, 0) \
         << "input num_filter must divide group size";
     CHECK_EQ(param_.num_filter % param_.num_group, 0) \
diff --git a/src/operator/pooling-inl.h b/src/operator/pooling-inl.h
index 491d21dbe810..b0d483ef0217 100644
--- a/src/operator/pooling-inl.h
+++ b/src/operator/pooling-inl.h
@@ -74,6 +74,8 @@ class PoolingOp : public Operator {
     Tensor<xpu, 4> out = out_data[kOut].get<xpu, 4, real_t>(s);
     mshadow::Shape<2> out_shape = Shape2(out.shape_[2], out.shape_[3]);
     // TODO(bing): dual stride in mshadow
+    CHECK_EQ(param_.stride[0], param_.stride[1])
+        << "Only same stride is supported now";
     if (param_.pool_type == kMaxPooling || param_.pool_type == kSumPooling) {
       Assign(out,
              req[kOut],
@@ -81,7 +83,7 @@ class PoolingOp : public Operator {
                          out_shape,
                          param_.kernel[0],
                          param_.kernel[1],
-                         param_.kernel[0]));
+                         param_.stride[0]));
     } else if (param_.pool_type == kAvgPooling) {
       Assign(out,
              req[kOut],
@@ -90,7 +92,7 @@ class PoolingOp : public Operator {
                          out_shape,
                          param_.kernel[0],
                          param_.kernel[1],
-                         param_.kernel[0]));
+                         param_.stride[0]));
     }
   }
diff --git a/tests/python/get_data.py b/tests/python/get_data.py
index 828809f3e757..260dd27ea932 100644
--- a/tests/python/get_data.py
+++ b/tests/python/get_data.py
@@ -33,4 +33,6 @@ def GetCifar10():
         os.system("mkdir data/")
     if not os.path.exists('data/cifar10.zip'):
         os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip -P data/")
-        os.system("unzip data/cifar10.zip")
+        os.chdir("./data")
+        os.system("unzip cifar10.zip")
+        os.chdir("..")
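Note: the pooling change above fixes a real bug: `param_.kernel[0]` was passed where the pooling *stride* belongs, which happens to be correct only when stride equals kernel size. A NumPy sketch showing that the two parameters are independent (the `max_pool2d` helper is hypothetical, single channel, no padding):

```python
import numpy as np

def max_pool2d(x, kernel, stride):
    kh, kw = kernel
    sh, sw = stride
    oh = (x.shape[0] - kh) // sh + 1   # output height from kernel AND stride
    ow = (x.shape[1] - kw) // sw + 1
    out = np.empty((oh, ow), dtype=x.dtype)
    for i in range(oh):
        for j in range(ow):
            out[i, j] = x[i * sh:i * sh + kh, j * sw:j * sw + kw].max()
    return out

x = np.arange(16.0).reshape(4, 4)
print(max_pool2d(x, (3, 3), (1, 1)).shape)  # (2, 2): overlapping windows
print(max_pool2d(x, (3, 3), (3, 3)).shape)  # (1, 1): what kernel-as-stride produced
```

With the old code, any pooling layer whose stride differed from its kernel (e.g. 3x3 pooling with stride 2) silently computed the wrong output.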