This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
66 changes: 66 additions & 0 deletions example/fcn-xs/README.md
@@ -0,0 +1,66 @@
FCN-xs EXAMPLES
---------------
This folder contains examples of image segmentation in MXNet.

## Sample results
![fcn-xs pascal_voc result](https://github.com/dmlc/web-data/blob/master/mxnet/image/fcnxs-example-result.jpg)

We have trained a simple fcn-xs model; the training parameters are below:

| model | lr (fixed) | epoch |
| ---- | ----: | ---------: |
| fcn-32s | 1e-10 | 31 |
| fcn-16s | 1e-12 | 27 |
| fcn-8s | 1e-14 | 19 |

The training set contains only 2027 images, and the validation set contains 462.
Contributor:

Do you have results on the VOC submission server? We had better add those numbers in.

Contributor (author):

Not yet. In this example I used a subset of the Pascal VOC dataset and randomly chose 70% of the images for training, so it is not the standard train/val split.
To evaluate on the VOC submission server I would need to retrain the model. I'll do that after the New Year holiday, so please give me about two weeks (one week for training :)).

Contributor:

Cool, once this PR is merged I can do that.


## How to train fcn-xs in mxnet
#### Step 1: download the vgg16fc model and experiment data
* vgg16fc model: download ```VGG_FC_ILSVRC_16_layers-symbol.json``` and ```VGG_FC_ILSVRC_16_layers-0074.params``` from [yun.baidu](http://pan.baidu.com/s/1bgz4PC).
This is the fully convolutional version of the original
[VGG_ILSVRC_16_layers.caffemodel](http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel), with the corresponding [VGG_ILSVRC_16_layers_deploy.prototxt](https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-vgg_ilsvrc_16_layers_deploy-prototxt). Note that the vgg16 model is [licensed](http://creativecommons.org/licenses/by-nc/4.0/) for non-commercial use only.
* experiment data: download ```VOC2012.rar``` from [yun.baidu](http://pan.baidu.com/s/1bgz4PC) and extract it. The files/folders will look like:
```JPEGImages``` folder, ```SegmentationClass``` folder, ```train.lst```, ```val.lst```, ```test.lst```
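
The ```.lst``` files are plain-text lists in the format the example's ```FileIter``` expects: ```index \t image_path \t label_path``` per line. A minimal sketch of how such a list could be generated (the directory layout matches the extracted data above; the helper name and the two image stems are hypothetical):

```python
import os

def make_flist(root_dir, out_name, names):
    """Write a list file: index \t image path \t label path (relative to root_dir)."""
    with open(os.path.join(root_dir, out_name), "w") as f:
        for i, name in enumerate(names):
            img = "JPEGImages/%s.jpg" % name
            label = "SegmentationClass/%s.png" % name
            f.write("%d\t%s\t%s\n" % (i, img, label))

# hypothetical example with two Pascal VOC image stems
make_flist("/tmp", "train.lst", ["2007_000032", "2007_000039"])
```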

#### Step 2: train the fcn-xs model
* If you want to train the fcn-8s model, you should train the fcn-32s and fcn-16s models first.
To train the fcn-32s model, run ```./run_fcnxs.sh``` in a shell; the script contains:
```shell
python -u fcn_xs.py --model=fcn32s --prefix=VGG_FC_ILSVRC_16_layers --epoch=74 --init-type=vgg16
```
* In ```fcn_xs.py```, you may need to change ```root_dir```, ```flist_name```, and ```fcnxs_model_prefix``` for your own data.
* When you train the fcn-16s or fcn-8s model, change the command in ```run_fcnxs.sh``` accordingly; for example, to train fcn-16s, comment out the fcn-32s line so the script looks like this:
```shell
python -u fcn_xs.py --model=fcn16s --prefix=FCN32s_VGG16 --epoch=31 --init-type=fcnxs
```
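
The staged initialization above (fcn-16s starting from the fcn-32s checkpoint via ```--init-type=fcnxs```) presumably amounts to reusing every parameter whose name and shape match the new symbol, and freshly initializing the rest (the new score/upsampling layers). A rough sketch under that assumption; the helper and parameter names here are hypothetical, not taken from ```init_fcnxs.py```:

```python
import numpy as np

def init_from_checkpoint(new_arg_shapes, loaded_args):
    """Reuse loaded parameters whose name and shape match; zero-init the rest."""
    args = {}
    for name, shape in new_arg_shapes.items():
        if name in loaded_args and loaded_args[name].shape == shape:
            args[name] = loaded_args[name]   # reuse the fcn-32s weight
        else:
            args[name] = np.zeros(shape)     # new layer: fresh initialization
    return args

# hypothetical parameter dicts: one shared conv layer, one new score layer
loaded = {"conv1_1_weight": np.ones((64, 3, 3, 3))}
new_shapes = {"conv1_1_weight": (64, 3, 3, 3),
              "score_pool4_weight": (21, 512, 1, 1)}
args = init_from_checkpoint(new_shapes, loaded)
```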
* The output log should look like this (when training fcn-8s):
```
INFO:root:Start training with gpu(3)
INFO:root:Epoch[0] Batch [50] Speed: 1.16 samples/sec Train-accuracy=0.894318
INFO:root:Epoch[0] Batch [100] Speed: 1.11 samples/sec Train-accuracy=0.904681
INFO:root:Epoch[0] Batch [150] Speed: 1.13 samples/sec Train-accuracy=0.908053
INFO:root:Epoch[0] Batch [200] Speed: 1.12 samples/sec Train-accuracy=0.912219
INFO:root:Epoch[0] Batch [250] Speed: 1.13 samples/sec Train-accuracy=0.914238
INFO:root:Epoch[0] Batch [300] Speed: 1.13 samples/sec Train-accuracy=0.912170
INFO:root:Epoch[0] Batch [350] Speed: 1.12 samples/sec Train-accuracy=0.912080
```

## Using the pre-trained model for image segmentation
* Similarly, first download the pre-trained model from [yun.baidu](http://pan.baidu.com/s/1bgz4PC); the symbol and parameter files are ```FCN8s_VGG16-symbol.json``` and ```FCN8s_VGG16-0019.params```.
* Then put the image you want to segment in your directory, and change ```img = YOUR_IMAGE_NAME``` in ```image_segmentaion.py```.
* Finally, run ```python image_segmentaion.py``` in a shell to segment the image; you will get a segmentation image like the sample result above.

## Tips
* This example trains on whole images, i.e. we do not resize/crop the images to a common size, so ```batch_size``` during training is set to 1.
* The fcn-xs model is based on the vgg16 model, with crop, deconvolution, and element-wise sum layers added, so the model is fairly large. Moreover, since this example trains on whole images, a large input (such as 700*500) can consume a lot of memory, so we suggest using a GPU with 12GB of memory.
* If you don't have a GPU with 12GB of memory, you should set ```cut_off_size``` to a smaller value when you construct your FileIter, like this:
```python
train_dataiter = FileIter(
    root_dir     = "./VOC2012",
    flist_name   = "train.lst",
    cut_off_size = 400,
    rgb_mean     = (123.68, 116.779, 103.939),
    )
```
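
For reference, when ```cut_off_size``` is set and both sides of an image exceed it, ```FileIter``` takes a random square crop of that size. A minimal sketch of that behavior (the helper name is ours, not from ```data.py```):

```python
import numpy as np

def random_square_crop(img, cut_off_size):
    """Pick a random cut_off_size x cut_off_size window from an (h, w, c) image."""
    h, w = img.shape[:2]
    assert min(h, w) > cut_off_size
    y = np.random.randint(0, h - cut_off_size)  # random top-left corner
    x = np.random.randint(0, w - cut_off_size)
    return img[y:y + cut_off_size, x:x + cut_off_size]

img = np.zeros((700, 500, 3), dtype=np.float32)
crop = random_square_crop(img, 400)
```

With a 700*500 input cropped to 400*400, the activation memory drops roughly in proportion to the pixel count.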
* We look forward to your contributions to make this example more powerful. Thanks!
122 changes: 122 additions & 0 deletions example/fcn-xs/data.py
@@ -0,0 +1,122 @@
# pylint: skip-file
""" file iterator for pasval voc 2012"""
import mxnet as mx
import numpy as np
import sys, os
from mxnet.io import DataIter
from PIL import Image

class FileIter(DataIter):
"""FileIter object in fcn-xs example. Taking a file list file to get dataiter.
in this example, we use the whole image training for fcn-xs, that is to say
we do not need resize/crop the image to the same size, so the batch_size is
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For efficiency, I suggest to change to crop image instead, but not required :)

set to 1 here
Parameters
----------
root_dir : string
the root directory the images/labels lie in
flist_name : string
the list file of images and labels; every line has the form:
index \t image_data_path \t image_label_path
cut_off_size : int
if the longer side of an image exceeds cut_off_size, the image is randomly
cropped: to cut_off_size x cut_off_size when both sides exceed it, otherwise
to a square of the shorter side
data_name : string
the data name used in the symbol (default "data")
label_name : string
the label name used in the symbol (default "softmax_label")
"""
def __init__(self, root_dir, flist_name,
rgb_mean = (117, 117, 117),
cut_off_size = None,
data_name = "data",
label_name = "softmax_label"):
super(FileIter, self).__init__()
self.root_dir = root_dir
self.flist_name = os.path.join(self.root_dir, flist_name)
self.mean = np.array(rgb_mean) # (R, G, B)
self.cut_off_size = cut_off_size
self.data_name = data_name
self.label_name = label_name

self.num_data = len(open(self.flist_name, 'r').readlines())
self.f = open(self.flist_name, 'r')
self.data, self.label = self._read()
self.cursor = -1

def _read(self):
"""get two list, each list contains two elements: name and nd.array value"""
_, data_img_name, label_img_name = self.f.readline().strip('\n').split("\t")
data = {}
label = {}
data[self.data_name], label[self.label_name] = self._read_img(data_img_name, label_img_name)
return list(data.items()), list(label.items())

def _read_img(self, img_name, label_name):
img = Image.open(os.path.join(self.root_dir, img_name))
label = Image.open(os.path.join(self.root_dir, label_name))
assert img.size == label.size
img = np.array(img, dtype=np.float32) # (h, w, c)
label = np.array(label) # (h, w)
if self.cut_off_size is not None:
max_hw = max(img.shape[0], img.shape[1])
min_hw = min(img.shape[0], img.shape[1])
if min_hw > self.cut_off_size:
rand_start_max = int(np.random.uniform(0, max_hw - self.cut_off_size - 1))
rand_start_min = int(np.random.uniform(0, min_hw - self.cut_off_size - 1))
if img.shape[0] == max_hw :
img = img[rand_start_max : rand_start_max + self.cut_off_size, rand_start_min : rand_start_min + self.cut_off_size]
label = label[rand_start_max : rand_start_max + self.cut_off_size, rand_start_min : rand_start_min + self.cut_off_size]
else :
img = img[rand_start_min : rand_start_min + self.cut_off_size, rand_start_max : rand_start_max + self.cut_off_size]
label = label[rand_start_min : rand_start_min + self.cut_off_size, rand_start_max : rand_start_max + self.cut_off_size]
elif max_hw > self.cut_off_size:
Contributor:

The logic is still weird here. What if both min_hw and max_hw are larger than cut_off_size?

rand_start = int(np.random.uniform(0, max_hw - min_hw - 1))
if img.shape[0] == max_hw :
img = img[rand_start : rand_start + min_hw, :]
label = label[rand_start : rand_start + min_hw, :]
else :
img = img[:, rand_start : rand_start + min_hw]
label = label[:, rand_start : rand_start + min_hw]
reshaped_mean = self.mean.reshape(1, 1, 3)
img = img - reshaped_mean
img = np.swapaxes(img, 0, 2)
img = np.swapaxes(img, 1, 2) # (c, h, w)
img = np.expand_dims(img, axis=0) # (1, c, h, w)
label = np.array(label) # (h, w)
label = np.expand_dims(label, axis=0) # (1, h, w)
return (img, label)

@property
def provide_data(self):
"""The name and shape of data provided by this iterator"""
return [(k, tuple([1] + list(v.shape[1:]))) for k, v in self.data]

@property
def provide_label(self):
"""The name and shape of label provided by this iterator"""
return [(k, tuple([1] + list(v.shape[1:]))) for k, v in self.label]

def get_batch_size(self):
return 1

def reset(self):
self.cursor = -1
self.f.close()
self.f = open(self.flist_name, 'r')

def iter_next(self):
self.cursor += 1
# allow the cursor to reach every sample (0 .. num_data - 1)
return self.cursor < self.num_data

def next(self):
"""return one dict which contains "data" and "label" """
if self.iter_next():
self.data, self.label = self._read()
return {self.data_name : self.data[0][1],
self.label_name : self.label[0][1]}
else:
raise StopIteration
79 changes: 79 additions & 0 deletions example/fcn-xs/fcn_xs.py
@@ -0,0 +1,79 @@
# pylint: skip-file
import sys, os
import argparse
# mxnet_train = "/home/work/wuwei/tools/mxnet/lib/python2.7/site-packages/mxnet-0.5.0-py2.7.egg"
mxnet_train = "/home/work/wuwei/.local/lib/python2.7/site-packages/mxnet-0.5.0-py2.7.egg"
if mxnet_train in sys.path:
sys.path.remove(mxnet_train)
sys.path.insert(0, mxnet_train)
import mxnet as mx
import numpy as np
import logging
import symbol_fcnxs
import init_fcnxs
from data import FileIter
from solver import Solver

logger = logging.getLogger()
logger.setLevel(logging.INFO)
ctx = mx.gpu(0)

def main():
fcnxs = symbol_fcnxs.get_fcn32s_symbol(numclass=21, workspace_default=1536)
fcnxs_model_prefix = "model_pascal/FCN32s_VGG16"
if args.model == "fcn16s":
fcnxs = symbol_fcnxs.get_fcn16s_symbol(numclass=21, workspace_default=1536)
fcnxs_model_prefix = "model_pascal/FCN16s_VGG16"
elif args.model == "fcn8s":
fcnxs = symbol_fcnxs.get_fcn8s_symbol(numclass=21, workspace_default=1536)
fcnxs_model_prefix = "model_pascal/FCN8s_VGG16"
arg_names = fcnxs.list_arguments()
_, fcnxs_args, fcnxs_auxs = mx.model.load_checkpoint(args.prefix, args.epoch)
if not args.retrain:
if args.init_type == "vgg16":
fcnxs_args, fcnxs_auxs = init_fcnxs.init_from_vgg16(ctx, fcnxs, fcnxs_args, fcnxs_auxs)
elif args.init_type == "fcnxs":
fcnxs_args, fcnxs_auxs = init_fcnxs.init_from_fcnxs(ctx, fcnxs, fcnxs_args, fcnxs_auxs)
train_dataiter = FileIter(
root_dir = "./VOC2012",
flist_name = "train.lst",
# cut_off_size = 400,
rgb_mean = (123.68, 116.779, 103.939),
)
val_dataiter = FileIter(
root_dir = "./VOC2012",
flist_name = "val.lst",
# cut_off_size = 400,
rgb_mean = (123.68, 116.779, 103.939),
)
model = Solver(
ctx = ctx,
symbol = fcnxs,
begin_epoch = 0,
num_epoch = 50,
arg_params = fcnxs_args,
aux_params = fcnxs_auxs,
learning_rate = 1e-10,
momentum = 0.99,
Contributor:

Do we need such a large momentum for training?

Contributor (author):

I followed Caffe's momentum setting, and the author said a larger momentum works well for image segmentation, so I did not change it for training. Maybe we can tune it later.

wd = 0.0005)
model.fit(
train_data = train_dataiter,
eval_data = val_dataiter,
batch_end_callback = mx.callback.Speedometer(1, 10),
epoch_end_callback = mx.callback.do_checkpoint(fcnxs_model_prefix))

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Train the fcn-xs model for image segmentation.')
parser.add_argument('--model', default='fcnxs',
help='The type of fcn-xs model, e.g. fcnxs, fcn16s, fcn8s.')
parser.add_argument('--prefix', default='VGG_FC_ILSVRC_16_layers',
help='The prefix(include path) of vgg16 model with mxnet format.')
parser.add_argument('--epoch', type=int, default=74,
help='The epoch number of vgg16 model.')
parser.add_argument('--init-type', default="vgg16",
help='the init type of fcn-xs model, e.g. vgg16, fcnxs')
parser.add_argument('--retrain', action='store_true', default=False,
help='true means continue training.')
args = parser.parse_args()
logging.info(args)
main()
60 changes: 60 additions & 0 deletions example/fcn-xs/image_segmentaion.py
@@ -0,0 +1,60 @@
# pylint: skip-file
import numpy as np
import mxnet as mx
from PIL import Image

pallete = [ 0,0,0,
128,0,0,
0,128,0,
128,128,0,
0,0,128,
128,0,128,
0,128,128,
128,128,128,
64,0,0,
192,0,0,
64,128,0,
192,128,0,
64,0,128,
192,0,128,
64,128,128,
192,128,128,
0,64,0,
128,64,0,
0,192,0,
128,192,0,
0,64,128 ]
img = "./person_bicycle.jpg"
seg = img.replace("jpg", "png")
model_previx = "FCN8s_VGG16"
epoch = 19
ctx = mx.gpu(0)

def get_data(img_path):
"""get the (1, 3, h, w) np.array data for the img_path"""
mean = np.array([123.68, 116.779, 103.939]) # (R,G,B)
img = Image.open(img_path)
img = np.array(img, dtype=np.float32)
reshaped_mean = mean.reshape(1, 1, 3)
img = img - reshaped_mean
img = np.swapaxes(img, 0, 2)
img = np.swapaxes(img, 1, 2)
img = np.expand_dims(img, axis=0)
return img

def main():
fcnxs, fcnxs_args, fcnxs_auxs = mx.model.load_checkpoint(model_previx, epoch)
fcnxs_args["data"] = mx.nd.array(get_data(img), ctx)
data_shape = fcnxs_args["data"].shape
label_shape = (1, data_shape[2]*data_shape[3])
fcnxs_args["softmax_label"] = mx.nd.empty(label_shape, ctx)
executor = fcnxs.bind(ctx, fcnxs_args, args_grad=None, grad_req="null", aux_states=fcnxs_auxs)
executor.forward(is_train=False)
output = executor.outputs[0]
out_img = np.uint8(np.squeeze(output.asnumpy().argmax(axis=1)))
out_img = Image.fromarray(out_img)
out_img.putpalette(pallete)
out_img.save(seg)

if __name__ == "__main__":
main()
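
The post-processing in main() can be illustrated without a trained model. A small sketch with random scores; the (1, 21, h, w) output shape is assumed from the argmax/squeeze calls above:

```python
import numpy as np

# Hypothetical scores for a tiny 4x4 image with 21 Pascal VOC classes,
# shaped (batch, num_class, h, w) like the assumed network output.
scores = np.random.rand(1, 21, 4, 4)

# Same post-processing as main(): per-pixel argmax over the class axis,
# drop the batch dimension, and cast to uint8 palette indices.
out_img = np.uint8(np.squeeze(scores.argmax(axis=1)))
```

Each pixel of ```out_img``` is a class index in 0..20, which ```Image.putpalette``` then maps to the RGB triples in ```pallete```.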