From fd00aa853b6e4140735f091c3fb79e980520c730 Mon Sep 17 00:00:00 2001 From: shufan Date: Fri, 16 Feb 2018 00:11:58 +0800 Subject: [PATCH 1/3] Enable the reporting of cross-entropy or nll loss value during training --- example/image-classification/common/fit.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py index d9f96d0eba5b..80b814db33e3 100755 --- a/example/image-classification/common/fit.py +++ b/example/image-classification/common/fit.py @@ -117,6 +117,8 @@ def add_fit_args(parser): help='load the model on an epoch using the model-load-prefix') train.add_argument('--top-k', type=int, default=0, help='report the top-k accuracy. 0 means no report.') + train.add_argument('--loss', type=str, + help='show the cross-entropy or nll loss. ce stands for cross-entropy, nll stands for negative log-likelihood loss') train.add_argument('--test-io', type=int, default=0, help='1 means test reading speed without training') train.add_argument('--dtype', type=str, default='float32', @@ -260,6 +262,23 @@ def fit(args, network, data_loader, **kwargs): eval_metrics.append(mx.metric.create( 'top_k_accuracy', top_k=args.top_k)) + supported_loss = ['ce', 'nll_loss'] + if len(args.loss) > 0: + # ce or nll loss is only applicable to softmax output + loss_type_list = args.loss.split(',') + if 'softmax_output' in network.list_outputs(): + for loss_type in loss_type_list: + loss_type = loss_type.strip() + if loss_type == 'nll': + loss_type = 'nll_loss' + if loss_type not in supported_loss: + logging.warning(loss_type + ' is not a valid loss type, only cross-entropy or ' \ + 'negative log-likelihood loss is supported!') + else: + eval_metrics.append(mx.metric.create(loss_type)) + else: + logging.warning("The output is not softmax_output, loss argument will be skipped!") + # callbacks that run after each batch batch_end_callbacks = [mx.callback.Speedometer( args.batch_size, 
args.disp_batches)] From 36940a2ace411cde20a4cf6995a7cc20cc75cd52 Mon Sep 17 00:00:00 2001 From: shufan Date: Mon, 19 Feb 2018 01:10:09 +0800 Subject: [PATCH 2/3] Set the default value of loss to '' to avoid a Python runtime issue when the loss argument is not set --- example/image-classification/common/fit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py index 80b814db33e3..0e0cd521f28d 100755 --- a/example/image-classification/common/fit.py +++ b/example/image-classification/common/fit.py @@ -117,7 +117,7 @@ def add_fit_args(parser): help='load the model on an epoch using the model-load-prefix') train.add_argument('--top-k', type=int, default=0, help='report the top-k accuracy. 0 means no report.') - train.add_argument('--loss', type=str, + train.add_argument('--loss', type=str, default='', help='show the cross-entropy or nll loss. ce stands for cross-entropy, nll stands for negative log-likelihood loss') train.add_argument('--test-io', type=int, default=0, help='1 means test reading speed without training') From 3df418cd424d5d1d3ddc0bbb75949ee4cc180b87 Mon Sep 17 00:00:00 2001 From: shufan Date: Thu, 22 Feb 2018 14:50:58 +0800 Subject: [PATCH 3/3] Applying the Xavier with "uniform" type to initialize weight when network is VGG --- example/image-classification/common/fit.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py index 0e0cd521f28d..9412b6f9371b 100755 --- a/example/image-classification/common/fit.py +++ b/example/image-classification/common/fit.py @@ -237,6 +237,9 @@ def fit(args, network, data_loader, **kwargs): if args.network == 'alexnet': # AlexNet will not converge using Xavier initializer = mx.init.Normal() + # VGG will not tend to converge using Xavier-Gaussian + elif 'vgg' in args.network: + initializer = mx.init.Xavier() + else: initializer = mx.init.Xavier( 
rnd_type='gaussian', factor_type="in", magnitude=2)