Update documentation for deconvolution operation. (#6184)

indhub · piiswrong · commit 780009ccaafa · 2017-05-25T17:25:12.000-07:00
* Update documentation for deconvolution operation.

* Add examples.

* Remove the example.
diff --git a/src/operator/deconvolution-inl.h b/src/operator/deconvolution-inl.h
@@ -43,26 +43,30 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
   bool cudnn_off;
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(DeconvolutionParam) {
-    DMLC_DECLARE_FIELD(kernel).describe("deconvolution kernel size: (h, w) or (d, h, w)");
+    DMLC_DECLARE_FIELD(kernel).describe("Deconvolution kernel size: (h, w) or (d, h, w). "
+                  "This is the same as the kernel size used for the corresponding convolution.");
     DMLC_DECLARE_FIELD(stride).set_default(TShape())
-        .describe("deconvolution stride: (h, w) or (d, h, w)");
+        .describe("The stride used for the corresponding convolution: (h, w) or (d, h, w).");
     DMLC_DECLARE_FIELD(dilate).set_default(TShape())
-    .describe("deconvolution dilate: (h, w) or (d, h, w)");
+        .describe("Dilation factor for each dimension of the input: (h, w) or (d, h, w).");
     DMLC_DECLARE_FIELD(pad).set_default(TShape())
-        .describe("pad for deconvolution: (h, w) or (d, h, w). "
-                  "A good number is : (kernel-1)/2. "
-                  "If target_shape is set, "
-                  "pad will be ignored and computed accordingly");
+        .describe("The amount of implicit zero padding added during convolution for each "
+                  "dimension of the input: "
+                  "(h, w) or (d, h, w). "
+                  "``(kernel-1)/2`` is usually a good choice. "
+                  "If `target_shape` is set, "
+                  "`pad` will be ignored and a padding that will generate the target shape "
+                  "will be used.");
     DMLC_DECLARE_FIELD(adj).set_default(TShape())
-        .describe("adjustment for output shape: (h, w) or (d, h, w). "
-                  "If target_shape is set, "
-                  "ad will be ignored and computed accordingly");
+        .describe("Adjustment for output shape: (h, w) or (d, h, w). "
+                  "If `target_shape` is set, "
+                  "`adj` will be ignored and computed accordingly.");
     DMLC_DECLARE_FIELD(target_shape).set_default(TShape())
-        .describe("output shape with target shape : (h, w) or (d, h, w)");
+        .describe("Shape of the output tensor: (h, w) or (d, h, w).");
     DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000)
-        .describe("deconvolution filter(channel) number");
+        .describe("Number of output filters.");
     DMLC_DECLARE_FIELD(num_group).set_default(1)
-        .describe("number of groups partition");
+        .describe("Number of group partitions.");
     DMLC_DECLARE_FIELD(workspace).set_default(512).set_range(0, 8192)
       .describe("Maximum temporal workspace allowed for deconvolution (MB).");
     DMLC_DECLARE_FIELD(no_bias).set_default(true)
@@ -72,7 +76,7 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
       .add_enum("limited_workspace", deconv::kLimited)
       .add_enum("fastest", deconv::kFastest)
       .set_default(dmlc::optional<int>())
-      .describe("Whether to pick convolution algo by running performance test.");
+      .describe("Whether to pick convolution algorithm by running performance test.");
     DMLC_DECLARE_FIELD(cudnn_off).set_default(false)
     .describe("Turn off cudnn for this layer.");
     DMLC_DECLARE_FIELD(layout)
@@ -82,8 +86,8 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
       .add_enum("NHWC", mshadow::kNHWC)
       .add_enum("NDHWC", mshadow::kNDHWC)
       .set_default(dmlc::optional<int>())
-      .describe("Set layout for input, output and weight. Empty for\n    "
-                "default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.");
+      .describe("Set layout for input, output and weight. Empty for "
+                "default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.");
   }
 
   template<size_t ndim>
diff --git a/src/operator/deconvolution.cc b/src/operator/deconvolution.cc
@@ -33,11 +33,16 @@ Operator* DeconvolutionProp::CreateOperatorEx(Context ctx, std::vector<TShape> *
 DMLC_REGISTER_PARAMETER(DeconvolutionParam);
 
 MXNET_REGISTER_OP_PROPERTY(Deconvolution, DeconvolutionProp)
-.add_argument("data", "NDArray-or-Symbol", "Input data to the DeconvolutionOp.")
-.add_argument("weight", "NDArray-or-Symbol", "Weight matrix.")
-.add_argument("bias", "NDArray-or-Symbol", "Bias parameter.")
+.add_argument("data", "NDArray-or-Symbol", "Input tensor to the deconvolution operation.")
+.add_argument("weight", "NDArray-or-Symbol", "Weights representing the kernel.")
+.add_argument("bias", "NDArray-or-Symbol", "Bias added to the result after the deconvolution "
+    "operation.")
 .add_arguments(DeconvolutionParam::__FIELDS__())
-.describe("Applies deconvolution to input and adds a bias.");
+.describe("Computes 2D transposed convolution (aka fractionally strided convolution) of the "
+    "input tensor. This operation can be seen as the gradient of Convolution operation with "
+    "respect to its input. Convolution usually reduces the size of the input. Transposed "
+    "convolution works the other way, going from a smaller input to a larger output while "
+    "preserving the connectivity pattern.");
 
 }  // namespace op
 }  // namespace mxnet