From 0b4ba3f08e0d0f957db809f38f9904cd7dc7a900 Mon Sep 17 00:00:00 2001
From: Zhilin Wang <wangzhilin12061996@hotmail.com>
Date: Thu, 19 May 2022 15:53:34 -0700
Subject: [PATCH 1/2] fix bugs for dialogue tutorial

Signed-off-by: Zhilin Wang <wangzhilin12061996@hotmail.com>
---
 tutorials/nlp/Dialogue.ipynb | 52 ++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb
index aca5e11ce0a1..aaaf8eb09ce2 100644
--- a/tutorials/nlp/Dialogue.ipynb
+++ b/tutorials/nlp/Dialogue.ipynb
@@ -28,7 +28,7 @@
       "source": [
         "import os \n",
         "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n",
-        "!git clone https://github.com/NVIDIA/NeMo --branch main\n",
+        "!git clone https://github.com/NVIDIA/NeMo --branch r1.9.0\n",
         "os.chdir('NeMo')\n",
         "!./reinstall.sh\n",
         "os.chdir('..')\n"
@@ -87,7 +87,7 @@
         "\n",
         "An example is:\n",
         "\n",
-        "* utterance: what alarms have i set for tomorrow intent: \n",
+        "* utterance: what alarms have i set for tomorrow \n",
         "* intent: alarm_query\n",
         "* slots: date(tomorrow)\n",
         "\n",
@@ -287,15 +287,20 @@
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "## 1.4 (Optional) To train/ test a GPT2 model on the assistant dataset, run the cell below "
-      ],
       "metadata": {
         "id": "-44x5PqyrOeQ"
-      }
+      },
+      "source": [
+        "## 1.4 (Optional) To train/test a GPT2 model on the assistant dataset, run the cell below "
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "QyqQbpR4rNHT"
+      },
+      "outputs": [],
       "source": [
         "# model.dataset.data_dir: folder to load data from\n",
         "# model.dataset.dialogues_example_dir: folder that stores predictions for each sample\n",
@@ -312,15 +317,13 @@
         "  model.dataset.target_template=with_slots \\\n",
         "  model.dataset.eval_mode=generation \\\n",
         "  exp_manager.create_wandb_logger=False)"
-      ],
-      "metadata": {
-        "id": "QyqQbpR4rNHT"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "FbQ-6TVM1yQg"
+      },
       "source": [
         "**After 1 epoch:**\n",
         "\n",
@@ -417,10 +420,7 @@
         "     test_loss_epoch       0.019178826361894608\n",
         "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n",
         "```"
-      ],
-      "metadata": {
-        "id": "FbQ-6TVM1yQg"
-      }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -437,7 +437,7 @@
         "\n",
         "An example is:\n",
         "\n",
-        "* utterance: I will be eating there at 11:30 am so make it for then.\n",
+        "* utterance: I will be eating there at 11:30 am so make the reservation for then.\n",
         "* intent: ReserveRestaurant\n",
         "* slots: {\"time\": \"11:30 am\"}\n",
         "\n",
@@ -498,14 +498,14 @@
     },
     {
       "cell_type": "code",
-      "source": [
-        "!ls sgd_gpt2_predictions"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "kGDlV5HvI2PQ"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "!ls sgd_gpt2_predictions"
+      ]
     },
     {
       "cell_type": "markdown",
@@ -515,7 +515,7 @@
       "source": [
         "**After 1 epoch:**\n",
         "\n",
-        "More epoches would needed to reach convergence.\n",
+        "More epochs would be needed to reach convergence.\n",
         "\n",
         "\n",
         "```\n",
@@ -590,7 +590,7 @@
         "An example is \n",
         "\n",
         "\n",
-        "*   question: what county is nine mile in\n",
+        "*   question: What county is Nine Mile in?\n",
         "*   extracted_answer: Onondaga\n",
         "*   fluent_answer: Nine Mile is in Onondaga county.\n"
       ]
@@ -667,7 +667,7 @@
       "source": [
         "**After 1 epoch:**\n",
         "\n",
-        "Train more epoches for optimal performance\n",
+        "Train more epochs for optimal performance\n",
         "\n",
         "```\n",
         "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n",
@@ -713,4 +713,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
\ No newline at end of file
+}

From 7495f126479dc8993f031809a5976b8866718ec9 Mon Sep 17 00:00:00 2001
From: Zhilin Wang <wangzhilin12061996@hotmail.com>
Date: Thu, 19 May 2022 21:01:10 -0700
Subject: [PATCH 2/2] update path for convert_datasets.py due to conflict PR

Signed-off-by: Zhilin Wang <wangzhilin12061996@hotmail.com>
---
 tutorials/nlp/Dialogue.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb
index aaaf8eb09ce2..853fb0345b4f 100644
--- a/tutorials/nlp/Dialogue.ipynb
+++ b/tutorials/nlp/Dialogue.ipynb
@@ -107,7 +107,7 @@
         "!wget https://github.com/xliuhw/NLU-Evaluation-Data/archive/master.zip\n",
         "!unzip master.zip\n",
         "# convert the dataset to the NeMo format\n",
-        "!python NeMo/examples/nlp/intent_slot_classification/data/import_datasets.py --dataset_name=assistant --source_data_dir=./NLU-Evaluation-Data-master --target_data_dir=./assistant\n"
+        "!python NeMo/scripts/dataset_processing/nlp/intent_and_slot/convert_datasets.py --dataset_name=assistant --source_data_dir=./NLU-Evaluation-Data-master --target_data_dir=./assistant"
       ]
     },
     {