Merged
4 changes: 2 additions & 2 deletions docs/source/asr/speech_classification/results.rst
@@ -51,12 +51,12 @@ Learn how to fine tune on your own data or on subset classes in ``<NeMo_git_root

.. code-block:: bash

-python <NeMo-git-root>/examples/asr/speech_classification/vad_infer.py --config-path="../conf/VAD" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>
+python <NeMo-git-root>/examples/asr/speech_classification/vad_infer.py --config-path="../conf/vad" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>


This script performs frame-level VAD prediction and, if needed, also helps you run postprocessing to generate speech segments.

-Have a look at configuration file ``<NeMo-git-root>/examples/asr/conf/VAD/vad_inference_postprocessing.yaml`` and scripts under ``<NeMo-git-root>/scripts/voice_activity_detection`` for details regarding posterior processing, postprocessing and threshold tuning.
+Have a look at configuration file ``<NeMo-git-root>/examples/asr/conf/vad/vad_inference_postprocessing.yaml`` and scripts under ``<NeMo-git-root>/scripts/voice_activity_detection`` for details regarding posterior processing, postprocessing and threshold tuning.

Posterior processing includes generating predictions with overlapping input segments. Then a smoothing filter is applied to decide the label for a frame spanned by multiple segments.
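The overlapping-segment smoothing described above can be sketched as follows. This helper is not part of NeMo or of this PR; the function name, the choice of mean smoothing, and the NumPy array layout are illustrative assumptions only:

```python
import numpy as np

def smooth_overlapping_predictions(segment_preds, segment_starts, total_frames):
    """Average frame-level speech probabilities from overlapping segments.

    segment_preds: list of 1-D arrays, each holding the per-frame speech
        probabilities predicted for one (possibly overlapping) input segment.
    segment_starts: frame offset of each segment within the full recording.
    total_frames: number of frames in the full recording.
    """
    acc = np.zeros(total_frames)
    counts = np.zeros(total_frames)
    for preds, start in zip(segment_preds, segment_starts):
        end = start + len(preds)
        acc[start:end] += preds
        counts[start:end] += 1
    counts[counts == 0] = 1  # avoid division by zero for uncovered frames
    return acc / counts
```

A frame covered by several segments gets the mean of their predictions; NeMo's actual postprocessing scripts also offer other smoothing variants, so treat this as the simplest case.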

4 changes: 2 additions & 2 deletions examples/asr/speech_classification/vad_infer.py
@@ -27,7 +27,7 @@
This script will also help you perform postprocessing and generate speech segments if needed

Usage:
-python vad_infer.py --config-path="../conf/VAD" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>
+python vad_infer.py --config-path="../conf/vad" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>

"""
import json
@@ -49,7 +49,7 @@
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


-@hydra_runner(config_path="../conf/VAD", config_name="vad_inference_postprocessing.yaml")
+@hydra_runner(config_path="../conf/vad", config_name="vad_inference_postprocessing.yaml")
def main(cfg):
if not cfg.dataset:
raise ValueError("You must input the path of json file of evaluation data")
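As the usage note above says, the `dataset` argument expects a JSON-lines manifest whose audio files have unique names. Not part of the PR, but a hypothetical pre-flight check (the function name and error messages are my own) could look like:

```python
import json
from pathlib import Path

def check_vad_manifest(manifest_path):
    """Validate a JSON-lines manifest: every entry needs an
    'audio_filepath', and the base file names must be unique,
    per the usage note's 'unique names' requirement."""
    seen = set()
    with open(manifest_path) as f:
        for lineno, line in enumerate(f, 1):
            if not line.strip():
                continue  # tolerate blank lines
            entry = json.loads(line)
            name = Path(entry["audio_filepath"]).name
            if name in seen:
                raise ValueError(
                    f"duplicate audio file name {name!r} on line {lineno}"
                )
            seen.add(name)
    return len(seen)
```

Running such a check before `vad_infer.py` surfaces duplicate names early instead of letting per-file outputs silently collide.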
8 changes: 4 additions & 4 deletions tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb
@@ -123,9 +123,9 @@
"if not os.path.exists(\"scripts/transcribe_speech.py\"):\n",
" !wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/transcribe_speech.py\n",
" \n",
-"!mkdir -p conf/VAD\n",
-"if not os.path.exists(\"conf/VAD/vad_inference_postprocessing.yaml\"):\n",
-" !wget -P conf/VAD/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/VAD/vad_inference_postprocessing.yaml"
+"!mkdir -p conf/vad\n",
+"if not os.path.exists(\"conf/vad/vad_inference_postprocessing.yaml\"):\n",
+" !wget -P conf/vad/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/vad/vad_inference_postprocessing.yaml"
]
},
{
@@ -166,7 +166,7 @@
"outputs": [],
"source": [
"# if run locally, vad_infer.py is located in <NeMo_git_root>/examples/asr/speech_classification/vad_infer.py\n",
-"%run -i scripts/vad_infer.py --config-path=\"../conf/VAD\" --config-name=\"vad_inference_postprocessing.yaml\" \\\n",
+"%run -i scripts/vad_infer.py --config-path=\"../conf/vad\" --config-name=\"vad_inference_postprocessing.yaml\" \\\n",
"dataset=$input_manifest \\\n",
"vad.model_path=$vad_model \\\n",
"frame_out_dir=\"chris_demo\" \\\n",
2 changes: 1 addition & 1 deletion tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb
@@ -202,7 +202,7 @@
"You can find all necessary steps about inference in \n",
"```python\n",
" Script: <NeMo_git_root>/examples/asr/speech_classification/vad_infer.py \n",
-" Config: <NeMo_git_root>/examples/asr/conf/VAD/vad_inference_postprocessing.yaml\n",
+" Config: <NeMo_git_root>/examples/asr/conf/vad/vad_inference_postprocessing.yaml\n",
"```\n",
"During inference, we generate frame-level predictions by two approaches:\n",
"\n",