From 723364f7f2c641903c1bc6407b20c28e7d8e54c1 Mon Sep 17 00:00:00 2001
From: fayejf
Date: Thu, 12 May 2022 15:09:35 -0700
Subject: [PATCH] rename folder VAD->vad

Signed-off-by: fayejf
---
 docs/source/asr/speech_classification/results.rst        | 4 ++--
 .../conf/{VAD => vad}/vad_inference_postprocessing.yaml  | 0
 examples/asr/speech_classification/vad_infer.py          | 4 ++--
 tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb  | 8 ++++----
 tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb   | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)
 rename examples/asr/conf/{VAD => vad}/vad_inference_postprocessing.yaml (100%)

diff --git a/docs/source/asr/speech_classification/results.rst b/docs/source/asr/speech_classification/results.rst
index c00e1392c358..44fc1fb6a910 100644
--- a/docs/source/asr/speech_classification/results.rst
+++ b/docs/source/asr/speech_classification/results.rst
@@ -51,12 +51,12 @@ Learn how to fine tune on your own data or on subset classes in
 
-    python <NeMo_git_root>/examples/asr/speech_classification/vad_infer.py --config-path="../conf/VAD" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>
+    python <NeMo_git_root>/examples/asr/speech_classification/vad_infer.py --config-path="../conf/vad" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>
 
 This script will perform vad frame-level prediction and will help you perform postprocessing and generate speech segments as well if needed.
 
-Have a look at configuration file ``<NeMo_git_root>/examples/asr/conf/VAD/vad_inference_postprocessing.yaml`` and scripts under ``<NeMo_git_root>/scripts/voice_activity_detection`` for details regarding posterior processing, postprocessing and threshold tuning.
+Have a look at configuration file ``<NeMo_git_root>/examples/asr/conf/vad/vad_inference_postprocessing.yaml`` and scripts under ``<NeMo_git_root>/scripts/voice_activity_detection`` for details regarding posterior processing, postprocessing and threshold tuning.
 
 Posterior processing includes generating predictions with overlapping input segments. Then a smoothing filter is applied to decide the label for a frame spanned by multiple segments.
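Reviewer note: the posterior processing the docs describe (predictions from overlapping input segments, smoothed into one label per frame) can be pictured with a minimal sketch. Everything below is illustrative only; `smooth_overlapping_predictions` and its arguments are hypothetical names, not NeMo's actual API, which lives under `scripts/voice_activity_detection`.

```python
import numpy as np

def smooth_overlapping_predictions(segment_probs, segment_starts, n_frames):
    """Mean-smooth per-segment speech probabilities into per-frame scores."""
    acc = np.zeros(n_frames)  # sum of probabilities per frame
    cnt = np.zeros(n_frames)  # number of segments covering each frame
    for probs, start in zip(segment_probs, segment_starts):
        end = start + len(probs)
        acc[start:end] += probs
        cnt[start:end] += 1
    # a frame spanned by several overlapping segments gets their average
    return acc / np.maximum(cnt, 1)

# two 4-frame segments overlapping on frames 2-3 of a 6-frame clip
frame_probs = smooth_overlapping_predictions(
    [np.array([0.9, 0.8, 0.7, 0.6]), np.array([0.5, 0.4, 0.3, 0.2])],
    segment_starts=[0, 2],
    n_frames=6,
)
speech_frames = frame_probs > 0.5  # threshold is a tunable postprocessing knob
```

Mean smoothing is only one choice of filter; the config and scripts referenced above also expose the window overlap and threshold as tunables.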
diff --git a/examples/asr/conf/VAD/vad_inference_postprocessing.yaml b/examples/asr/conf/vad/vad_inference_postprocessing.yaml
similarity index 100%
rename from examples/asr/conf/VAD/vad_inference_postprocessing.yaml
rename to examples/asr/conf/vad/vad_inference_postprocessing.yaml
diff --git a/examples/asr/speech_classification/vad_infer.py b/examples/asr/speech_classification/vad_infer.py
index b59352ef75b7..675a1e11cf6f 100644
--- a/examples/asr/speech_classification/vad_infer.py
+++ b/examples/asr/speech_classification/vad_infer.py
@@ -27,7 +27,7 @@
 This script will also help you perform postprocessing and generate speech segments if needed
 
 Usage:
-python vad_infer.py --config-path="../conf/VAD" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>
+python vad_infer.py --config-path="../conf/vad" --config-name="vad_inference_postprocessing.yaml" dataset=<Path of json file of evaluation data. Audio files should have unique names>
 
 """
 
 import json
@@ -49,7 +49,7 @@
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-@hydra_runner(config_path="../conf/VAD", config_name="vad_inference_postprocessing.yaml")
+@hydra_runner(config_path="../conf/vad", config_name="vad_inference_postprocessing.yaml")
 def main(cfg):
     if not cfg.dataset:
         raise ValueError("You must input the path of json file of evaluation data")
diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb
index af6c8ffc477e..561a406a226b 100644
--- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb
+++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb
@@ -123,9 +123,9 @@
     "if not os.path.exists(\"scripts/transcribe_speech.py\"):\n",
     "    !wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/transcribe_speech.py\n",
     "    \n",
-    "!mkdir -p conf/VAD\n",
-    "if not os.path.exists(\"conf/VAD/vad_inference_postprocessing.yaml\"):\n",
-    "    !wget -P conf/VAD/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/VAD/vad_inference_postprocessing.yaml"
+    "!mkdir -p conf/vad\n",
+    "if not os.path.exists(\"conf/vad/vad_inference_postprocessing.yaml\"):\n",
+    "    !wget -P conf/vad/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/vad/vad_inference_postprocessing.yaml"
    ]
  },
  {
@@ -166,7 +166,7 @@
    "outputs": [],
    "source": [
     "# if run locally, vad_infer.py is located in <NeMo_git_root>/examples/asr/speech_classification/vad_infer.py\n",
-    "%run -i scripts/vad_infer.py --config-path=\"../conf/VAD\" --config-name=\"vad_inference_postprocessing.yaml\" \\\n",
+    "%run -i scripts/vad_infer.py --config-path=\"../conf/vad\" --config-name=\"vad_inference_postprocessing.yaml\" \\\n",
     "dataset=$input_manifest \\\n",
     "vad.model_path=$vad_model \\\n",
     "frame_out_dir=\"chris_demo\" \\\n",
diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb
index e34e30329e4e..ee865c705b29 100644
--- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb
+++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb
@@ -202,7 +202,7 @@
     "You can find all necessary steps about inference in \n",
     "```python\n",
     "    Script: <NeMo_git_root>/examples/asr/speech_classification/vad_infer.py \n",
-    "    Config: <NeMo_git_root>/examples/asr/conf/VAD/vad_inference_postprocessing.yaml\n",
+    "    Config: <NeMo_git_root>/examples/asr/conf/vad/vad_inference_postprocessing.yaml\n",
     "```\n",
     "During inference, we generate frame-level prediction by two approaches:\n",
     "\n",
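Reviewer note: the postprocessing that vad_infer.py's docstring mentions (generating speech segments from frame-level predictions) can be sketched as hysteresis thresholding over the smoothed frame probabilities. `frames_to_segments` and its defaults are hypothetical; onset/offset-style thresholds are the kind of tunable exposed in vad_inference_postprocessing.yaml, but this is not NeMo's implementation.

```python
def frames_to_segments(frame_probs, frame_len=0.01, onset=0.5, offset=0.3):
    """Hysteresis thresholding: enter speech above `onset`, leave below `offset`."""
    segments, start, in_speech = [], 0.0, False
    for i, p in enumerate(frame_probs):
        t = i * frame_len  # timestamp of this frame in seconds
        if not in_speech and p >= onset:
            in_speech, start = True, t
        elif in_speech and p < offset:
            in_speech = False
            segments.append((start, t))
    if in_speech:  # close a segment still open at the end of the audio
        segments.append((start, len(frame_probs) * frame_len))
    return segments

# roughly [(0.01, 0.04), (0.05, 0.07)]: two short speech segments
print(frames_to_segments([0.1, 0.6, 0.7, 0.4, 0.2, 0.8, 0.9]))
```

Using a lower exit threshold than entry threshold keeps brief dips in probability from splitting one utterance into many segments.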