From 5fc04590fc43479a13d6b8715c346b0bdef5bbb7 Mon Sep 17 00:00:00 2001 From: ericharper Date: Mon, 24 Oct 2022 23:26:43 -0600 Subject: [PATCH 01/20] update branch Signed-off-by: ericharper --- Jenkinsfile | 288 +++++++++--------- nemo/package_info.py | 2 +- tutorials/00_NeMo_Primer.ipynb | 2 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 2 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- tutorials/VoiceSwapSample.ipynb | 2 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 2 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 4 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 2 +- tutorials/asr/Multilang_ASR.ipynb | 2 +- tutorials/asr/Offline_ASR.ipynb | 2 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 2 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 2 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 2 +- .../asr/Self_Supervised_Pre_Training.ipynb | 2 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 2 +- ...Language_Models_for_Downstream_Tasks.ipynb | 2 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 2 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 2 +- .../nlp/Multitask_Prompt_and_PTuning.ipynb | 2 +- .../Non_English_Downstream_Tasks_(NER).ipynb | 2 +- .../nlp/Punctuation_and_Capitalization.ipynb | 2 +- ...ion_and_Capitalization_Lexical_Audio.ipynb | 2 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- tutorials/nlp/Text2Sparql.ipynb | 4 +- ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- .../ASR_with_SpeakerDiarization.ipynb | 2 +- .../Speaker_Diarization_Inference.ipynb | 2 +- .../Speaker_Diarization_Training.ipynb | 2 +- .../Speaker_Identification_Verification.ipynb | 2 +- .../ITN_with_Thutmose_Tagger.ipynb | 2 +- .../Text_(Inverse)_Normalization.ipynb | 2 +- tutorials/text_processing/WFST_Tutorial.ipynb | 2 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 2 +- tutorials/tools/Multispeaker_Simulator.ipynb | 2 +- .../tts/Aligner_Inference_Examples.ipynb | 2 +- tutorials/tts/FastPitch_Finetuning.ipynb | 2 +- .../tts/FastPitch_GermanTTS_Training.ipynb | 2 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 2 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- tutorials/tts/Tacotron2_Training.ipynb | 2 +- 66 files changed, 213 insertions(+), 213 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3da4efea155b..212eec02697b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -93,8 +93,8 @@ pipeline { stage('L0: Unit 
Tests CPU') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } steps { @@ -106,8 +106,8 @@ pipeline { stage('L0: TN/ITN Tests CPU') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -139,8 +139,8 @@ pipeline { stage('L2: NeMo text processing') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -191,8 +191,8 @@ pipeline { stage('L2: ASR dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -291,8 +291,8 @@ pipeline { stage('L2: ASR dev run - part two') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -322,8 +322,8 @@ pipeline { stage('L2: Speaker dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -444,8 +444,8 @@ pipeline { // stage('L2: ASR DALI dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -512,8 +512,8 @@ pipeline { // stage('L2: ASR RNNT dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -604,8 +604,8 @@ pipeline { stage('L2: ASR Multi-dataloader dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -652,8 +652,8 @@ pipeline { stage('L2: ASR Adapters') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -680,8 +680,8 @@ pipeline { stage('L2: Megatron T5 Adapter PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -724,8 +724,8 @@ pipeline { stage('L2: Megatron T5 Adapter TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -766,8 +766,8 @@ pipeline { stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -810,8 +810,8 @@ pipeline { stage('L2: Megatron T5 IA3 TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -852,8 +852,8 @@ pipeline { stage('L2: Megatron GPT Adapter TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -893,8 +893,8 @@ pipeline { stage('L2: Megatron GPT Adapter PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -935,8 +935,8 @@ pipeline { stage('L2: Speech Transcription') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -957,8 +957,8 @@ pipeline { stage('L2: Segmentation Tool') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + 
changeRequest target: 'r1.13.0' } } stages { @@ -1013,8 +1013,8 @@ pipeline { stage('L2: G2P Models') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1094,8 +1094,8 @@ pipeline { // stage('L2: Multi-GPU Megatron finetuning') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -1121,8 +1121,8 @@ pipeline { stage('L2: STS-b') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1181,8 +1181,8 @@ pipeline { stage('L2: Dialogue Classification') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1352,8 +1352,8 @@ pipeline { stage('L2: Dialogue Generation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1418,8 +1418,8 @@ pipeline { // stage('L2: Dialogue Generation Part 2') { // when { // anyOf { -// branch 'main' -// changeRequest target: 'main' +// branch 'r1.13.0' +// changeRequest target: 'r1.13.0' // } // } // failFast true @@ -1448,8 +1448,8 @@ pipeline { stage('L2: COPY') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1478,8 +1478,8 @@ pipeline { stage('L2: Duplex Text Normalization') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1516,8 +1516,8 @@ pipeline { // stage('L2: MegaBERT Token Classification') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -1542,8 +1542,8 @@ pipeline { stage('L2: BERT Text Classification') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1571,8 +1571,8 @@ pipeline { stage('L2: Parallel BERT Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1630,8 +1630,8 @@ pipeline { stage('L2: Parallel BART Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1691,8 +1691,8 @@ pipeline { stage('L2: Parallel GPT2 Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1752,8 +1752,8 @@ pipeline { stage('L2: Intent and Slot Classification Tasks') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -1792,8 +1792,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Text Classification') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -1821,8 +1821,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Autoresume') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } 
// failFast true @@ -1852,8 +1852,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -1873,8 +1873,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Train from .nemo') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -1896,8 +1896,8 @@ pipeline { stage('L2: Parallel NLP Examples 2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2021,8 +2021,8 @@ pipeline { stage('Punctuation & Capitalization tarred dataset') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2080,8 +2080,8 @@ pipeline { stage('Punctuation & Capitalization, Different ways of passing labels to model') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2188,8 +2188,8 @@ pipeline { stage('Punctuation & Capitalization inference') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2214,8 +2214,8 @@ pipeline { stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2276,8 +2276,8 @@ pipeline { stage('L2: Entity Linking') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2304,8 +2304,8 @@ pipeline { stage('L2: NMT Attention is All You Need Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2427,8 +2427,8 @@ pipeline { stage('L2: NMT Attention is All You Need Inference') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2463,8 +2463,8 @@ pipeline { stage('L2: NMT Attention is All You Need Finetuning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2497,8 +2497,8 @@ pipeline { stage('L2: NMT with HuggingFace') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2574,8 +2574,8 @@ pipeline { stage('L2: NMT Tarred Dataset Creation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2628,8 +2628,8 @@ pipeline { stage('L2: Megatron NMT Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -2723,8 +2723,8 @@ pipeline { // stage('L2: NMT Bottleneck Fallback') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -2770,8 +2770,8 @@ pipeline { // stage('L2: NMT Bottleneck Architecture') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 
'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -2853,8 +2853,8 @@ pipeline { // stage('L2: NMT Bottleneck LVM') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -3006,8 +3006,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3077,8 +3077,8 @@ pipeline { stage('L2: Megatron RETRO Pretraining and Resume Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3149,8 +3149,8 @@ pipeline { stage('L2: Megatron RETRO muTransfer Pretraining Performance') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3232,8 +3232,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: BioMegatron Bert NER Task') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3250,8 +3250,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3322,8 +3322,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3394,8 +3394,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3411,8 +3411,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval PP2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3460,8 +3460,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Learning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3533,8 +3533,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Convert from Megatron-LM checkpoing and Eval') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.13.0' + // changeRequest target: 'r1.13.0' // } // } // failFast true @@ -3560,8 +3560,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Change Partitions') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3599,8 +3599,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3695,8 +3695,8 @@ 
assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3809,8 +3809,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3908,8 +3908,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -3988,8 +3988,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Eval') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -4005,8 +4005,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -4074,8 +4074,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -4147,8 +4147,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 GLUE/XNLI Finetuning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true @@ -4220,8 +4220,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: TTS Fast dev runs 1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } parallel { @@ -4366,8 +4366,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L??: Speech Checkpoints tests') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.13.0' + changeRequest target: 'r1.13.0' } } failFast true diff --git a/nemo/package_info.py b/nemo/package_info.py index 071179db59c3..3570f1ff1f6f 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -16,7 +16,7 @@ MAJOR = 1 MINOR = 14 PATCH = 0 -PRE_RELEASE = 'rc0' +PRE_RELEASE = '' # Use the following formatting: (major, minor, patch, pre-release) VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 5e5dcbb92c1e..aac1ee3b72c6 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index 6f230e62c1a3..c537f2c86855 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 
'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index 75942c6bf4af..c7c6bd32137e 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index c4fec16c4181..f0ab7df20199 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# install Pynini for text normalization\n", diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index 1b951e7b9e8c..a13174033e0c 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index 016737f26a9f..7c895e4e6681 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# install Pynini for text normalization\n", diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index aad696e667b9..27b229af8a4c 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -39,7 +39,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index 5be3b50502b3..787b448620f7 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index 519456a012af..9b86fab7e900 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -53,7 +53,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ 
-587,7 +587,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'main'\n", + " BRANCH = 'r1.13.0'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index 50e4f4536908..224984b64cca 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index f0efdf1cb363..b59153517558 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index 2d42749524d9..939355de6368 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index 9a6b7b2380cf..eb4676b6e01f 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -45,7 +45,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index a82a4804ca56..d03508f52168 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -43,7 +43,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 8320cc8a07c9..06dd5f3d1ef2 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -101,7 +101,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install 
git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 2dd4cbe9d814..6fc3862fb3a1 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -51,7 +51,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index 29913fe0fe73..d05503c0f1f3 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -22,7 +22,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 5d2f1451d1bf..751a33cdd705 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index 5756c7d58ebe..9781d965b0c1 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -31,7 +31,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index 2076bc06982b..43b7c74e1db8 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 2488e46287a6..3e1f05369e48 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -28,7 +28,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 0f0270c1ad75..c0b0f8aff869 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install 
git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 14cf1dc3812f..40ce00ae23c2 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -60,7 +60,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index 5d4d5b188e18..f4aa8d160057 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index f0d2ef14ce6f..8ef5322b13a2 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 468c602a8765..2fcd2f399940 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -51,7 +51,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index faa93de12514..120207831ce6 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index c63d2a8b1689..f6b56e0712a3 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.13.0'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index 323bfa1c49b8..e535f7594f97 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ 
b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index ddd3bdd4f929..8395fb4c8eb1 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 0d7a1d5c8de5..dd41a25e5601 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using google collab or if its not installed locally\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index d8fe75940b09..203a278bea88 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.13.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index 104d69df18e2..c548bdb02161 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index f925d2bc59b0..54ad754e4617 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.13.0'" ] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index 3dc3d6ce192e..985fdb568042 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -62,7 +62,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 512a38bc90cc..c442913ec8ae 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -7,7 +7,7 @@ 
"metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.13.0'" ] }, { diff --git a/tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb b/tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb index bfa56e5a2567..f088f8ca4627 100644 --- a/tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb +++ b/tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb @@ -8,7 +8,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.13.0'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 1519c234372b..aa80ebb5bd91 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.13.0'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 5580bc4cf946..57d443ddf5df 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.13.0'" ] }, { diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index 5ce89b3baafc..f461a5f651ef 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.13.0'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index b7c25cb416ef..54ff9d7ccabb 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.13.0'" ] }, { diff --git a/tutorials/nlp/Text2Sparql.ipynb b/tutorials/nlp/Text2Sparql.ipynb index b734e72c1fc6..5b238ca27e60 100644 --- a/tutorials/nlp/Text2Sparql.ipynb +++ b/tutorials/nlp/Text2Sparql.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -149,7 +149,7 @@ "WORK_DIR = \"PATH_TO_CHECKPOINTS_AND_LOGS\"\n", "\n", "# NeMo Version\n", - "BRANCH = 'main'\n" + "BRANCH = 'r1.13.0'\n" ] }, { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 5b5b74e7bf11..b38f23002b6e 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index b07dfb061625..304befe44a14 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.13.0'" ] }, { 
diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index 0e8fadde8041..bc41c8568844 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.13.0'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.13.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 69df7b27b02d..05706014b9ba 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index ea943b35e0d0..edb982a6fa0e 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 64ceb49d7d64..9c790824afcb 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index 91df72848614..c401591ea319 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "\n", "! 
git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 8e3ae9c1f131..0b19f83bbcd8 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", diff --git a/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb b/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb index b72cee51003b..dcf944769e9f 100644 --- a/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb +++ b/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb @@ -21,7 +21,7 @@ "import os\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "\n", "GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n", "\n", diff --git a/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb b/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb index 596523b41c0a..e00dfc9463de 100644 --- a/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb +++ b/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb @@ -60,7 +60,7 @@ "outputs": [], "source": [ "## Install NeMo, which installs both nemo and nemo_text_processing package\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n", "# install Pynini for text normalization\n", diff --git a/tutorials/text_processing/WFST_Tutorial.ipynb b/tutorials/text_processing/WFST_Tutorial.ipynb index ed7127241dd5..51daded0b796 100644 --- a/tutorials/text_processing/WFST_Tutorial.ipynb +++ b/tutorials/text_processing/WFST_Tutorial.ipynb @@ -39,7 +39,7 @@ "outputs": [], "source": [ "## Install NeMo, which installs both nemo and nemo_text_processing package\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nemo_text_processing]\n", "\n", "# install Pynini for text normalization\n", diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index d22258885db8..25f63da12df9 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index 8b0db6e75b49..e9822fd0ea9e 100644 --- a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index f6acbfa2c0d4..d32305579166 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. 
Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb index fe607944c1e8..034e9e050aaf 100755 --- a/tutorials/tts/FastPitch_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Finetuning.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode pynini==2.1.4\n", diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index 0b2e4f3fe132..a7de4224ae97 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -51,7 +51,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode pynini==2.1.4 scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 1b2ebc66ea3b..7547d478b5b2 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode pynini==2.1.4 scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb index eda5bba0aa1e..7f55c3d2e12f 100644 --- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb +++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb @@ -94,7 +94,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index c4879f38274c..59a01c628449 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. 
Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb index 8fe398edafa6..71067530b311 100644 --- a/tutorials/tts/Inference_ModelSelect.ipynb +++ b/tutorials/tts/Inference_ModelSelect.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index 21c366155b17..938eac687d07 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index 3642a3e9e4dc..995a204249a9 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. 
Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.13.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", From eca5363368d29f9842f72c8acda33958cf8fa573 Mon Sep 17 00:00:00 2001 From: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Date: Wed, 9 Nov 2022 15:35:35 -0800 Subject: [PATCH 02/20] Rename Speech Dataset Processor to Speech Data Processor (#5378) Signed-off-by: Elena Rastorgueva Signed-off-by: Elena Rastorgueva --- tools/speech_data_processor/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/speech_data_processor/requirements.txt b/tools/speech_data_processor/requirements.txt index 63904d71d9c9..e07336a0d3c3 100644 --- a/tools/speech_data_processor/requirements.txt +++ b/tools/speech_data_processor/requirements.txt @@ -1 +1 @@ -diff_match_patch +diff_match_patch \ No newline at end of file From 6e2c4a08424ea291f1b37b012e469b8621de20e1 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 15 Nov 2022 14:54:50 -0700 Subject: [PATCH 03/20] Megatron Export Update (#5343) * export update for Megatron + change ORT optimization Signed-off-by: David Mosallanezhad * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated export_utils to use autocast instead of manually casting >:/ Signed-off-by: David Mosallanezhad * removed dtype from LayerNorm Signed-off-by: David Mosallanezhad * added comment Signed-off-by: David Mosallanezhad * reverting changes on FloatCast Signed-off-by: David Mosallanezhad * Cherry-picked changes from megatron-norm Signed-off-by: Boris Fomitchev * updated asr_model import to cast_utils Signed-off-by: David Mosallanezhad * updated del onnx_model place Signed-off-by: David Mosallanezhad * changed ort optimization to basic -> temp fix Signed-off-by: David Mosallanezhad Signed-off-by: David Mosallanezhad Signed-off-by: Boris Fomitchev Co-authored-by: David Mosallanezhad Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Boris Fomitchev --- nemo/utils/cast_utils.py | 2 +- nemo/utils/export_utils.py | 50 +++++++++++++++++++++++++++++--------- scripts/export.py | 3 +-- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/nemo/utils/cast_utils.py b/nemo/utils/cast_utils.py index f973a4719e24..9eb064936ea5 100644 --- a/nemo/utils/cast_utils.py +++ b/nemo/utils/cast_utils.py @@ -70,6 +70,6 @@ def __init__(self, mod): self.mod = mod def forward(self, x): - with torch.cuda.amp.autocast(enabled=False): + with avoid_float16_autocast_context(): ret = self.mod.forward(x.to(torch.float32)).to(x.dtype) return ret diff --git a/nemo/utils/export_utils.py b/nemo/utils/export_utils.py index 02d99c6ba7fd..c7a45649daa2 100644 --- a/nemo/utils/export_utils.py +++ b/nemo/utils/export_utils.py @@ -59,6 +59,42 @@ def forward(self, x): return F.linear(x, self.weight, self.bias), None +class ExportableMatchedScaleMaskSoftmax(nn.Module): + def __init__(self, mod): + super(ExportableMatchedScaleMaskSoftmax, self).__init__() + self.init_module(mod.input_in_fp16, mod.input_in_bf16, mod.mask_func, mod.softmax_in_fp32, mod.scale) + + def init_module( + self, input_in_fp16, input_in_bf16, mask_func, softmax_in_fp32, scale, + ): + self.input_in_fp16 = input_in_fp16 + self.input_in_bf16 = input_in_bf16 + self.softmax_in_fp32 = softmax_in_fp32 + self.mask_func = 
mask_func + self.scale = scale + + self.input_in_float16 = self.input_in_fp16 or self.input_in_bf16 + + def forward(self, input, mask): + if self.input_in_float16 and self.softmax_in_fp32: + input = input.float() + + if self.scale is not None: + input = input * self.scale + mask_output = self.mask_func(input, mask) if mask is not None else input + probs = torch.nn.Softmax(dim=-1)(mask_output) + all_k_masked = mask.all(axis=-1) + zero_attention_mask = (1.0 - all_k_masked.float())[:, :, :, None] + probs = probs * zero_attention_mask + + if self.input_in_float16 and self.softmax_in_fp32: + if self.input_in_fp16: + probs = probs.half() + else: + probs = probs.bfloat16() + return probs + + def get_export_format(filename: str): _, ext = os.path.splitext(filename) try: @@ -330,13 +366,9 @@ def replace_MatchedScaleMaskSoftmax(n: nn.Module) -> Optional[nn.Linear]: Returns: exportable module """ - # including the import here to avoid circular imports - from nemo.collections.nlp.modules.common.megatron.fused_softmax import MatchedScaleMaskSoftmax - # disabling fusion for the MatchedScaleMaskSoftmax - mod = MatchedScaleMaskSoftmax( - n.input_in_fp16, n.input_in_bf16, n.attn_mask_type, False, n.mask_func, n.softmax_in_fp32, n.scale - ) + mod = ExportableMatchedScaleMaskSoftmax(n.input_in_fp16, n.input_in_bf16, n.mask_func, n.softmax_in_fp32, n.scale) + return mod @@ -408,11 +440,7 @@ def script_module(m: nn.Module): "BatchNorm1d": wrap_module(nn.BatchNorm1d, CastToFloat), "BatchNorm2d": wrap_module(nn.BatchNorm2d, CastToFloat), "LayerNorm": wrap_module(nn.LayerNorm, CastToFloat), - "MatchedScaleMaskSoftmax": wrap_module(None, replace_MatchedScaleMaskSoftmax), -} - -script_replacements = { - "BiLSTM": script_module, + "MatchedScaleMaskSoftmax": wrap_module(nn.Softmax, ExportableMatchedScaleMaskSoftmax), } diff --git a/scripts/export.py b/scripts/export.py index 2e100e446e72..b3d6317e936c 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -143,11 +143,10 @@ def nemo_export(argv): if check_trace and len(in_args) > 0: input_example = model.input_module.input_example(**in_args) check_trace = [input_example] - for key, arg in in_args.items(): + for key, arg in in_args: in_args[key] = (arg + 1) // 2 input_example2 = model.input_module.input_example(**in_args) check_trace.append(input_example2) - logging.info(f"Using additional check args: {in_args}") _, descriptions = model.export( out, From 0c9a919a9cd55cdb481418242b8863254b46de3b Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Tue, 15 Nov 2022 17:14:35 -0700 Subject: [PATCH 04/20] Disable sync_batch_comm in validation_step for GPT (#5397) * disable sync_batch_comm in validation_step Signed-off-by: ericharper * Read sync_batch_comm from config or default to False Signed-off-by: Markel Sanz Ausin * Update megatron_gpt_config to default sync_batch_comm to False to avoid CUDA error Signed-off-by: Markel Sanz Ausin * Empty Signed-off-by: MaximumEntropy * Comment out test Signed-off-by: MaximumEntropy Signed-off-by: ericharper Signed-off-by: Markel Sanz Ausin Signed-off-by: MaximumEntropy Signed-off-by: Oleksii Kuchaiev Co-authored-by: Oleksii Kuchaiev Co-authored-by: Markel Sanz Ausin Co-authored-by: Sandeep Subramanian Co-authored-by: Oleksii Kuchaiev --- examples/nlp/language_modeling/conf/megatron_gpt_config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index a40e5a2ae35f..0604c0287d05 100755 --- 
a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -106,7 +106,6 @@ model: apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) sync_batch_comm: False # Enable stream synchronization after each p2p communication between pipeline stages - use_unified_checkpoint: True # Use model parallel independent checkpointing ## Activation Checkpointing # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. From 3a9616f4efad4674d23505d18a7c716c1e43ce39 Mon Sep 17 00:00:00 2001 From: Boris Fomitchev Date: Fri, 18 Nov 2022 10:01:33 -0800 Subject: [PATCH 05/20] Radtts 1.13 (#5451) * [TTS] Fixing RADTTS training - removing view buffer and fixing accuracy issue (#5358) * [TTS] add CI test for RADTTS training recipe. Signed-off-by: Boris Fomitchev Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Oleksii Kuchaiev --- nemo/collections/tts/modules/common.py | 45 +++++++++++++------ nemo/collections/tts/modules/radtts.py | 8 ++-- nemo/core/classes/exportable.py | 2 +- nemo/utils/cast_utils.py | 2 +- nemo/utils/export_utils.py | 40 +++++++---------- scripts/export.py | 3 +- tests/collections/tts/test_tts_exportables.py | 5 +-- 7 files changed, 56 insertions(+), 49 deletions(-) diff --git a/nemo/collections/tts/modules/common.py b/nemo/collections/tts/modules/common.py index 63c28f12a4a7..0765d0499bda 100644 --- a/nemo/collections/tts/modules/common.py +++ b/nemo/collections/tts/modules/common.py @@ -122,22 +122,30 @@ def lstm_tensor(self, context: Tensor, lens: Tensor, enforce_sorted: bool = Fals seq = nn.utils.rnn.pack_padded_sequence( context, lens.long().cpu(), batch_first=True, enforce_sorted=enforce_sorted ) - return self.lstm_sequence(seq) + if not (torch.jit.is_scripting() or torch.jit.is_tracing()): + self.bilstm.flatten_parameters() + if hasattr(self.bilstm, 'forward'): + ret, _ = self.bilstm.forward(seq) + else: + ret, _ = self.bilstm.forward_1(seq) + return nn.utils.rnn.pad_packed_sequence(ret, batch_first=True) def lstm_sequence(self, seq: PackedSequence) -> Tuple[Tensor, Tensor]: if not (torch.jit.is_scripting() or torch.jit.is_tracing()): self.bilstm.flatten_parameters() - ret, _ = self.bilstm(seq) + if hasattr(self.bilstm, 'forward'): + ret, _ = self.bilstm.forward(seq) + elif hasattr(self.bilstm, 'forward_1'): + ret, _ = self.bilstm.forward_1(seq) return nn.utils.rnn.pad_packed_sequence(ret, batch_first=True) - def forward(self, context: Tensor, lens: Tensor) -> Tensor: + @torch.jit.export + def sort_and_lstm_tensor(self, context: Tensor, lens: Tensor) -> Tensor: context, lens_sorted, unsort_ids = sort_tensor(context, lens) - dtype = context.dtype - # this is only needed for Torchscript to run in Triton - # (https://github.com/pytorch/pytorch/issues/89241) - with torch.cuda.amp.autocast(enabled=False): - ret = self.lstm_tensor(context.to(dtype=torch.float32), lens_sorted, enforce_sorted=True) - return ret[0].to(dtype=dtype)[unsort_ids] + seq = nn.utils.rnn.pack_padded_sequence( + context, lens_sorted.long().cpu(), batch_first=True, enforce_sorted=True + ) + return self.lstm_sequence(seq)[0][unsort_ids] class 
ConvLSTMLinear(nn.Module): @@ -152,8 +160,7 @@ def __init__( use_partial_padding=False, norm_fn=None, ): - super(ConvLSTMLinear, self).__init__() - self.bilstm = BiLSTM(n_channels, int(n_channels // 2), 1) + super(ConvLSTMLinear, self).__init__(n_channels, int(n_channels // 2), 1) self.convolutions = nn.ModuleList() if n_layers > 0: @@ -184,14 +191,24 @@ def __init__( if out_dim is not None: self.dense = nn.Linear(n_channels, out_dim) - def forward(self, context: Tensor, lens: Tensor) -> Tensor: + def masked_conv_to_sequence(self, context: Tensor, lens: Tensor, enforce_sorted: bool = False) -> PackedSequence: mask = get_mask_from_lengths_and_val(lens, context) mask = mask.to(dtype=context.dtype).unsqueeze(1) for conv in self.convolutions: context = self.dropout(F.relu(conv(context, mask))) + context = context.transpose(1, 2) - # Apply Bidirectional LSTM - context = self.bilstm(context, lens) + seq = torch.nn.utils.rnn.pack_padded_sequence( + context, lens.long().cpu(), batch_first=True, enforce_sorted=enforce_sorted + ) + return seq + + def forward(self, context: Tensor, lens: Tensor) -> Tensor: + context, lens, unsort_ids = sort_tensor(context, lens) + seq = self.masked_conv_to_sequence(context, lens, enforce_sorted=True) + context, _ = self.lstm_sequence(seq) + context = context[unsort_ids] + if self.dense is not None: context = self.dense(context).permute(0, 2, 1) return context diff --git a/nemo/collections/tts/modules/radtts.py b/nemo/collections/tts/modules/radtts.py index 9f360a4e5a33..dca0f0ede62c 100644 --- a/nemo/collections/tts/modules/radtts.py +++ b/nemo/collections/tts/modules/radtts.py @@ -345,7 +345,9 @@ def preprocess_context(self, context, speaker_vecs, out_lens, f0, energy_avg): context_w_spkvec = torch.cat((context_w_spkvec, energy_avg), 1) unfolded_out_lens = out_lens // self.n_group_size - context_lstm_padded_output = self.context_lstm(context_w_spkvec.transpose(1, 2), unfolded_out_lens) + context_lstm_padded_output = self.context_lstm.sort_and_lstm_tensor( + context_w_spkvec.transpose(1, 2), unfolded_out_lens + ) context_w_spkvec = context_lstm_padded_output.transpose(1, 2) if not self.context_lstm_w_f0_and_energy: @@ -770,8 +772,8 @@ def input_example(self, max_batch=1, max_dim=256): """ par = next(self.parameters()) sz = (max_batch, max_dim) - inp = torch.randint(16, 32, sz, device=par.device, dtype=torch.int64) - lens = torch.randint(max_dim // 4, max_dim // 2, (max_batch,), device=par.device, dtype=torch.int) + inp = torch.randint(0, 16, sz, device=par.device, dtype=torch.int64) + lens = torch.randint(16, max_dim, (max_batch,), device=par.device, dtype=torch.int) speaker = torch.randint(0, 1, (max_batch,), device=par.device, dtype=torch.int64) inputs = { 'text': inp, diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py index 50266dab3dbe..b3f0b2fdd642 100644 --- a/nemo/core/classes/exportable.py +++ b/nemo/core/classes/exportable.py @@ -128,7 +128,7 @@ def _export( # Set module mode with torch.onnx.select_model_mode_for_export( self, training - ), torch.inference_mode(), torch.no_grad(), torch.jit.optimized_execution(True), _jit_is_scripting(): + ), torch.inference_mode(), torch.no_grad(), torch.jit.optimized_execution(True): if input_example is None: input_example = self.input_module.input_example() diff --git a/nemo/utils/cast_utils.py b/nemo/utils/cast_utils.py index 9eb064936ea5..f973a4719e24 100644 --- a/nemo/utils/cast_utils.py +++ b/nemo/utils/cast_utils.py @@ -70,6 +70,6 @@ def __init__(self, mod): self.mod = mod def 
forward(self, x): - with avoid_float16_autocast_context(): + with torch.cuda.amp.autocast(enabled=False): ret = self.mod.forward(x.to(torch.float32)).to(x.dtype) return ret diff --git a/nemo/utils/export_utils.py b/nemo/utils/export_utils.py index c7a45649daa2..0fbe2999bffe 100644 --- a/nemo/utils/export_utils.py +++ b/nemo/utils/export_utils.py @@ -15,7 +15,7 @@ import os from contextlib import nullcontext from enum import Enum -from typing import Callable, Dict, Optional, Type +from typing import Callable, Dict, List, Optional, Type import onnx import torch @@ -158,12 +158,8 @@ def verify_torchscript(model, output, input_examples, input_names, check_toleran for input_example in input_examples: input_list, input_dict = parse_input_example(input_example) output_example = model.forward(*input_list, **input_dict) - # We disable autocast here to make sure exported TS will run under Triton or other C++ env - with torch.cuda.amp.autocast(enabled=False): - ts_model = torch.jit.load(output) - all_good = all_good and run_ts_and_compare( - ts_model, input_list, input_dict, output_example, check_tolerance - ) + + all_good = all_good and run_ts_and_compare(ts_model, input_list, input_dict, output_example, check_tolerance) status = "SUCCESS" if all_good else "FAIL" logging.info(f"Torchscript generated at {output} verified with torchscript forward : " + status) return all_good @@ -205,15 +201,8 @@ def run_ts_and_compare(ts_model, ts_input_list, ts_input_dict, output_example, c if torch.is_tensor(expected): tout = out.to('cpu') - logging.debug(f"Checking output {i}, shape: {expected.shape}:\n") - this_good = True - try: - if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=check_tolerance): - this_good = False - except Exception: # there may ne size mismatch and it may be OK - this_good = False - if not this_good: - logging.info(f"Results mismatch! PyTorch(expected):\n{expected}\nTorchScript:\n{tout}") + logging.debug(f"Checking output {i}, shape: {expected.shape}:\n{expected}\n{tout}") + if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=check_tolerance): all_good = False return all_good @@ -227,14 +216,9 @@ def run_ort_and_compare(sess, ort_input, output_example, check_tolerance=0.01): if torch.is_tensor(expected): tout = torch.from_numpy(out) - logging.debug(f"Checking output {i}, shape: {expected.shape}:\n") - this_good = True - try: - if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=100 * check_tolerance): - this_good = False - except Exception: # there may ne size mismatch and it may be OK - this_good = False - if not this_good: + logging.debug(f"Checking output {i}, shape: {expected.shape}:\n{expected}\n{tout}") + if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=100 * check_tolerance): + all_good = False logging.info(f"onnxruntime results mismatch! 
PyTorch(expected):\n{expected}\nONNXruntime:\n{tout}") all_good = False return all_good @@ -433,7 +417,8 @@ def replace_modules( def script_module(m: nn.Module): - return torch.jit.script(m) + m1 = torch.jit.script(m) + return m1 default_replacements = { @@ -443,6 +428,11 @@ def script_module(m: nn.Module): "MatchedScaleMaskSoftmax": wrap_module(nn.Softmax, ExportableMatchedScaleMaskSoftmax), } +script_replacements = { + "BiLSTM": script_module, + "ConvLSTMLinear": script_module, +} + def replace_for_export(model: nn.Module) -> nn.Module: """ diff --git a/scripts/export.py b/scripts/export.py index b3d6317e936c..2e100e446e72 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -143,10 +143,11 @@ def nemo_export(argv): if check_trace and len(in_args) > 0: input_example = model.input_module.input_example(**in_args) check_trace = [input_example] - for key, arg in in_args: + for key, arg in in_args.items(): in_args[key] = (arg + 1) // 2 input_example2 = model.input_module.input_example(**in_args) check_trace.append(input_example2) + logging.info(f"Using additional check args: {in_args}") _, descriptions = model.export( out, diff --git a/tests/collections/tts/test_tts_exportables.py b/tests/collections/tts/test_tts_exportables.py index d7684de732e5..e3e496373271 100644 --- a/tests/collections/tts/test_tts_exportables.py +++ b/tests/collections/tts/test_tts_exportables.py @@ -15,7 +15,6 @@ import tempfile import pytest -import torch from omegaconf import OmegaConf from nemo.collections.tts.models import FastPitchModel, HifiGanModel, RadTTSModel @@ -74,12 +73,10 @@ def test_HifiGanModel_export_to_onnx(self, hifigan_model): filename = os.path.join(tmpdir, 'hfg.pt') model.export(output=filename, verbose=True, check_trace=True) - @pytest.mark.pleasefixme @pytest.mark.run_only_on('GPU') @pytest.mark.unit def test_RadTTSModel_export_to_torchscript(self, radtts_model): model = radtts_model.cuda() with tempfile.TemporaryDirectory() as tmpdir: filename = os.path.join(tmpdir, 'rad.ts') - with torch.cuda.amp.autocast(enabled=True): - model.export(output=filename, verbose=True, check_trace=True) + model.export(output=filename, verbose=True, check_trace=True) From 3f10f02a9c85edae5ca3cbb00113a5b68fd816a3 Mon Sep 17 00:00:00 2001 From: Sandeep Subramanian Date: Tue, 22 Nov 2022 11:51:59 -0800 Subject: [PATCH 06/20] Support for finetuning and finetuning inference with .ckpt files & batch size refactoring (#5339) (#5478) * Initial refactor Signed-off-by: MaximumEntropy * Resolve config before passing to load_from_checkpoint Signed-off-by: MaximumEntropy * Fixes for model parallel and nemo restore Signed-off-by: MaximumEntropy * Fixes for eval Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Revert config changes Signed-off-by: MaximumEntropy * Refactor Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix typo Signed-off-by: MaximumEntropy * Remove comments Signed-off-by: MaximumEntropy * Minor Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix validation reconfiguration Signed-off-by: MaximumEntropy * Remove old comment Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixes for test_ds Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com 
hooks for more information, see https://pre-commit.ci Signed-off-by: MaximumEntropy Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: MaximumEntropy Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py | 3 +-- examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py index 008bdc90cdd6..9064ce9f452d 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from lightning_lite.plugins.environments import TorchElasticEnvironment from megatron_t5_seq2seq_finetune import load_from_checkpoint_dir, load_from_nemo, validate_checkpoint_loading_args from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer @@ -127,7 +126,7 @@ def main(cfg) -> None: t5_cfg = MegatronT5FinetuneModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True ) - model = load_from_nemo(MegatronT5FinetuneModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) + model = load_from_nemo(MegatronT5FinetuneModel, cfg, trainer, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) model = load_from_checkpoint_dir(MegatronT5FinetuneModel, cfg, trainer, modify_confg_fn=_modify_config) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py index 84dec0fac387..c714c970b38c 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py @@ -15,7 +15,6 @@ import os import tempfile -from lightning_lite.plugins.environments import TorchElasticEnvironment from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.callbacks.timer import Timer From 7321e4ad6e797ed3779cd15f0aeb147f06bf1b44 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 22 Nov 2022 19:05:40 -0700 Subject: [PATCH 07/20] export_utils bugfix (#5480) * updated export_utils Signed-off-by: David Mosallanezhad * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: David Mosallanezhad Co-authored-by: David Mosallanezhad Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/utils/export_utils.py | 46 +++++++------------------------------- 1 file changed, 8 insertions(+), 38 deletions(-) diff --git a/nemo/utils/export_utils.py b/nemo/utils/export_utils.py index 0fbe2999bffe..63faee83bf3c 100644 --- a/nemo/utils/export_utils.py +++ b/nemo/utils/export_utils.py @@ -59,42 +59,6 @@ def forward(self, x): return F.linear(x, self.weight, self.bias), None -class ExportableMatchedScaleMaskSoftmax(nn.Module): - def __init__(self, mod): - super(ExportableMatchedScaleMaskSoftmax, self).__init__() - self.init_module(mod.input_in_fp16, mod.input_in_bf16, mod.mask_func, mod.softmax_in_fp32, mod.scale) - - def init_module( - self, input_in_fp16, input_in_bf16, mask_func, softmax_in_fp32, scale, - ): - 
self.input_in_fp16 = input_in_fp16 - self.input_in_bf16 = input_in_bf16 - self.softmax_in_fp32 = softmax_in_fp32 - self.mask_func = mask_func - self.scale = scale - - self.input_in_float16 = self.input_in_fp16 or self.input_in_bf16 - - def forward(self, input, mask): - if self.input_in_float16 and self.softmax_in_fp32: - input = input.float() - - if self.scale is not None: - input = input * self.scale - mask_output = self.mask_func(input, mask) if mask is not None else input - probs = torch.nn.Softmax(dim=-1)(mask_output) - all_k_masked = mask.all(axis=-1) - zero_attention_mask = (1.0 - all_k_masked.float())[:, :, :, None] - probs = probs * zero_attention_mask - - if self.input_in_float16 and self.softmax_in_fp32: - if self.input_in_fp16: - probs = probs.half() - else: - probs = probs.bfloat16() - return probs - - def get_export_format(filename: str): _, ext = os.path.splitext(filename) try: @@ -351,7 +315,13 @@ def replace_MatchedScaleMaskSoftmax(n: nn.Module) -> Optional[nn.Linear]: exportable module """ - mod = ExportableMatchedScaleMaskSoftmax(n.input_in_fp16, n.input_in_bf16, n.mask_func, n.softmax_in_fp32, n.scale) + # including the import here to avoid circular imports + from nemo.collections.nlp.modules.common.megatron.fused_softmax import MatchedScaleMaskSoftmax + + # disabling fusion for the MatchedScaleMaskSoftmax + mod = MatchedScaleMaskSoftmax( + n.input_in_fp16, n.input_in_bf16, n.attn_mask_type, False, n.mask_func, n.softmax_in_fp32, n.scale + ) return mod @@ -425,7 +395,7 @@ def script_module(m: nn.Module): "BatchNorm1d": wrap_module(nn.BatchNorm1d, CastToFloat), "BatchNorm2d": wrap_module(nn.BatchNorm2d, CastToFloat), "LayerNorm": wrap_module(nn.LayerNorm, CastToFloat), - "MatchedScaleMaskSoftmax": wrap_module(nn.Softmax, ExportableMatchedScaleMaskSoftmax), + "MatchedScaleMaskSoftmax": wrap_module(None, replace_MatchedScaleMaskSoftmax), } script_replacements = { From 015bc95d2968d886ae693e6f4c7114e0efc72b2f Mon Sep 17 00:00:00 2001 From: Boris Fomitchev Date: Wed, 23 Nov 2022 15:19:58 -0800 Subject: [PATCH 08/20] Export fixes for Riva (#5496) * Export fixes for Riva Signed-off-by: Boris Fomitchev * Cleaning up training_utils Signed-off-by: Boris Fomitchev Signed-off-by: Boris Fomitchev --- nemo/utils/export_utils.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/nemo/utils/export_utils.py b/nemo/utils/export_utils.py index 63faee83bf3c..863c7fd1d2ec 100644 --- a/nemo/utils/export_utils.py +++ b/nemo/utils/export_utils.py @@ -165,8 +165,15 @@ def run_ts_and_compare(ts_model, ts_input_list, ts_input_dict, output_example, c if torch.is_tensor(expected): tout = out.to('cpu') - logging.debug(f"Checking output {i}, shape: {expected.shape}:\n{expected}\n{tout}") - if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=check_tolerance): + logging.debug(f"Checking output {i}, shape: {expected.shape}:\n") + this_good = True + try: + if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=check_tolerance): + this_good = False + except Exception: # there may ne size mismatch and it may be OK + this_good = False + if not this_good: + logging.info(f"Results mismatch! 
PyTorch(expected):\n{expected}\nTorchScript:\n{tout}") all_good = False return all_good @@ -180,9 +187,14 @@ def run_ort_and_compare(sess, ort_input, output_example, check_tolerance=0.01): if torch.is_tensor(expected): tout = torch.from_numpy(out) - logging.debug(f"Checking output {i}, shape: {expected.shape}:\n{expected}\n{tout}") - if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=100 * check_tolerance): - all_good = False + logging.debug(f"Checking output {i}, shape: {expected.shape}:\n") + this_good = True + try: + if not torch.allclose(tout, expected.cpu(), rtol=check_tolerance, atol=100 * check_tolerance): + this_good = False + except Exception: # there may ne size mismatch and it may be OK + this_good = False + if not this_good: logging.info(f"onnxruntime results mismatch! PyTorch(expected):\n{expected}\nONNXruntime:\n{tout}") all_good = False return all_good From 51dee701aa60a169586ae986e45be2aff9abf8d8 Mon Sep 17 00:00:00 2001 From: David Date: Sun, 4 Dec 2022 22:14:05 -0700 Subject: [PATCH 09/20] added set_start_method + function param bugfix (#5539) * added set_start_method + function param bugfix Signed-off-by: David Mosallanezhad * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * upper bound torchmetrics Signed-off-by: ericharper Signed-off-by: David Mosallanezhad Signed-off-by: ericharper Co-authored-by: David Mosallanezhad Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: ericharper --- .../language_modeling/megatron_gpt_prompt_learning.py | 2 ++ .../nlp/language_modeling/megatron_t5_prompt_learning.py | 3 +++ .../language_modeling/megatron_t5_seq2seq_finetune.py | 9 +++++---- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py index 1d0debb924f1..a7bb3351fe1c 100644 --- a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py @@ -13,6 +13,7 @@ # limitations under the License. from lightning_lite.plugins.environments import TorchElasticEnvironment +import torch.multiprocessing as mp from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.callbacks.timer import Timer @@ -30,6 +31,7 @@ from nemo.utils import logging from nemo.utils.exp_manager import StatelessTimer, exp_manager +mp.set_start_method("spawn", force=True) """ This is an example of how to ptune/prompt-tune a pretrained GPT model. diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py index 68c9f2cf5b30..a18c4ca84b6a 100644 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py @@ -13,6 +13,7 @@ # limitations under the License. from lightning_lite.plugins.environments import TorchElasticEnvironment +import torch.multiprocessing as mp from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.callbacks.timer import Timer @@ -30,6 +31,8 @@ from nemo.utils import logging from nemo.utils.exp_manager import StatelessTimer, exp_manager +mp.set_start_method("spawn", force=True) + """ This is an example of how to ptune/prompt-tune a pretrained T5 model. 
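A note on the `mp.set_start_method("spawn", force=True)` calls added across these example scripts: a CUDA context cannot be re-initialized inside a process created with the default "fork" start method, so GPU work in a forked worker fails with "Cannot re-initialize CUDA in forked subprocess". The sketch below is illustrative only, not part of the patch; the `worker` function is hypothetical and a CUDA-capable machine is assumed:

    import torch
    import torch.multiprocessing as mp

    def worker(rank: int) -> None:
        # with "spawn", each child process builds its own fresh CUDA context
        print(rank, torch.zeros(2, device="cuda").sum().item())

    if __name__ == "__main__":
        mp.set_start_method("spawn", force=True)  # same call the patch adds
        torch.zeros(1, device="cuda")  # parent touches the GPU first
        procs = [mp.Process(target=worker, args=(i,)) for i in range(2)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()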
diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py index c714c970b38c..0f513e387478 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py @@ -15,6 +15,7 @@ import os import tempfile +import torch.multiprocessing as mp from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.callbacks.timer import Timer @@ -36,6 +37,8 @@ from nemo.utils.exp_manager import StatelessTimer, exp_manager from nemo.utils.model_utils import inject_model_parallel_rank +mp.set_start_method("spawn", force=True) + def _modify_config(t5_cfg, cfg, add_cfg_to_tree=False): """ @@ -189,7 +192,7 @@ def main(cfg) -> None: model = load_from_nemo(MegatronT0Model, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) - model = load_from_checkpoint_dir(MegatronT0Model, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) + model = load_from_checkpoint_dir(MegatronT0Model, cfg, trainer, modify_confg_fn=_modify_config) else: if cfg.model.restore_from_path: t5_cfg = MegatronT5FinetuneModel.restore_from( @@ -198,9 +201,7 @@ def main(cfg) -> None: model = load_from_nemo(MegatronT5FinetuneModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) - model = load_from_checkpoint_dir( - MegatronT5FinetuneModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config - ) + model = load_from_checkpoint_dir(MegatronT5FinetuneModel, cfg, trainer, modify_confg_fn=_modify_config) trainer.fit(model) trainer.validate(model) From ba2d83e3bc3848f0bb42eb6fa5b6a3cea08a6afb Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Mon, 5 Dec 2022 12:25:10 -0700 Subject: [PATCH 10/20] remove notebook (#5548) Signed-off-by: ericharper Signed-off-by: ericharper --- .../Non_English_Downstream_Tasks_(NER).ipynb | 899 ------------------ 1 file changed, 899 deletions(-) delete mode 100644 tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb diff --git a/tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb b/tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb deleted file mode 100644 index f088f8ca4627..000000000000 --- a/tutorials/nlp/Non_English_Downstream_Tasks_(NER).ipynb +++ /dev/null @@ -1,899 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OETcTQlcguCm" - }, - "outputs": [], - "source": [ - "BRANCH = 'r1.13.0'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "o_0K1lsW1dj9" - }, - "outputs": [], - "source": [ - "\"\"\"\n", - "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", - "\n", - "Instructions for setting up Colab are as follows:\n", - "1. Open a new Python 3 notebook.\n", - "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", - "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", - "4. 
Run this cell to set up dependencies.\n", - "\"\"\"\n", - "# If you're using Google Colab and not running locally, run this cell\n", - "\n", - "# install NeMo\n", - "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pC0slAc0h9zN", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "# If you're not using Colab, you might need to upgrade jupyter notebook to avoid the following error:\n", - "# 'ImportError: IProgress not found. Please update jupyter and ipywidgets.'\n", - "\n", - "! pip install ipywidgets\n", - "! jupyter nbextension enable --py widgetsnbextension\n", - "\n", - "# Please restart the kernel after running this cell" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dzqD2WDFOIN-" - }, - "outputs": [], - "source": [ - "from nemo.collections import nlp as nemo_nlp\n", - "from nemo.utils.exp_manager import exp_manager\n", - "\n", - "import os\n", - "import wget \n", - "import torch\n", - "import pytorch_lightning as pl\n", - "from omegaconf import OmegaConf\n", - "\n", - "import zipfile\n", - "import random\n", - "from glob import glob" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "daYw_Xll2ZR9" - }, - "source": [ - "# Tutorial Overview\n", - "In this tutorial, we will show how to use a pre-trained BERT language model on a non-English downstream task. Here we are going to use the Persian language and the named entity recognition (NER) task as an example. Note that most of the other downstream tasks supported in NeMo should work similarly for other languages. \n", - "\n", - "# Task Description\n", - "NER is the task of detecting and classifying key information (entities) in text.\n", - "For example, in a sentence: `Mary lives in Santa Clara and works at NVIDIA`, we should detect that `Mary` is a person, `Santa Clara` is a location and `NVIDIA` is a company.\n", - "\n", - "In this tutorial we will be using the [BERT language model](https://arxiv.org/abs/1810.04805).\n", - "\n", - "To read more about other topics and downstream tasks that can be done in NeMo, you can see the [NeMo tutorial page](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZnuziSwJ1yEB" - }, - "source": [ - "# Dataset\n", - "\n", - "In this tutorial we are going to use the [Persian Arman dataset for our NER task](https://github.com/HaniehP/PersianNER).\n", - "\n", - "Arman is a hand-annotated Persian corpus for the NER task with 250,015 tokens and 7,682 sentences. Using [IOB encoding](https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging)), tokens are labeled either with one of the following named entities or with O. \n", - "\n", - "* event = event\n", - "* fac = facility\n", - "* loc = location\n", - "* org = organization\n", - "* pers = person\n", - "* pro = product\n", - "\n", - "Each of these has a label starting with **B** that indicates it is the first token of the named entity, and with **I** for the others. 
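For example (an illustrative tagging, not a line quoted from the corpus), a two-token organization name such as سازمان ملل would be labeled:\n", - "```\n", - "سازمان ملل\n", - "B-org I-org\n", - "```\n", - "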
\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qzcZ3nb_-SVT" - }, - "source": [ - "# NeMo Token Classification Data Format\n", - "\n", - "[TokenClassification Model](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/models/token_classification/token_classification_model.py) in NeMo supports NER and other token level classification tasks, as long as the data follows the format specified below. \n", - "\n", - "Token Classification Model requires the data to be split into 2 files: \n", - "* text.txt \n", - "* labels.txt. \n", - "\n", - "Each line of the **text.txt** file contains text sequences, where words are separated with spaces, i.e.: \n", - "[WORD] [SPACE] [WORD] [SPACE] [WORD].\n", - "\n", - "The **labels.txt** file contains corresponding labels for each word in text.txt, the labels are separated with spaces, i.e.:\n", - "[LABEL] [SPACE] [LABEL] [SPACE] [LABEL].\n", - "\n", - "Example of a text.txt file:\n", - "```\n", - "دبیر شورای عالی انقلاب فرهنگی از گنجانده شدن 5 زبان خارجی جدید در برنامه درسی مدارس خبر داد.\n", - "```\n", - "Corresponding labels.txt file:\n", - "```\n", - "O B_ORG I_ORG I_ORG I_ORG O O O O O O O O O O O O O O \n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SL58EWkd2ZVb" - }, - "source": [ - "## Download and preprocess the data¶" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_z2tCEIXZa90" - }, - "source": [ - "You can download the Arman dataset by cloning to the following github repository: https://github.com/HaniehP/PersianNER.\n", - "\n", - "After downloading the data, you will see a few files and folders inside a directory named PersianNER. Take ArmanPersoNERCorpus.zip and upload it to `DATA_DIR` (if running in a docker or locally) or use **files** from Google colab to upload the files.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n8HZrDmr12_-" - }, - "outputs": [], - "source": [ - "# path to the folder with ArmanPersoNERCorpus.zip file (if running locally on in a docker)\n", - "DATA_DIR = \"PATH_TO_FOLDER_WITH_ZIP.ZIP_FILE\"\n", - "WORK_DIR = \"WORK_DIR\"\n", - "\n", - "# adding an empty subfolder for data (otherwise it can interact with existing folders in DATA_DIR)\n", - "subfolder = f\"{DATA_DIR}/non_eng_NER\"\n", - "\n", - "os.makedirs(WORK_DIR, exist_ok=True)\n", - "os.makedirs(DATA_DIR, exist_ok=True)\n", - "os.makedirs(subfolder, exist_ok=True)\n", - "\n", - "! cp $DATA_DIR/ArmanPersoNERCorpus.zip $subfolder/.\n", - "DATA_DIR = f\"{DATA_DIR}/non_eng_NER\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "k1TmF5rrdPMj" - }, - "outputs": [], - "source": [ - "if 'google.colab' in str(get_ipython):\n", - " from google.colab import files\n", - " uploaded = files.upload() " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HTUKJOownkrF" - }, - "outputs": [], - "source": [ - "if 'google.colab' in str(get_ipython):\n", - " ! mv ArmanPersoNERCorpus.zip $DATA_DIR/." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NhUzIeF0Yg0l" - }, - "source": [ - "Let's extract files from the zip file. It will generate three test and train files which have overlaps and are intended to be used in turn as train and test sets. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Y01BdjPRW-7B" - }, - "outputs": [], - "source": [ - "! 
cd $DATA_DIR && unzip \"ArmanPersoNERCorpus.zip\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qaDgL-sQaX2e" - }, - "source": [ - "Next, we will put all the data into a single file, so that we can then remove any repeated sentences. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "B0T4CzJvbBJ4" - }, - "outputs": [], - "source": [ - "file_all = os.path.join(DATA_DIR, \"all_data.txt\")\n", - "with open(file_all, \"w\") as f1:\n", - " for filename in glob(f\"{DATA_DIR}/test_fold*.txt\") + glob(f\"{DATA_DIR}/train_fold*.txt\"):\n", - " with open(filename, \"r\", encoding = \"ISO-8859-1\") as f2:\n", - " for line in f2:\n", - " f1.write(line)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VzVuET8HESFB" - }, - "source": [ - "Now, you need to convert this data into a NeMo-compatible format before starting the training process. For this purpose, you can run [examples/nlp/token_classification/data/import_from_iob_format.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/import_from_iob_format.py) on your train and dev files, as follows:\n", - "\n", - "\n", - "\n", - "\n", - "```\n", - "python examples/nlp/token_classification/data/import_from_iob_format.py --data_file PATH_TO_IOB_FORMAT_DATAFILE, e.g., \"DATA_DIR/all_data.txt\"\n", - "```\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ord_6KlkeNl8" - }, - "outputs": [], - "source": [ - "!wget https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/nlp/token_classification/data/import_from_iob_format.py" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IfSUkxffeSpL" - }, - "outputs": [], - "source": [ - "!python import_from_iob_format.py --data_file $DATA_DIR/all_data.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Aj0rXbYXbivW" - }, - "source": [ - "Now we process the data to remove any repeated sentences and then split them into train and dev sets. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CgvnTlqzbq5-" - }, - "outputs": [], - "source": [ - "sent_dict = dict()\n", - "line_removed = dict()\n", - "line_counter = 0\n", - "with open(DATA_DIR + \"/text_all_not_repeated.txt\", \"w\") as f1:\n", - " with open(DATA_DIR + \"/text_all_data.txt\", \"r\") as f2:\n", - " for line in f2:\n", - " line_counter += 1\n", - " if (not line in sent_dict):\n", - " sent_dict[line] = 1\n", - " f1.write(line)\n", - " else:\n", - " line_removed[line_counter] = 1\n", - "# labels:\n", - "line_counter = 0\n", - "with open(DATA_DIR + \"/labels_all_not_repeated.txt\", \"w\") as f1:\n", - " with open(DATA_DIR + \"/labels_all_data.txt\", \"r\") as f2:\n", - " for line in f2:\n", - " line_counter += 1\n", - " if(not line_counter in line_removed):\n", - " f1.write(line)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0cO3crs_gXjt" - }, - "source": [ - "After preprocessing the data and removing repeated sentences, there will be 7668 total valid sentences. We will be using 85% of that as train and 15% as dev. 
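That works out to roughly 6,500 sentences for training and 1,150 for dev.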
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7oHQYsMMbugP" - }, - "outputs": [], - "source": [ - "total_data = 7668\n", - "train_share = 0.85\n", - "used_lines_train = dict()\n", - "flag = 1\n", - "count = 0\n", - "while flag:\n", - " idx = random.randint(1, total_data)\n", - " if (not idx in used_lines_train):\n", - " used_lines_train[idx] = 1\n", - " count += 1\n", - " if (count/total_data > train_share):\n", - " flag = 0\n", - "\n", - "line_counter = 0\n", - "with open(DATA_DIR+ \"/text_train.txt\", \"w\") as f1:\n", - " with open(DATA_DIR + \"/text_dev.txt\", \"w\") as f2:\n", - " with open(DATA_DIR + \"/text_all_not_repeated.txt\", \"r\") as f3:\n", - " for line in f3:\n", - " line_counter += 1\n", - " if (line_counter in used_lines_train):\n", - " f1.write(line)\n", - " else:\n", - " f2.write(line)\n", - "\n", - "line_counter = 0\n", - "with open(DATA_DIR + \"/labels_train.txt\", \"w\") as f1:\n", - " with open(DATA_DIR + \"/labels_dev.txt\", \"w\") as f2:\n", - " with open(DATA_DIR + \"/labels_all_not_repeated.txt\", \"r\") as f3:\n", - " for line in f3:\n", - " line_counter += 1\n", - " if (line_counter in used_lines_train):\n", - " f1.write(line)\n", - " else:\n", - " f2.write(line)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1Q-GWNwDbzKl" - }, - "source": [ - "Finally, we remove files that are not needed anymore." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "II20ustub5BF" - }, - "outputs": [], - "source": [ - "print(\"Removed files:\")\n", - "for filename in os.listdir(DATA_DIR):\n", - " if (filename == \"text_dev.txt\" or filename == \"text_train.txt\" or filename == \"labels_dev.txt\" or filename == \"labels_train.txt\"):\n", - " continue\n", - " print(filename)\n", - " os.remove(DATA_DIR + \"/\" + filename)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U8Ty5_S7Ye8h" - }, - "source": [ - "Now, the data folder should contain these 4 files:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "L8vsyh3JZH26" - }, - "source": [ - "\n", - "\n", - "* labels_dev.txt\n", - "* labels_train.txt\n", - "* text_dev.txt\n", - "* text_train.txt\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qB0oLE4R9EhJ" - }, - "outputs": [], - "source": [ - "! ls -l $DATA_DIR" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6UDPgadLN6SG" - }, - "outputs": [], - "source": [ - "# let's take a look at the data \n", - "print('Text:')\n", - "! head -n 5 {DATA_DIR}/text_train.txt\n", - "\n", - "print('\\nLabels:')\n", - "! head -n 5 {DATA_DIR}/labels_train.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_whKCxfTMo6Y" - }, - "source": [ - "# Model configuration\n", - "\n", - "Our Named Entity Recognition model is comprised of the pretrained [BERT](https://arxiv.org/pdf/1810.04805.pdf) model followed by a Token Classification layer.\n", - "\n", - "The model is defined in a config file which declares multiple important sections. 
They are:\n", - "- **model**: All arguments that are related to the Model - language model, token classifier, optimizer and schedulers, datasets and any other related information\n", - "\n", - "- **trainer**: Any argument to be passed to PyTorch Lightning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "T1gA8PsJ13MJ" - }, - "outputs": [], - "source": [ - "MODEL_CONFIG = \"token_classification_config.yaml\"\n", - "# download the model's configuration file \n", - "config_dir = WORK_DIR + '/configs/'\n", - "os.makedirs(config_dir, exist_ok=True)\n", - "if not os.path.exists(config_dir + MODEL_CONFIG):\n", - " print('Downloading config file...')\n", - " wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/token_classification/conf/' + MODEL_CONFIG, config_dir)\n", - "else:\n", - " print('config file already exists')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mX3KmWMvSUQw" - }, - "outputs": [], - "source": [ - "# this line will print the entire config of the model\n", - "config_path = f'{WORK_DIR}/configs/{MODEL_CONFIG}'\n", - "print(config_path)\n", - "config = OmegaConf.load(config_path)\n", - "print(OmegaConf.to_yaml(config))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZCgWzNBkaQLZ" - }, - "source": [ - "# Fine-tuning the model using the Arman dataset\n", - "\n", - "Let's select a [`bert-base-multilingual-uncased`](https://huggingface.co/bert-base-multilingual-uncased) BERT model and fine-tune it on the Arman dataset.\n", - "\n", - "## Setting up Data within the config\n", - "\n", - "Among other things, the config file contains dictionaries called dataset, train_ds and validation_ds. These are configurations used to set up the Dataset and DataLoaders of the corresponding config.\n", - "\n", - "We assume that both training and evaluation files are in the same directory and use the default names mentioned during the data download step. 
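That is, the directory is expected to contain `text_train.txt`, `labels_train.txt`, `text_dev.txt` and `labels_dev.txt`.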
\n", - "So, to start model training, we simply need to specify `model.dataset.data_dir`, like we are going to do below.\n", - "\n", - "Also notice that some config lines, including `model.dataset.data_dir`, have `???` in place of paths, this means that values for these fields are required to be specified by the user.\n", - "\n", - "Let us now add the data directory path to the config.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LQHCJN-ZaoLp" - }, - "outputs": [], - "source": [ - "# in this tutorial train and dev datasets are located in the same folder, so it is enought to add the path of the data directory to the config\n", - "config.model.dataset.data_dir = DATA_DIR\n", - "\n", - "# if you want to use the full dataset, set NUM_SAMPLES to -1\n", - "NUM_SAMPLES = 1000\n", - "config.model.train_ds.num_samples = NUM_SAMPLES\n", - "config.model.validation_ds.num_samples = NUM_SAMPLES\n", - "\n", - "# for demonstartion purposes we're running only a single epoch\n", - "config.trainer.max_epochs = 5\n", - "print(OmegaConf.to_yaml(config.model))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nB96-3sTc3yk" - }, - "source": [ - "## Building the PyTorch Lightning Trainer\n", - "\n", - "NeMo models are primarily PyTorch Lightning modules - and therefore are entirely compatible with the PyTorch Lightning ecosystem.\n", - "\n", - "Let's first instantiate a Trainer object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1tG4FzZ4Ui60" - }, - "outputs": [], - "source": [ - "print(\"Trainer config - \\n\")\n", - "print(OmegaConf.to_yaml(config.trainer))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "knF6QeQQdMrH" - }, - "outputs": [], - "source": [ - "# lets modify some trainer configs\n", - "# checks if we have GPU available and uses it\n", - "accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'\n", - "config.trainer.devices = 1\n", - "config.trainer.accelerator = accelerator\n", - "\n", - "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", - "\n", - "# for mixed precision training, uncomment the line below (precision should be set to 16 and amp_level to O1):\n", - "# config.trainer.amp_level = O1\n", - "\n", - "# remove distributed training flags\n", - "config.trainer.strategy = None\n", - "\n", - "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", - "config.trainer.max_steps = 32\n", - "\n", - "config.exp_manager.exp_dir = WORK_DIR\n", - "trainer = pl.Trainer(**config.trainer)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8IlEMdVxdr6p" - }, - "source": [ - "## Setting up a NeMo Experiment¶\n", - "\n", - "NeMo has an experiment manager that handles logging and checkpointing for us, so let's use it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "exp_manager(trainer, config.get(\"exp_manager\", None))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8uztqGAmdrYt" - }, - "outputs": [], - "source": [ - "exp_dir = config.exp_manager.exp_dir\n", - "\n", - "# the exp_dir provides a path to the current experiment for easy access\n", - "exp_dir = str(exp_dir)\n", - "exp_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8tjLhUvL_o7_" - }, - "source": [ - "Before initializing the model, we might want to modify some of the model configs. 
For example, we might want to modify the pretrained BERT model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Xeuc2i7Y_nP5" - }, - "outputs": [], - "source": [ - "# get the list of supported BERT-like models; for the complete list of HuggingFace models, see https://huggingface.co/models\n", - "print(nemo_nlp.modules.get_pretrained_lm_models_list(include_external=False))\n", - "\n", - "# specify the BERT-like model you want to use\n", - "PRETRAINED_BERT_MODEL = \"bert-base-multilingual-uncased\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fzNZNAVRjDD-" - }, - "source": [ - "Now, we are ready to initialize our model. During the model initialization call, the dataset and data loaders will be prepared for training and evaluation.\n", - "Also, the pretrained BERT model will be downloaded; note that it can take up to a few minutes depending on the size of the chosen BERT model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NgsGLydWo-6-" - }, - "outputs": [], - "source": [ - "model = nemo_nlp.models.TokenClassificationModel(cfg=config.model, trainer=trainer)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kQ592Tx4pzyB" - }, - "source": [ - "## Monitoring training progress\n", - "Optionally, you can create a TensorBoard visualization to monitor training progress." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mTJr16_pp0aS" - }, - "outputs": [], - "source": [ - "try:\n", - " from google import colab\n", - " COLAB_ENV = True\n", - "except (ImportError, ModuleNotFoundError):\n", - " COLAB_ENV = False\n", - "\n", - "# Load the TensorBoard notebook extension\n", - "if COLAB_ENV:\n", - " %load_ext tensorboard\n", - " %tensorboard --logdir {exp_dir}\n", - "else:\n", - " print(\"To use tensorboard, please use this notebook in a Google Colab environment.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Fj1pdEdD0Vm3" - }, - "source": [ - "See how it performs before fine-tuning:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wo1oVGIT0aBZ" - }, - "outputs": [], - "source": [ - "# define the list of queries for inference\n", - "queries = [\n", - " 'حمید طاهایی افزود : برای اجرای این طرحها 0 میلیارد و 0 میلیون ریال اعتبار هزینه شده است . ',\n", - " 'دکتر اصغری دبیر چهارمین همایش انجمن زمین‌شناسی ایران در این زمینه گفت : از مجموع چهار صد مقاله رسیده به دبیرخانه همایش ، يك صد و هشتاد مقاله ظرف مدت دو روز در هشت سالن همایش برگزار شد . 
'\n", - "]\n", - "results = model.add_predictions(queries)\n", - "\n", - "for query, result in zip(queries, results):\n", - " print()\n", - " print(f'Query : {query}')\n", - " print(f'Result: {result.strip()}\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kyElt0Es-aSk" - }, - "outputs": [], - "source": [ - "print(\"Trainer config - \\n\")\n", - "print(OmegaConf.to_yaml(config.trainer))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hUvnSpyjp0Dh" - }, - "outputs": [], - "source": [ - "# start model training\n", - "trainer.fit(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MOrR0PeJqa0j" - }, - "source": [ - "After the training is complete, `.nemo` file that contains model's checkpoints and all associated artifacts could be found under `nemo_experiments/token_classification_model/DATE_TIME`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-lFo27PJ0o3W" - }, - "source": [ - "See how it gets better after:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9fNcBnz80rLO" - }, - "outputs": [], - "source": [ - "results = model.add_predictions(queries)\n", - "\n", - "for query, result in zip(queries, results):\n", - " print()\n", - " print(f'Query : {query}')\n", - " print(f'Result: {result.strip()}\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JxBiIKMlH8yv" - }, - "source": [ - "After training for 100 epochs, with the default config and NUM_SAMPLES = -1 (i.e. all data is used), your model performance should look similar to this: \n", - "```\n", - " label precision recall f1 support\n", - " O (label_id: 0) 99.09 99.19 99.14 32867\n", - " B-event (label_id: 1) 67.74 70.00 68.85 90\n", - " B-fac (label_id: 2) 70.89 73.68 72.26 76\n", - " B-loc (label_id: 3) 87.45 82.70 85.01 497\n", - " B-org (label_id: 4) 81.88 87.06 84.39 649\n", - " B-pers (label_id: 5) 94.93 93.36 94.14 542\n", - " B-pro (label_id: 6) 79.31 70.41 74.59 98\n", - " I-event (label_id: 7) 87.38 74.72 80.55 352\n", - " I-fac (label_id: 8) 83.08 77.14 80.00 140\n", - " I-loc (label_id: 9) 77.78 73.39 75.52 124\n", - " I-org (label_id: 10) 86.51 89.93 88.18 834\n", - " I-pers (label_id: 11) 95.30 94.35 94.82 301\n", - " I-pro (label_id: 12) 82.86 86.57 84.67 67\n", - " -------------------\n", - " micro avg 97.78 97.78 97.78 36637\n", - " macro avg 84.17 82.50 83.24 36637\n", - " weighted avg 97.78 97.78 97.77 36637\n", - "```\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VZp9STMHQAp1" - }, - "source": [ - "**References**\n", - "\n", - "1. Devlin, Jacob, et al. \"BERT: Pre-training of deep bidirectional transformers for language understanding.\" arXiv preprint arXiv:1810.04805 (2018).\n", - "\n", - "2. Hanieh Poostchi, Ehsan Zare Borzeshi, Mohammad Abdous, and Massimo Piccardi, \"PersoNER: Persian Named-Entity Recognition,\" The 26th International Conference on Computational Linguistics (COLING 2016), pages 3381–3389, Osaka, Japan, 2016.\n", - "\n", - "3. Hanieh Poostchi, Ehsan Zare Borzeshi, and Massimo Piccardi, \"BiLSTM-CRF for Persian Named-Entity Recognition; ArmanPersoNERCorpus: the First Entity-Annotated Persian Dataset,\" The 11th Edition of the Language Resources and Evaluation Conference (LREC), Miyazaki, Japan, 7-12 May 2018, ISLRN 399-379-640-828-6, ISLRN 921-509-141-609-6." 
- ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "Non_English_Downstream_Tasks_(NER).ipynb", - "private_outputs": true, - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "pycharm": { - "stem_cell": { - "cell_type": "raw", - "metadata": { - "collapsed": false - }, - "source": [] - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} From 6cde3edd34b4aab10cb06f989316d24f3e7ddca2 Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 10:31:17 -0700 Subject: [PATCH 11/20] update readme Signed-off-by: ericharper --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 06ea863600e7..c2334f5e2336 100644 --- a/README.rst +++ b/README.rst @@ -224,7 +224,7 @@ Install it manually if not using the NVIDIA PyTorch container. git clone https://github.com/ericharper/apex.git cd apex - git checkout nm_v1.11.0 + git checkout nm_v1.13.0 pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ Transformer Engine From 5e5bfab85510874a4615e567727363836810bae0 Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 14:28:33 -0700 Subject: [PATCH 12/20] update branch Signed-off-by: ericharper --- Jenkinsfile | 288 +++++++++--------- nemo/package_info.py | 2 +- tutorials/00_NeMo_Primer.ipynb | 2 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 2 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- tutorials/VoiceSwapSample.ipynb | 2 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 2 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 4 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 2 +- tutorials/asr/Multilang_ASR.ipynb | 2 +- tutorials/asr/Offline_ASR.ipynb | 2 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 2 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 2 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 2 +- .../asr/Self_Supervised_Pre_Training.ipynb | 2 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 2 +- ...Language_Models_for_Downstream_Tasks.ipynb | 2 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 2 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 2 +- .../nlp/Multitask_Prompt_and_PTuning.ipynb | 2 +- .../nlp/Punctuation_and_Capitalization.ipynb | 2 
+- ...ion_and_Capitalization_Lexical_Audio.ipynb | 2 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- tutorials/nlp/Text2Sparql.ipynb | 4 +- ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- .../ASR_with_SpeakerDiarization.ipynb | 2 +- .../Speaker_Diarization_Inference.ipynb | 2 +- .../Speaker_Diarization_Training.ipynb | 2 +- .../Speaker_Identification_Verification.ipynb | 2 +- .../ITN_with_Thutmose_Tagger.ipynb | 2 +- .../Text_(Inverse)_Normalization.ipynb | 2 +- tutorials/text_processing/WFST_Tutorial.ipynb | 2 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 2 +- tutorials/tools/Multispeaker_Simulator.ipynb | 2 +- .../tts/Aligner_Inference_Examples.ipynb | 2 +- tutorials/tts/FastPitch_Finetuning.ipynb | 2 +- .../tts/FastPitch_GermanTTS_Training.ipynb | 2 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 2 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- tutorials/tts/Tacotron2_Training.ipynb | 2 +- 65 files changed, 212 insertions(+), 212 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 212eec02697b..3da4efea155b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -93,8 +93,8 @@ pipeline { stage('L0: Unit Tests CPU') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } steps { @@ -106,8 +106,8 @@ pipeline { stage('L0: TN/ITN Tests CPU') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -139,8 +139,8 @@ pipeline { stage('L2: NeMo text processing') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -191,8 +191,8 @@ pipeline { stage('L2: ASR dev run') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -291,8 +291,8 @@ pipeline { stage('L2: ASR dev run - part two') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -322,8 +322,8 @@ pipeline { stage('L2: Speaker dev run') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -444,8 +444,8 @@ pipeline { // stage('L2: ASR DALI dev run') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -512,8 +512,8 @@ pipeline { // stage('L2: ASR RNNT dev run') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -604,8 +604,8 @@ pipeline { stage('L2: ASR Multi-dataloader dev run') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -652,8 +652,8 @@ pipeline { stage('L2: ASR Adapters') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -680,8 +680,8 @@ pipeline { stage('L2: Megatron T5 Adapter PP=2') { when { anyOf { - 
branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -724,8 +724,8 @@ pipeline { stage('L2: Megatron T5 Adapter TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -766,8 +766,8 @@ pipeline { stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -810,8 +810,8 @@ pipeline { stage('L2: Megatron T5 IA3 TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -852,8 +852,8 @@ pipeline { stage('L2: Megatron GPT Adapter TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -893,8 +893,8 @@ pipeline { stage('L2: Megatron GPT Adapter PP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -935,8 +935,8 @@ pipeline { stage('L2: Speech Transcription') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -957,8 +957,8 @@ pipeline { stage('L2: Segmentation Tool') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } stages { @@ -1013,8 +1013,8 @@ pipeline { stage('L2: G2P Models') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1094,8 +1094,8 @@ pipeline { // stage('L2: Multi-GPU Megatron finetuning') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1121,8 +1121,8 @@ pipeline { stage('L2: STS-b') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1181,8 +1181,8 @@ pipeline { stage('L2: Dialogue Classification') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1352,8 +1352,8 @@ pipeline { stage('L2: Dialogue Generation') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1418,8 +1418,8 @@ pipeline { // stage('L2: Dialogue Generation Part 2') { // when { // anyOf { -// branch 'r1.13.0' -// changeRequest target: 'r1.13.0' +// branch 'main' +// changeRequest target: 'main' // } // } // failFast true @@ -1448,8 +1448,8 @@ pipeline { stage('L2: COPY') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1478,8 +1478,8 @@ pipeline { stage('L2: Duplex Text Normalization') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1516,8 +1516,8 @@ pipeline { // stage('L2: MegaBERT Token Classification') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1542,8 +1542,8 @@ pipeline { stage('L2: BERT Text Classification') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 
'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1571,8 +1571,8 @@ pipeline { stage('L2: Parallel BERT Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1630,8 +1630,8 @@ pipeline { stage('L2: Parallel BART Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1691,8 +1691,8 @@ pipeline { stage('L2: Parallel GPT2 Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1752,8 +1752,8 @@ pipeline { stage('L2: Intent and Slot Classification Tasks') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1792,8 +1792,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Text Classification') { // when { // anyOf{ - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1821,8 +1821,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Autoresume') { // when { // anyOf{ - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1852,8 +1852,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') { // when { // anyOf{ - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1873,8 +1873,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Train from .nemo') { // when { // anyOf{ - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1896,8 +1896,8 @@ pipeline { stage('L2: Parallel NLP Examples 2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2021,8 +2021,8 @@ pipeline { stage('Punctuation & Capitalization tarred dataset') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2080,8 +2080,8 @@ pipeline { stage('Punctuation & Capitalization, Different ways of passing labels to model') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2188,8 +2188,8 @@ pipeline { stage('Punctuation & Capitalization inference') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2214,8 +2214,8 @@ pipeline { stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2276,8 +2276,8 @@ pipeline { stage('L2: Entity Linking') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2304,8 +2304,8 @@ pipeline { stage('L2: NMT Attention is All You Need Training') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' 
} } failFast true @@ -2427,8 +2427,8 @@ pipeline { stage('L2: NMT Attention is All You Need Inference') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2463,8 +2463,8 @@ pipeline { stage('L2: NMT Attention is All You Need Finetuning') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2497,8 +2497,8 @@ pipeline { stage('L2: NMT with HuggingFace') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2574,8 +2574,8 @@ pipeline { stage('L2: NMT Tarred Dataset Creation') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2628,8 +2628,8 @@ pipeline { stage('L2: Megatron NMT Training TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2723,8 +2723,8 @@ pipeline { // stage('L2: NMT Bottleneck Fallback') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -2770,8 +2770,8 @@ pipeline { // stage('L2: NMT Bottleneck Architecture') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -2853,8 +2853,8 @@ pipeline { // stage('L2: NMT Bottleneck LVM') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -3006,8 +3006,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3077,8 +3077,8 @@ pipeline { stage('L2: Megatron RETRO Pretraining and Resume Training') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3149,8 +3149,8 @@ pipeline { stage('L2: Megatron RETRO muTransfer Pretraining Performance') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3232,8 +3232,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: BioMegatron Bert NER Task') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3250,8 +3250,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3322,8 +3322,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3394,8 +3394,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 
'main' } } failFast true @@ -3411,8 +3411,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval PP2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3460,8 +3460,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Learning') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3533,8 +3533,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Convert from Megatron-LM checkpoing and Eval') { // when { // anyOf { - // branch 'r1.13.0' - // changeRequest target: 'r1.13.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -3560,8 +3560,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Change Partitions') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3599,8 +3599,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3695,8 +3695,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3809,8 +3809,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3908,8 +3908,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3988,8 +3988,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Eval') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4005,8 +4005,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, TP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4074,8 +4074,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, PP=2') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4147,8 +4147,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 GLUE/XNLI Finetuning') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4220,8 +4220,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: TTS Fast dev runs 1') { when { anyOf 
{ - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } parallel { @@ -4366,8 +4366,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L??: Speech Checkpoints tests') { when { anyOf { - branch 'r1.13.0' - changeRequest target: 'r1.13.0' + branch 'main' + changeRequest target: 'main' } } failFast true diff --git a/nemo/package_info.py b/nemo/package_info.py index 3570f1ff1f6f..071179db59c3 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -16,7 +16,7 @@ MAJOR = 1 MINOR = 14 PATCH = 0 -PRE_RELEASE = '' +PRE_RELEASE = 'rc0' # Use the following formatting: (major, minor, patch, pre-release) VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index aac1ee3b72c6..5e5dcbb92c1e 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index c537f2c86855..6f230e62c1a3 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index c7c6bd32137e..75942c6bf4af 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index f0ab7df20199..c4fec16c4181 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# install Pynini for text normalization\n", diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index a13174033e0c..1b951e7b9e8c 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index 7c895e4e6681..016737f26a9f 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# install Pynini for text normalization\n", diff --git 
a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index 27b229af8a4c..aad696e667b9 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -39,7 +39,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index 787b448620f7..5be3b50502b3 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index 9b86fab7e900..519456a012af 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -53,7 +53,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -587,7 +587,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'r1.13.0'\n", + " BRANCH = 'main'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index 224984b64cca..50e4f4536908 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index b59153517558..f0efdf1cb363 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index 939355de6368..2d42749524d9 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb 
b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index eb4676b6e01f..9a6b7b2380cf 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -45,7 +45,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index d03508f52168..a82a4804ca56 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -43,7 +43,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 06dd5f3d1ef2..8320cc8a07c9 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -101,7 +101,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 6fc3862fb3a1..2dd4cbe9d814 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -51,7 +51,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index d05503c0f1f3..29913fe0fe73 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -22,7 +22,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 751a33cdd705..5d2f1451d1bf 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index 9781d965b0c1..5756c7d58ebe 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -31,7 +31,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git 
a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index 43b7c74e1db8..2076bc06982b 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 3e1f05369e48..2488e46287a6 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -28,7 +28,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index c0b0f8aff869..0f0270c1ad75 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 40ce00ae23c2..14cf1dc3812f 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -60,7 +60,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index f4aa8d160057..5d4d5b188e18 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 8ef5322b13a2..f0d2ef14ce6f 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 2fcd2f399940..468c602a8765 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -51,7 +51,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab 
the config we'll use in this example\n", diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index 120207831ce6..faa93de12514 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index f6b56e0712a3..c63d2a8b1689 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'" + "BRANCH = 'main'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index e535f7594f97..323bfa1c49b8 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index 8395fb4c8eb1..ddd3bdd4f929 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index dd41a25e5601..0d7a1d5c8de5 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using Google Colab or if it's not installed locally\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index 203a278bea88..d8fe75940b09 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index c548bdb02161..104d69df18e2 100644 ---
a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index 54ad754e4617..f925d2bc59b0 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.13.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index 985fdb568042..3dc3d6ce192e 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -62,7 +62,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index c442913ec8ae..512a38bc90cc 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.13.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index aa80ebb5bd91..1519c234372b 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 57d443ddf5df..5580bc4cf946 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index f461a5f651ef..5ce89b3baafc 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index 54ff9d7ccabb..b7c25cb416ef 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Text2Sparql.ipynb b/tutorials/nlp/Text2Sparql.ipynb index 5b238ca27e60..b734e72c1fc6 100644 --- a/tutorials/nlp/Text2Sparql.ipynb +++ b/tutorials/nlp/Text2Sparql.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install 
git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -149,7 +149,7 @@ "WORK_DIR = \"PATH_TO_CHECKPOINTS_AND_LOGS\"\n", "\n", "# NeMo Version\n", - "BRANCH = 'r1.13.0'\n" + "BRANCH = 'main'\n" ] }, { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index b38f23002b6e..5b5b74e7bf11 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index 304befe44a14..b07dfb061625 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.13.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index bc41c8568844..0e8fadde8041 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.13.0'" + "BRANCH = 'main'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 05706014b9ba..69df7b27b02d 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index edb982a6fa0e..ea943b35e0d0 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 9c790824afcb..64ceb49d7d64 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m 
pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index c401591ea319..91df72848614 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 0b19f83bbcd8..8e3ae9c1f131 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", diff --git a/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb b/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb index dcf944769e9f..b72cee51003b 100644 --- a/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb +++ b/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb @@ -21,7 +21,7 @@ "import os\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "\n", "GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n", "\n", diff --git a/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb b/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb index e00dfc9463de..596523b41c0a 100644 --- a/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb +++ b/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb @@ -60,7 +60,7 @@ "outputs": [], "source": [ "## Install NeMo, which installs both nemo and nemo_text_processing package\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n", "# install Pynini for text normalization\n", diff --git a/tutorials/text_processing/WFST_Tutorial.ipynb b/tutorials/text_processing/WFST_Tutorial.ipynb index 51daded0b796..ed7127241dd5 100644 --- a/tutorials/text_processing/WFST_Tutorial.ipynb +++ b/tutorials/text_processing/WFST_Tutorial.ipynb @@ -39,7 +39,7 @@ "outputs": [], "source": [ "## Install NeMo, which installs both nemo and nemo_text_processing package\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nemo_text_processing]\n", "\n", "# install Pynini for text normalization\n", diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 25f63da12df9..d22258885db8 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index e9822fd0ea9e..8b0db6e75b49 100644 --- 
a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index d32305579166..f6acbfa2c0d4 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb index 034e9e050aaf..fe607944c1e8 100755 --- a/tutorials/tts/FastPitch_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Finetuning.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode pynini==2.1.4\n", diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index a7de4224ae97..0b2e4f3fe132 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -51,7 +51,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode pynini==2.1.4 scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 7547d478b5b2..1b2ebc66ea3b 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode pynini==2.1.4 scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb index 7f55c3d2e12f..eda5bba0aa1e 100644 --- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb +++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb @@ -94,7 +94,7 @@ "source": [ "# Install NeMo library. 
If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index 59a01c628449..c4879f38274c 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb index 71067530b311..8fe398edafa6 100644 --- a/tutorials/tts/Inference_ModelSelect.ipynb +++ b/tutorials/tts/Inference_ModelSelect.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index 938eac687d07..21c366155b17 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index 995a204249a9..3642a3e9e4dc 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. 
Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.13.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", From 6107a5bebec06cad9201ce75e78343da2aa060e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Dec 2022 22:21:48 +0000 Subject: [PATCH 13/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/nlp/language_modeling/megatron_gpt_prompt_learning.py | 2 +- examples/nlp/language_modeling/megatron_t5_prompt_learning.py | 2 +- tools/speech_data_processor/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py index a7bb3351fe1c..554178e7e95d 100644 --- a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from lightning_lite.plugins.environments import TorchElasticEnvironment import torch.multiprocessing as mp +from lightning_lite.plugins.environments import TorchElasticEnvironment from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.callbacks.timer import Timer diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py index a18c4ca84b6a..efcf01288a7c 100644 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from lightning_lite.plugins.environments import TorchElasticEnvironment import torch.multiprocessing as mp +from lightning_lite.plugins.environments import TorchElasticEnvironment from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.callbacks.timer import Timer diff --git a/tools/speech_data_processor/requirements.txt b/tools/speech_data_processor/requirements.txt index e07336a0d3c3..63904d71d9c9 100644 --- a/tools/speech_data_processor/requirements.txt +++ b/tools/speech_data_processor/requirements.txt @@ -1 +1 @@ -diff_match_patch \ No newline at end of file +diff_match_patch From 778c43ba0fae0c879f93315098be17f038ec47b4 Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 15:23:34 -0700 Subject: [PATCH 14/20] revert Signed-off-by: ericharper --- examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py index 9064ce9f452d..2d1e104660a2 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from lightning_lite.plugins.environments import TorchElasticEnvironment from megatron_t5_seq2seq_finetune import load_from_checkpoint_dir, load_from_nemo, validate_checkpoint_loading_args from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer From 2069440759d83b9da86a7b5e3bebf75df8799e6b Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 15:28:44 -0700 Subject: [PATCH 15/20] revert Signed-off-by: ericharper --- examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py index 2d1e104660a2..008bdc90cdd6 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py @@ -127,7 +127,7 @@ def main(cfg) -> None: t5_cfg = MegatronT5FinetuneModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True ) - model = load_from_nemo(MegatronT5FinetuneModel, cfg, trainer, modify_confg_fn=_modify_config) + model = load_from_nemo(MegatronT5FinetuneModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) model = load_from_checkpoint_dir(MegatronT5FinetuneModel, cfg, trainer, modify_confg_fn=_modify_config) From d8fb975cfdd5f2beb25ead6efe3b185d88a0db9d Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 16:14:33 -0700 Subject: [PATCH 16/20] revert Signed-off-by: ericharper --- nemo/collections/tts/modules/common.py | 45 ++++++++------------------ 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/nemo/collections/tts/modules/common.py b/nemo/collections/tts/modules/common.py index 0765d0499bda..63c28f12a4a7 100644 --- a/nemo/collections/tts/modules/common.py +++ b/nemo/collections/tts/modules/common.py @@ -122,30 +122,22 @@ def lstm_tensor(self, context: Tensor, lens: Tensor, enforce_sorted: bool = Fals seq = nn.utils.rnn.pack_padded_sequence( context, lens.long().cpu(), batch_first=True, enforce_sorted=enforce_sorted ) - if not (torch.jit.is_scripting() or torch.jit.is_tracing()): - self.bilstm.flatten_parameters() - if hasattr(self.bilstm, 'forward'): - ret, _ = self.bilstm.forward(seq) - else: - ret, _ = self.bilstm.forward_1(seq) - return nn.utils.rnn.pad_packed_sequence(ret, batch_first=True) + return self.lstm_sequence(seq) def lstm_sequence(self, seq: PackedSequence) -> Tuple[Tensor, Tensor]: if not (torch.jit.is_scripting() or torch.jit.is_tracing()): self.bilstm.flatten_parameters() - if hasattr(self.bilstm, 'forward'): - ret, _ = self.bilstm.forward(seq) - elif hasattr(self.bilstm, 'forward_1'): - ret, _ = self.bilstm.forward_1(seq) + ret, _ = self.bilstm(seq) return nn.utils.rnn.pad_packed_sequence(ret, batch_first=True) - @torch.jit.export - def sort_and_lstm_tensor(self, context: Tensor, lens: Tensor) -> Tensor: + def forward(self, context: Tensor, lens: Tensor) -> Tensor: context, lens_sorted, unsort_ids = sort_tensor(context, lens) - seq = nn.utils.rnn.pack_padded_sequence( - context, lens_sorted.long().cpu(), batch_first=True, enforce_sorted=True - ) - return self.lstm_sequence(seq)[0][unsort_ids] + dtype = context.dtype + # this is only needed for Torchscript to run in Triton + # (https://github.com/pytorch/pytorch/issues/89241) + with torch.cuda.amp.autocast(enabled=False): + ret = self.lstm_tensor(context.to(dtype=torch.float32), lens_sorted, 
enforce_sorted=True) + return ret[0].to(dtype=dtype)[unsort_ids] class ConvLSTMLinear(nn.Module): @@ -160,7 +152,8 @@ def __init__( use_partial_padding=False, norm_fn=None, ): - super(ConvLSTMLinear, self).__init__(n_channels, int(n_channels // 2), 1) + super(ConvLSTMLinear, self).__init__() + self.bilstm = BiLSTM(n_channels, int(n_channels // 2), 1) self.convolutions = nn.ModuleList() if n_layers > 0: @@ -191,24 +184,14 @@ def __init__( if out_dim is not None: self.dense = nn.Linear(n_channels, out_dim) - def masked_conv_to_sequence(self, context: Tensor, lens: Tensor, enforce_sorted: bool = False) -> PackedSequence: + def forward(self, context: Tensor, lens: Tensor) -> Tensor: mask = get_mask_from_lengths_and_val(lens, context) mask = mask.to(dtype=context.dtype).unsqueeze(1) for conv in self.convolutions: context = self.dropout(F.relu(conv(context, mask))) - context = context.transpose(1, 2) - seq = torch.nn.utils.rnn.pack_padded_sequence( - context, lens.long().cpu(), batch_first=True, enforce_sorted=enforce_sorted - ) - return seq - - def forward(self, context: Tensor, lens: Tensor) -> Tensor: - context, lens, unsort_ids = sort_tensor(context, lens) - seq = self.masked_conv_to_sequence(context, lens, enforce_sorted=True) - context, _ = self.lstm_sequence(seq) - context = context[unsort_ids] - + # Apply Bidirectional LSTM + context = self.bilstm(context, lens) if self.dense is not None: context = self.dense(context).permute(0, 2, 1) return context From 966eae457cd1735b2a60af06c81a149643665f62 Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 16:16:59 -0700 Subject: [PATCH 17/20] revert Signed-off-by: ericharper --- nemo/collections/tts/modules/radtts.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/nemo/collections/tts/modules/radtts.py b/nemo/collections/tts/modules/radtts.py index dca0f0ede62c..9f360a4e5a33 100644 --- a/nemo/collections/tts/modules/radtts.py +++ b/nemo/collections/tts/modules/radtts.py @@ -345,9 +345,7 @@ def preprocess_context(self, context, speaker_vecs, out_lens, f0, energy_avg): context_w_spkvec = torch.cat((context_w_spkvec, energy_avg), 1) unfolded_out_lens = out_lens // self.n_group_size - context_lstm_padded_output = self.context_lstm.sort_and_lstm_tensor( - context_w_spkvec.transpose(1, 2), unfolded_out_lens - ) + context_lstm_padded_output = self.context_lstm(context_w_spkvec.transpose(1, 2), unfolded_out_lens) context_w_spkvec = context_lstm_padded_output.transpose(1, 2) if not self.context_lstm_w_f0_and_energy: @@ -772,8 +770,8 @@ def input_example(self, max_batch=1, max_dim=256): """ par = next(self.parameters()) sz = (max_batch, max_dim) - inp = torch.randint(0, 16, sz, device=par.device, dtype=torch.int64) - lens = torch.randint(16, max_dim, (max_batch,), device=par.device, dtype=torch.int) + inp = torch.randint(16, 32, sz, device=par.device, dtype=torch.int64) + lens = torch.randint(max_dim // 4, max_dim // 2, (max_batch,), device=par.device, dtype=torch.int) speaker = torch.randint(0, 1, (max_batch,), device=par.device, dtype=torch.int64) inputs = { 'text': inp, From 2177c5b221c4f1b969be02780ebc7bd4c92fa078 Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 16:21:00 -0700 Subject: [PATCH 18/20] revert Signed-off-by: ericharper --- nemo/utils/export_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nemo/utils/export_utils.py b/nemo/utils/export_utils.py index 863c7fd1d2ec..02d99c6ba7fd 100644 --- a/nemo/utils/export_utils.py +++ 
b/nemo/utils/export_utils.py @@ -15,7 +15,7 @@ import os from contextlib import nullcontext from enum import Enum -from typing import Callable, Dict, List, Optional, Type +from typing import Callable, Dict, Optional, Type import onnx import torch @@ -122,8 +122,12 @@ def verify_torchscript(model, output, input_examples, input_names, check_toleran for input_example in input_examples: input_list, input_dict = parse_input_example(input_example) output_example = model.forward(*input_list, **input_dict) - - all_good = all_good and run_ts_and_compare(ts_model, input_list, input_dict, output_example, check_tolerance) + # We disable autocast here to make sure exported TS will run under Triton or other C++ env + with torch.cuda.amp.autocast(enabled=False): + ts_model = torch.jit.load(output) + all_good = all_good and run_ts_and_compare( + ts_model, input_list, input_dict, output_example, check_tolerance + ) status = "SUCCESS" if all_good else "FAIL" logging.info(f"Torchscript generated at {output} verified with torchscript forward : " + status) return all_good @@ -326,7 +330,6 @@ def replace_MatchedScaleMaskSoftmax(n: nn.Module) -> Optional[nn.Linear]: Returns: exportable module """ - # including the import here to avoid circular imports from nemo.collections.nlp.modules.common.megatron.fused_softmax import MatchedScaleMaskSoftmax @@ -334,7 +337,6 @@ def replace_MatchedScaleMaskSoftmax(n: nn.Module) -> Optional[nn.Linear]: mod = MatchedScaleMaskSoftmax( n.input_in_fp16, n.input_in_bf16, n.attn_mask_type, False, n.mask_func, n.softmax_in_fp32, n.scale ) - return mod @@ -399,8 +401,7 @@ def replace_modules( def script_module(m: nn.Module): - m1 = torch.jit.script(m) - return m1 + return torch.jit.script(m) default_replacements = { @@ -412,7 +413,6 @@ def script_module(m: nn.Module): script_replacements = { "BiLSTM": script_module, - "ConvLSTMLinear": script_module, } From 64334e858c2a2f8a31bbbd5d7d3acb1c1bbd2449 Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 18:18:47 -0700 Subject: [PATCH 19/20] revert Signed-off-by: ericharper --- nemo/core/classes/exportable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py index b3f0b2fdd642..50266dab3dbe 100644 --- a/nemo/core/classes/exportable.py +++ b/nemo/core/classes/exportable.py @@ -128,7 +128,7 @@ def _export( # Set module mode with torch.onnx.select_model_mode_for_export( self, training - ), torch.inference_mode(), torch.no_grad(), torch.jit.optimized_execution(True): + ), torch.inference_mode(), torch.no_grad(), torch.jit.optimized_execution(True), _jit_is_scripting(): if input_example is None: input_example = self.input_module.input_example() From 3a33a0109fd3347952f4b73ddfd117c49c5a2a49 Mon Sep 17 00:00:00 2001 From: ericharper Date: Wed, 7 Dec 2022 18:19:27 -0700 Subject: [PATCH 20/20] revert Signed-off-by: ericharper --- tests/collections/tts/test_tts_exportables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/collections/tts/test_tts_exportables.py b/tests/collections/tts/test_tts_exportables.py index e3e496373271..c7083b45f6d7 100644 --- a/tests/collections/tts/test_tts_exportables.py +++ b/tests/collections/tts/test_tts_exportables.py @@ -73,6 +73,7 @@ def test_HifiGanModel_export_to_onnx(self, hifigan_model): filename = os.path.join(tmpdir, 'hfg.pt') model.export(output=filename, verbose=True, check_trace=True) + @pytest.mark.pleasefixme @pytest.mark.run_only_on('GPU') @pytest.mark.unit def test_RadTTSModel_export_to_torchscript(self, 
radtts_model):
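
Taken together, patches 16-20 converge on one export story for the TTS modules: ConvLSTMLinear owns a plain BiLSTM submodule and calls it eagerly, the LSTM itself runs in float32 with autocast disabled (the Torchscript-under-Triton workaround noted in common.py), only "BiLSTM" stays in script_replacements, and the RadTTS Torchscript test is parked behind @pytest.mark.pleasefixme while that path settles. For readers who want the pattern outside the diff context, here is a minimal, self-contained sketch of the same sort/pack/run/unpack/unsort-and-verify flow. It is illustrative only, not NeMo's implementation: TinyBiLSTM, the tensor sizes, and the tolerance are invented for the example.

import torch
import torch.nn as nn


class TinyBiLSTM(nn.Module):
    """Stand-in for the BiLSTM wrapper: sort by length, pack, run, unpack, unsort."""

    def __init__(self, dim: int = 8):
        super().__init__()
        self.bilstm = nn.LSTM(dim, dim // 2, 1, batch_first=True, bidirectional=True)

    def forward(self, context: torch.Tensor, lens: torch.Tensor) -> torch.Tensor:
        if not torch.jit.is_scripting():
            # Mirrors the eager-only flatten_parameters() call in the patched module.
            self.bilstm.flatten_parameters()
        lens_sorted, ids = torch.sort(lens, descending=True)
        unsort_ids = torch.argsort(ids)
        seq = nn.utils.rnn.pack_padded_sequence(
            context[ids], lens_sorted.long().cpu(), batch_first=True, enforce_sorted=True
        )
        ret, _ = self.bilstm(seq)  # LSTM's PackedSequence overload is Torchscript-compatible
        padded, _ = nn.utils.rnn.pad_packed_sequence(ret, batch_first=True)
        return padded[unsort_ids]


model = TinyBiLSTM().eval()
scripted = torch.jit.script(model)  # the one script_replacements entry the revert keeps

x = torch.randn(2, 5, 8)
lens = torch.tensor([5, 3])
# Compare eager and scripted outputs with autocast off, the same way
# verify_torchscript runs the loaded TS model above.
with torch.inference_mode(), torch.cuda.amp.autocast(enabled=False):
    eager_out = model(x, lens)
    scripted_out = scripted(x, lens)
assert torch.allclose(eager_out, scripted_out, atol=1e-5)

The design point of the revert is that a single small scripted submodule (the BiLSTM) carries all of the Torchscript constraints, while the surrounding convolutions and linear layer stay eager; that is why ConvLSTMLinear is dropped from script_replacements rather than made scriptable.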