From ec9cb401c448311288b3f744cdbedf989182f651 Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Tue, 11 Jan 2022 10:31:33 +0300 Subject: [PATCH 1/9] Fix RANK env variable check in global rank check Signed-off-by: PeganovAnton --- nemo/utils/get_rank.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/utils/get_rank.py b/nemo/utils/get_rank.py index 8d3fd0e5c7b6..3bd3ca24d7ae 100644 --- a/nemo/utils/get_rank.py +++ b/nemo/utils/get_rank.py @@ -21,7 +21,7 @@ def is_global_rank_zero(): # Try to get the pytorch RANK env var # RANK is set by torch.distributed.launch rank = get_envint("RANK", None) - if rank: + if rank is not None: return rank == 0 # If not set by pytorch, we need to determine node_rank From a56db7ae517ebc5db8dbd19b7e446abd7958a919 Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Fri, 28 Jan 2022 14:58:55 +0300 Subject: [PATCH 2/9] Fix restoring from checkpoint with label vocab dir Signed-off-by: PeganovAnton --- .../punctuation_capitalization_model.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nemo/collections/nlp/models/token_classification/punctuation_capitalization_model.py b/nemo/collections/nlp/models/token_classification/punctuation_capitalization_model.py index d358d2975c82..fa0ffd70d5c8 100644 --- a/nemo/collections/nlp/models/token_classification/punctuation_capitalization_model.py +++ b/nemo/collections/nlp/models/token_classification/punctuation_capitalization_model.py @@ -651,16 +651,16 @@ def _check_label_config_parameters(self) -> None: ) def _extract_label_vocab_files_from_config(self) -> Tuple[Optional[Path], Optional[Path]]: - if self._cfg.common_dataset_parameters.label_vocab_dir is None: - if self._is_model_being_restored(): - punct_label_vocab_file = self._cfg.class_labels.punct_labels_file - capit_label_vocab_file = self._cfg.class_labels.capit_labels_file - else: - punct_label_vocab_file, capit_label_vocab_file = None, None + if self._is_model_being_restored(): + punct_label_vocab_file = self._cfg.class_labels.punct_labels_file + capit_label_vocab_file = self._cfg.class_labels.capit_labels_file else: - label_vocab_dir = Path(self._cfg.common_dataset_parameters.label_vocab_dir).expanduser() - punct_label_vocab_file = label_vocab_dir / self._cfg.class_labels.punct_labels_file - capit_label_vocab_file = label_vocab_dir / self._cfg.class_labels.capit_labels_file + if self._cfg.common_dataset_parameters.label_vocab_dir is None: + punct_label_vocab_file, capit_label_vocab_file = None, None + else: + label_vocab_dir = Path(self._cfg.common_dataset_parameters.label_vocab_dir).expanduser() + punct_label_vocab_file = label_vocab_dir / self._cfg.class_labels.punct_labels_file + capit_label_vocab_file = label_vocab_dir / self._cfg.class_labels.capit_labels_file return punct_label_vocab_file, capit_label_vocab_file def _set_label_ids(self) -> None: From 6409dd48cf49f799f1ff5882a43bdbbe6b02c7b0 Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Mon, 14 Feb 2022 15:38:16 +0300 Subject: [PATCH 3/9] Add tests for various ways to pass label ids to model Signed-off-by: PeganovAnton --- Jenkinsfile | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 1cc106cecd20..f411140765c9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1168,6 +1168,92 @@ pipeline { rm -rf /home/TestData/nlp/token_classification_punctuation/output/*' } } + stage('Punctuation & Capitalization, Using model.common_datasest_parameters.label_vocab_dir') { + steps { + sh 'cd examples/nlp/token_classification && \ + mkdir -p tmp_data && \ + cp /home/TestData/nlp/token_classification_punctuation/*.txt tmp_data/ && \ + label_vocab_dir=label_vocab_dir && \ + mkdir -p ${label_vocab_dir} && \ + punct_label_vocab="${label_vocab_dir}/punct_label_vocab.csv" && \ + capit_label_vocab="${label_vocab_dir}/capit_label_vocab.csv" && \ + printf "O\n,\n.\n?\n" > "${punct_label_vocab}" && \ + printf "O\nU\n" > "${capit_label_vocab}" && \ + python punctuation_capitalization_train_evaluate.py \ + model.train_ds.use_tarred_dataset=false \ + model.train_ds.ds_item=tmp_data \ + model.validation_ds.ds_item=tmp_data \ + model.test_ds.ds_item=tmp_data \ + model.language_model.pretrained_model_name=distilbert-base-uncased \ + model.common_dataset_parameters.label_vocab_dir="${label_vocab_dir}" \ + model.class_labels.punct_labels_file="$(basename "${punct_label_vocab}")" \ + model.class_labels.capit_labels_file="$(basename "${capit_label_vocab}")" \ + +model.train_ds.use_cache=false \ + +model.validation_ds.use_cache=false \ + +model.test_ds.use_cache=false \ + trainer.gpus=[0,1] \ + trainer.strategy=ddp \ + trainer.max_epochs=1 \ + +exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \ + +do_testing=false && \ + python punctuation_capitalization_train_evaluate.py \ + +do_training=false \ + +do_testing=true \ + ~model.train_ds \ + ~model.validation_ds \ + model.test_ds.ds_item=tmp_data \ + pretrained_model=/home/TestData/nlp/token_classification_punctuation/output/checkpoints/Punctuation_and_Capitalization.nemo \ + +model.train_ds.use_cache=false \ + +model.validation_ds.use_cache=false \ + +model.test_ds.use_cache=false \ + trainer.gpus=[0,1] \ + trainer.strategy=ddp \ + trainer.max_epochs=1 \ + exp_manager=null && \ + rm -r tmp_data && \ + rm -r "${label_vocab_dir}" && \ + rm -rf /home/TestData/nlp/token_classification_punctuation/output/*' + } + } + stage('Punctuation & Capitalization, Using model.common_datasest_parameters.{punct,capit}_label_ids') { + steps { + sh 'cd examples/nlp/token_classification && \ + mkdir -p tmp_data && \ + cp /home/TestData/nlp/token_classification_punctuation/*.txt tmp_data/ && \ + python punctuation_capitalization_train_evaluate.py \ + --config-path /home/TestData/nlp/token_classification + --config-name punctuation_capitalization_config_with_ids \ + model.train_ds.use_tarred_dataset=false \ + model.train_ds.ds_item=tmp_data \ + model.validation_ds.ds_item=tmp_data \ + model.test_ds.ds_item=tmp_data \ + model.language_model.pretrained_model_name=distilbert-base-uncased \ + +model.train_ds.use_cache=false \ + +model.validation_ds.use_cache=false \ + +model.test_ds.use_cache=false \ + trainer.gpus=[0,1] \ + trainer.strategy=ddp \ + trainer.max_epochs=1 \ + +exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \ + +do_testing=false && \ + python punctuation_capitalization_train_evaluate.py \ + +do_training=false \ + +do_testing=true \ + ~model.train_ds \ + ~model.validation_ds \ + model.test_ds.ds_item=tmp_data \ + pretrained_model=/home/TestData/nlp/token_classification_punctuation/output/checkpoints/Punctuation_and_Capitalization.nemo \ + +model.train_ds.use_cache=false \ + +model.validation_ds.use_cache=false \ + +model.test_ds.use_cache=false \ + trainer.gpus=[0,1] \ + trainer.strategy=ddp \ + trainer.max_epochs=1 \ + exp_manager=null && \ + rm -r tmp_data && \ + rm -rf /home/TestData/nlp/token_classification_punctuation/output/*' + } + } } post { always { From c6b69282b99939e806827aa1913487cc5f242d8e Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Mon, 14 Feb 2022 17:30:11 +0300 Subject: [PATCH 4/9] Fix typo Signed-off-by: PeganovAnton --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index f411140765c9..254bcfea636d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1221,7 +1221,7 @@ pipeline { mkdir -p tmp_data && \ cp /home/TestData/nlp/token_classification_punctuation/*.txt tmp_data/ && \ python punctuation_capitalization_train_evaluate.py \ - --config-path /home/TestData/nlp/token_classification + --config-path /home/TestData/nlp/token_classification \ --config-name punctuation_capitalization_config_with_ids \ model.train_ds.use_tarred_dataset=false \ model.train_ds.ds_item=tmp_data \ From e0ffa4c71014c4c3612f6e27fbeac77ccc29b6c4 Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Mon, 14 Feb 2022 18:06:35 +0300 Subject: [PATCH 5/9] Fix typo Signed-off-by: PeganovAnton --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 254bcfea636d..2574e0a5a437 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1221,7 +1221,7 @@ pipeline { mkdir -p tmp_data && \ cp /home/TestData/nlp/token_classification_punctuation/*.txt tmp_data/ && \ python punctuation_capitalization_train_evaluate.py \ - --config-path /home/TestData/nlp/token_classification \ + --config-path /home/TestData/nlp/token_classification_punctuation \ --config-name punctuation_capitalization_config_with_ids \ model.train_ds.use_tarred_dataset=false \ model.train_ds.ds_item=tmp_data \ From fcc7c817f85a9a67d32fcbde512dad10b203d51d Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Mon, 14 Feb 2022 19:59:01 +0300 Subject: [PATCH 6/9] Do not create tmp directory Signed-off-by: PeganovAnton --- Jenkinsfile | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2574e0a5a437..f3f2c3b172fc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1171,8 +1171,6 @@ pipeline { stage('Punctuation & Capitalization, Using model.common_datasest_parameters.label_vocab_dir') { steps { sh 'cd examples/nlp/token_classification && \ - mkdir -p tmp_data && \ - cp /home/TestData/nlp/token_classification_punctuation/*.txt tmp_data/ && \ label_vocab_dir=label_vocab_dir && \ mkdir -p ${label_vocab_dir} && \ punct_label_vocab="${label_vocab_dir}/punct_label_vocab.csv" && \ @@ -1181,9 +1179,9 @@ pipeline { printf "O\nU\n" > "${capit_label_vocab}" && \ python punctuation_capitalization_train_evaluate.py \ model.train_ds.use_tarred_dataset=false \ - model.train_ds.ds_item=tmp_data \ - model.validation_ds.ds_item=tmp_data \ - model.test_ds.ds_item=tmp_data \ + model.train_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ + model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ + model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ model.language_model.pretrained_model_name=distilbert-base-uncased \ model.common_dataset_parameters.label_vocab_dir="${label_vocab_dir}" \ model.class_labels.punct_labels_file="$(basename "${punct_label_vocab}")" \ @@ -1201,7 +1199,7 @@ pipeline { +do_testing=true \ ~model.train_ds \ ~model.validation_ds \ - model.test_ds.ds_item=tmp_data \ + model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ pretrained_model=/home/TestData/nlp/token_classification_punctuation/output/checkpoints/Punctuation_and_Capitalization.nemo \ +model.train_ds.use_cache=false \ +model.validation_ds.use_cache=false \ @@ -1210,7 +1208,6 @@ pipeline { trainer.strategy=ddp \ trainer.max_epochs=1 \ exp_manager=null && \ - rm -r tmp_data && \ rm -r "${label_vocab_dir}" && \ rm -rf /home/TestData/nlp/token_classification_punctuation/output/*' } @@ -1218,15 +1215,13 @@ pipeline { stage('Punctuation & Capitalization, Using model.common_datasest_parameters.{punct,capit}_label_ids') { steps { sh 'cd examples/nlp/token_classification && \ - mkdir -p tmp_data && \ - cp /home/TestData/nlp/token_classification_punctuation/*.txt tmp_data/ && \ python punctuation_capitalization_train_evaluate.py \ --config-path /home/TestData/nlp/token_classification_punctuation \ --config-name punctuation_capitalization_config_with_ids \ model.train_ds.use_tarred_dataset=false \ - model.train_ds.ds_item=tmp_data \ - model.validation_ds.ds_item=tmp_data \ - model.test_ds.ds_item=tmp_data \ + model.train_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ + model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ + model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ model.language_model.pretrained_model_name=distilbert-base-uncased \ +model.train_ds.use_cache=false \ +model.validation_ds.use_cache=false \ @@ -1241,7 +1236,7 @@ pipeline { +do_testing=true \ ~model.train_ds \ ~model.validation_ds \ - model.test_ds.ds_item=tmp_data \ + model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ pretrained_model=/home/TestData/nlp/token_classification_punctuation/output/checkpoints/Punctuation_and_Capitalization.nemo \ +model.train_ds.use_cache=false \ +model.validation_ds.use_cache=false \ @@ -1250,7 +1245,6 @@ pipeline { trainer.strategy=ddp \ trainer.max_epochs=1 \ exp_manager=null && \ - rm -r tmp_data && \ rm -rf /home/TestData/nlp/token_classification_punctuation/output/*' } } From 62801d236aa8c20e76ecd1b88239d36a707c4f58 Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Sat, 7 May 2022 10:55:59 +0300 Subject: [PATCH 7/9] Fix parameter name Signed-off-by: PeganovAnton --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c6c9e3571e72..2fd95abb5fbb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1512,7 +1512,7 @@ pipeline { +model.train_ds.use_cache=false \ +model.validation_ds.use_cache=false \ +model.test_ds.use_cache=false \ - trainer.gpus=[0,1] \ + trainer.devices=[0,1] \ trainer.strategy=ddp \ trainer.max_epochs=1 \ +exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \ @@ -1527,7 +1527,7 @@ pipeline { +model.train_ds.use_cache=false \ +model.validation_ds.use_cache=false \ +model.test_ds.use_cache=false \ - trainer.gpus=[0,1] \ + trainer.devices=[0,1] \ trainer.strategy=ddp \ trainer.max_epochs=1 \ exp_manager=null && \ From 47de46a1598ccbf133a298000ffbaee307cfadc7 Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Sat, 7 May 2022 11:51:24 +0300 Subject: [PATCH 8/9] Try CUDA launch blocking Signed-off-by: PeganovAnton --- Jenkinsfile | 143 ++++++++++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 66 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2fd95abb5fbb..d9fe59a1872d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1491,6 +1491,79 @@ pipeline { rm -rf /home/TestData/nlp/token_classification_punctuation/output/*' } } + } + post { + always { + sh 'pwd && ls nemo_* && rm -rf nemo_experiments && ls nemo_*' + } + } + } + stage('Punctuation & Capitalization tarred dataset') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + stages { + stage('create and use tarred dataset') { + steps { + sh 'data_dir=/home/TestData/nlp/token_classification_punctuation && \ + usual_data=${data_dir}/wmt_wiki_10000 && \ + tarred_data=${data_dir}/train_tarred && \ + TIME=`date +"%Y-%m-%d-%T"` \ + output=${data_dir}/output_${TIME} && \ + tokens_in_batch=2000 && \ + max_seq_length=512 && \ + lm_model=distilbert-base-uncased && \ + python examples/nlp/token_classification/data/create_punctuation_capitalization_tarred_dataset.py \ + --text ${usual_data}/input.txt \ + --labels ${usual_data}/labels.txt \ + --output_dir ${tarred_data} \ + --tokens_in_batch ${tokens_in_batch} \ + --max_seq_length 512 \ + --lines_per_dataset_fragment 2000 \ + --num_batches_per_tarfile 5 \ + --tar_file_prefix punctuation_capitalization \ + --tokenizer_name ${lm_model} \ + --use_fast_tokenizer \ + --pad_label O \ + --n_jobs 3 && \ + echo "Number of tarred files in dataset:" && \ + ls ${tarred_data}/*.tar | wc -l && \ + echo "Label id files in dataset:" && \ + ls ${tarred_data}/*.csv && \ + metadata_file=${tarred_data}/metadata.punctuation_capitalization.tokens${tokens_in_batch}.max_seq_length${max_seq_length}.${lm_model}.json && \ + python examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py \ + model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation/ \ + model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation/ \ + model.train_ds.ds_item=${tarred_data} \ + model.language_model.pretrained_model_name=${lm_model} \ + model.train_ds.use_tarred_dataset=true \ + model.train_ds.tar_metadata_file=${metadata_file} \ + +model.train_ds.use_cache=false \ + +model.validation_ds.use_cache=false \ + +model.test_ds.use_cache=false \ + trainer.devices=[0,1] \ + trainer.accelerator="gpu" \ + trainer.strategy=ddp \ + trainer.max_epochs=1 \ + +exp_manager.explicit_log_dir=${output} && \ + rm -rf ${output}/* ${tarred_data}' + } + } + } + } + stage('Punctuation & Capitalization, Different ways of passing labels to model') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + stages { stage('Punctuation & Capitalization, Using model.common_datasest_parameters.label_vocab_dir') { steps { sh 'cd examples/nlp/token_classification && \ @@ -1500,7 +1573,7 @@ pipeline { capit_label_vocab="${label_vocab_dir}/capit_label_vocab.csv" && \ printf "O\n,\n.\n?\n" > "${punct_label_vocab}" && \ printf "O\nU\n" > "${capit_label_vocab}" && \ - python punctuation_capitalization_train_evaluate.py \ + CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \ model.train_ds.use_tarred_dataset=false \ model.train_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ @@ -1517,7 +1590,7 @@ pipeline { trainer.max_epochs=1 \ +exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \ +do_testing=false && \ - python punctuation_capitalization_train_evaluate.py \ + CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \ +do_training=false \ +do_testing=true \ ~model.train_ds \ @@ -1538,7 +1611,7 @@ pipeline { stage('Punctuation & Capitalization, Using model.common_datasest_parameters.{punct,capit}_label_ids') { steps { sh 'cd examples/nlp/token_classification && \ - python punctuation_capitalization_train_evaluate.py \ + CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \ --config-path /home/TestData/nlp/token_classification_punctuation \ --config-name punctuation_capitalization_config_with_ids \ model.train_ds.use_tarred_dataset=false \ @@ -1554,7 +1627,7 @@ pipeline { trainer.max_epochs=1 \ +exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \ +do_testing=false && \ - python punctuation_capitalization_train_evaluate.py \ + CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \ +do_training=false \ +do_testing=true \ ~model.train_ds \ @@ -1572,68 +1645,6 @@ pipeline { } } } - post { - always { - sh 'pwd && ls nemo_* && rm -rf nemo_experiments && ls nemo_*' - } - } - } - stage('Punctuation & Capitalization tarred dataset') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - stages { - stage('create and use tarred dataset') { - steps { - sh 'data_dir=/home/TestData/nlp/token_classification_punctuation && \ - usual_data=${data_dir}/wmt_wiki_10000 && \ - tarred_data=${data_dir}/train_tarred && \ - TIME=`date +"%Y-%m-%d-%T"` \ - output=${data_dir}/output_${TIME} && \ - tokens_in_batch=2000 && \ - max_seq_length=512 && \ - lm_model=distilbert-base-uncased && \ - python examples/nlp/token_classification/data/create_punctuation_capitalization_tarred_dataset.py \ - --text ${usual_data}/input.txt \ - --labels ${usual_data}/labels.txt \ - --output_dir ${tarred_data} \ - --tokens_in_batch ${tokens_in_batch} \ - --max_seq_length 512 \ - --lines_per_dataset_fragment 2000 \ - --num_batches_per_tarfile 5 \ - --tar_file_prefix punctuation_capitalization \ - --tokenizer_name ${lm_model} \ - --use_fast_tokenizer \ - --pad_label O \ - --n_jobs 3 && \ - echo "Number of tarred files in dataset:" && \ - ls ${tarred_data}/*.tar | wc -l && \ - echo "Label id files in dataset:" && \ - ls ${tarred_data}/*.csv && \ - metadata_file=${tarred_data}/metadata.punctuation_capitalization.tokens${tokens_in_batch}.max_seq_length${max_seq_length}.${lm_model}.json && \ - python examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py \ - model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation/ \ - model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation/ \ - model.train_ds.ds_item=${tarred_data} \ - model.language_model.pretrained_model_name=${lm_model} \ - model.train_ds.use_tarred_dataset=true \ - model.train_ds.tar_metadata_file=${metadata_file} \ - +model.train_ds.use_cache=false \ - +model.validation_ds.use_cache=false \ - +model.test_ds.use_cache=false \ - trainer.devices=[0,1] \ - trainer.accelerator="gpu" \ - trainer.strategy=ddp \ - trainer.max_epochs=1 \ - +exp_manager.explicit_log_dir=${output} && \ - rm -rf ${output}/* ${tarred_data}' - } - } - } } stage('Punctuation & Capitalization inference') { when { From deaf46ec1b57de2acadf77fe81a84542a32e790c Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Sun, 8 May 2022 18:39:02 +0300 Subject: [PATCH 9/9] Fix labels errors Signed-off-by: PeganovAnton --- Jenkinsfile | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d9fe59a1872d..16423f973580 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1611,9 +1611,16 @@ pipeline { stage('Punctuation & Capitalization, Using model.common_datasest_parameters.{punct,capit}_label_ids') { steps { sh 'cd examples/nlp/token_classification && \ + conf_path=/home/TestData/nlp/token_classification_punctuation && \ + conf_name=punctuation_capitalization_config_with_ids && \ + cp conf/punctuation_capitalization_config.yaml "${conf_path}/${conf_name}.yaml" && \ + sed -i $\'s/punct_label_ids: null/punct_label_ids: {O: 0, \\\',\\\': 1, .: 2, \\\'?\\\': 3}/\' \ + "${conf_path}/${conf_name}.yaml" && \ + sed -i $\'s/capit_label_ids: null/capit_label_ids: {O: 0, U: 1}/\' \ + "${conf_path}/${conf_name}.yaml" && \ CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \ - --config-path /home/TestData/nlp/token_classification_punctuation \ - --config-name punctuation_capitalization_config_with_ids \ + --config-path "${conf_path}" \ + --config-name "${conf_name}" \ model.train_ds.use_tarred_dataset=false \ model.train_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \ @@ -1622,7 +1629,7 @@ pipeline { +model.train_ds.use_cache=false \ +model.validation_ds.use_cache=false \ +model.test_ds.use_cache=false \ - trainer.gpus=[0,1] \ + trainer.devices=[0,1] \ trainer.strategy=ddp \ trainer.max_epochs=1 \ +exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \ @@ -1637,11 +1644,12 @@ pipeline { +model.train_ds.use_cache=false \ +model.validation_ds.use_cache=false \ +model.test_ds.use_cache=false \ - trainer.gpus=[0,1] \ + trainer.devices=[0,1] \ trainer.strategy=ddp \ trainer.max_epochs=1 \ exp_manager=null && \ - rm -rf /home/TestData/nlp/token_classification_punctuation/output/*' + rm -rf /home/TestData/nlp/token_classification_punctuation/output/* && \ + rm "${conf_path}/${conf_name}.yaml"' } } }