Merged

Commits
20 commits
ec9cb40  Fix RANK env variable check in global rank check (PeganovAnton, Jan 11, 2022)
0e2f8bc  Merge branch 'main' of https://github.com/NVIDIA/NeMo (PeganovAnton, Jan 16, 2022)
17e9932  Merge branch 'main' of https://github.com/NVIDIA/NeMo (PeganovAnton, Jan 20, 2022)
b3de88e  Merge branch 'main' of https://github.com/NVIDIA/NeMo (PeganovAnton, Jan 22, 2022)
69d97c4  Merge branch 'main' of https://github.com/NVIDIA/NeMo (PeganovAnton, Jan 28, 2022)
4a7d082  Merge branch 'main' of https://github.com/NVIDIA/NeMo (PeganovAnton, Jan 30, 2022)
a56db7a  Fix restoring from checkpoint with label vocab dir (PeganovAnton, Jan 28, 2022)
19884da  Merge branch 'main' of https://github.com/NVIDIA/NeMo (PeganovAnton, Feb 5, 2022)
e9e3926  Merge branch 'main' of https://github.com/NVIDIA/NeMo (PeganovAnton, Feb 13, 2022)
38cd3fb  Merge branch 'main' into fix/punctuation/working_with_label_vocab_dir… (PeganovAnton, Feb 13, 2022)
6409dd4  Add tests for various ways to pass label ids to model (PeganovAnton, Feb 14, 2022)
c6b6928  Fix typo (PeganovAnton, Feb 14, 2022)
e0ffa4c  Fix typo (PeganovAnton, Feb 14, 2022)
fcc7c81  Do not create tmp directory (PeganovAnton, Feb 14, 2022)
fa47fc3  Merge branch 'main' into fix/punctuation/working_with_label_vocab_dir… (PeganovAnton, Feb 14, 2022)
35b8058  Merge branch 'main' into fix/punctuation/working_with_label_vocab_dir… (okuchaiev, Apr 5, 2022)
d2777ef  Merge branch 'main' into fix/punctuation/working_with_label_vocab_dir… (PeganovAnton, May 7, 2022)
62801d2  Fix parameter name (PeganovAnton, May 7, 2022)
47de46a  Try CUDA launch blocking (PeganovAnton, May 7, 2022)
deaf46e  Fix labels errors (PeganovAnton, May 8, 2022)
99 changes: 99 additions & 0 deletions Jenkinsfile
@@ -1555,6 +1555,105 @@ pipeline {
}
}
}
stage('Punctuation & Capitalization, Different ways of passing labels to model') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
stages {
stage('Punctuation & Capitalization, Using model.common_dataset_parameters.label_vocab_dir') {
steps {
sh 'cd examples/nlp/token_classification && \
label_vocab_dir=label_vocab_dir && \
mkdir -p ${label_vocab_dir} && \
punct_label_vocab="${label_vocab_dir}/punct_label_vocab.csv" && \
capit_label_vocab="${label_vocab_dir}/capit_label_vocab.csv" && \
printf "O\n,\n.\n?\n" > "${punct_label_vocab}" && \
printf "O\nU\n" > "${capit_label_vocab}" && \
CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \
model.train_ds.use_tarred_dataset=false \
model.train_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
model.language_model.pretrained_model_name=distilbert-base-uncased \
model.common_dataset_parameters.label_vocab_dir="${label_vocab_dir}" \
model.class_labels.punct_labels_file="$(basename "${punct_label_vocab}")" \
model.class_labels.capit_labels_file="$(basename "${capit_label_vocab}")" \
+model.train_ds.use_cache=false \
+model.validation_ds.use_cache=false \
+model.test_ds.use_cache=false \
trainer.devices=[0,1] \
trainer.strategy=ddp \
trainer.max_epochs=1 \
+exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \
+do_testing=false && \
CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \
+do_training=false \
+do_testing=true \
~model.train_ds \
~model.validation_ds \
model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
pretrained_model=/home/TestData/nlp/token_classification_punctuation/output/checkpoints/Punctuation_and_Capitalization.nemo \
+model.train_ds.use_cache=false \
+model.validation_ds.use_cache=false \
+model.test_ds.use_cache=false \
trainer.devices=[0,1] \
trainer.strategy=ddp \
trainer.max_epochs=1 \
exp_manager=null && \
rm -r "${label_vocab_dir}" && \
rm -rf /home/TestData/nlp/token_classification_punctuation/output/*'
}
}
stage('Punctuation & Capitalization, Using model.common_dataset_parameters.{punct,capit}_label_ids') {
steps {
sh 'cd examples/nlp/token_classification && \
conf_path=/home/TestData/nlp/token_classification_punctuation && \
conf_name=punctuation_capitalization_config_with_ids && \
cp conf/punctuation_capitalization_config.yaml "${conf_path}/${conf_name}.yaml" && \
sed -i $\'s/punct_label_ids: null/punct_label_ids: {O: 0, \\\',\\\': 1, .: 2, \\\'?\\\': 3}/\' \
"${conf_path}/${conf_name}.yaml" && \
sed -i $\'s/capit_label_ids: null/capit_label_ids: {O: 0, U: 1}/\' \
"${conf_path}/${conf_name}.yaml" && \
CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \
--config-path "${conf_path}" \
--config-name "${conf_name}" \
model.train_ds.use_tarred_dataset=false \
model.train_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
model.validation_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
model.language_model.pretrained_model_name=distilbert-base-uncased \
+model.train_ds.use_cache=false \
+model.validation_ds.use_cache=false \
+model.test_ds.use_cache=false \
trainer.devices=[0,1] \
trainer.strategy=ddp \
trainer.max_epochs=1 \
+exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output \
+do_testing=false && \
CUDA_LAUNCH_BLOCKING=1 python punctuation_capitalization_train_evaluate.py \
+do_training=false \
+do_testing=true \
~model.train_ds \
~model.validation_ds \
model.test_ds.ds_item=/home/TestData/nlp/token_classification_punctuation \
pretrained_model=/home/TestData/nlp/token_classification_punctuation/output/checkpoints/Punctuation_and_Capitalization.nemo \
+model.train_ds.use_cache=false \
+model.validation_ds.use_cache=false \
+model.test_ds.use_cache=false \
trainer.devices=[0,1] \
trainer.strategy=ddp \
trainer.max_epochs=1 \
exp_manager=null && \
rm -rf /home/TestData/nlp/token_classification_punctuation/output/* && \
rm "${conf_path}/${conf_name}.yaml"'
}
}
}
}
stage('Punctuation & Capitalization inference') {
when {
anyOf {
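For context on what the two new CI stages above exercise, here is a minimal Python sketch (illustration only, not part of the PR): it writes the same label vocabulary files that the first stage creates with printf, and spells out the equivalent explicit label_ids that the second stage's sed commands inject into the config. The id-equals-line-order assumption is inferred from comparing the two stages, not from NeMo internals.

# Minimal sketch (not part of the PR): recreate the label vocabulary files written by
# the first CI stage and the explicit mappings injected by the second CI stage.
from pathlib import Path

label_vocab_dir = Path("label_vocab_dir")
label_vocab_dir.mkdir(exist_ok=True)

# Same content as: printf "O\n,\n.\n?\n" > punct_label_vocab.csv
(label_vocab_dir / "punct_label_vocab.csv").write_text("O\n,\n.\n?\n")
# Same content as: printf "O\nU\n" > capit_label_vocab.csv
(label_vocab_dir / "capit_label_vocab.csv").write_text("O\nU\n")

# Equivalent explicit mappings used by the second stage via
# model.common_dataset_parameters.punct_label_ids / capit_label_ids:
punct_label_ids = {"O": 0, ",": 1, ".": 2, "?": 3}
capit_label_ids = {"O": 0, "U": 1}

# Assumed for this sketch: the id of each label equals its line index in the file,
# which is what makes the two stages equivalent.
for name in ("punct_label_vocab.csv", "capit_label_vocab.csv"):
    labels = (label_vocab_dir / name).read_text().splitlines()
    print(name, {label: i for i, label in enumerate(labels)})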
@@ -638,16 +638,16 @@ def _check_label_config_parameters(self) -> None:
)

def _extract_label_vocab_files_from_config(self) -> Tuple[Optional[Path], Optional[Path]]:
-        if self._cfg.common_dataset_parameters.label_vocab_dir is None:
-            if self._is_model_being_restored():
-                punct_label_vocab_file = self._cfg.class_labels.punct_labels_file
-                capit_label_vocab_file = self._cfg.class_labels.capit_labels_file
-            else:
-                punct_label_vocab_file, capit_label_vocab_file = None, None
+        if self._is_model_being_restored():
+            punct_label_vocab_file = self._cfg.class_labels.punct_labels_file
+            capit_label_vocab_file = self._cfg.class_labels.capit_labels_file
         else:
-            label_vocab_dir = Path(self._cfg.common_dataset_parameters.label_vocab_dir).expanduser()
-            punct_label_vocab_file = label_vocab_dir / self._cfg.class_labels.punct_labels_file
-            capit_label_vocab_file = label_vocab_dir / self._cfg.class_labels.capit_labels_file
+            if self._cfg.common_dataset_parameters.label_vocab_dir is None:
+                punct_label_vocab_file, capit_label_vocab_file = None, None
+            else:
+                label_vocab_dir = Path(self._cfg.common_dataset_parameters.label_vocab_dir).expanduser()
+                punct_label_vocab_file = label_vocab_dir / self._cfg.class_labels.punct_labels_file
+                capit_label_vocab_file = label_vocab_dir / self._cfg.class_labels.capit_labels_file
return punct_label_vocab_file, capit_label_vocab_file

def _set_label_ids(self) -> None:
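For illustration, a minimal self-contained sketch of the path resolution that the fixed _extract_label_vocab_files_from_config performs; the standalone function and its parameters are invented for this sketch and are not the NeMo API. Judging from the diff, the old code took the restoration branch only when label_vocab_dir was unset, so restoring a .nemo checkpoint trained with label_vocab_dir tried to resolve the label files against the training-time directory; the fix checks for restoration first and returns bare file names in that case.

# Simplified sketch (assumption, not the NeMo class) of the fixed resolution logic.
from pathlib import Path
from typing import Optional, Tuple


def resolve_label_vocab_files(
    punct_labels_file: str,
    capit_labels_file: str,
    label_vocab_dir: Optional[str],
    is_being_restored: bool,
) -> Tuple[Optional[Path], Optional[Path]]:
    if is_being_restored:
        # Restoration case: keep the bare file names so they can be resolved
        # relative to the restored checkpoint contents (assumption for this sketch).
        return Path(punct_labels_file), Path(capit_labels_file)
    if label_vocab_dir is None:
        return None, None
    vocab_dir = Path(label_vocab_dir).expanduser()
    return vocab_dir / punct_labels_file, vocab_dir / capit_labels_file


# Restoring a checkpoint: bare file names, no dependence on the training-time directory.
print(resolve_label_vocab_files("punct_label_vocab.csv", "capit_label_vocab.csv", None, True))
# Training with a label vocab dir: files are looked up under that directory.
print(resolve_label_vocab_files("punct_label_vocab.csv", "capit_label_vocab.csv", "label_vocab_dir", False))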