From 2404d2eb03488ad940d7a89e3149aee6ce1f1a8f Mon Sep 17 00:00:00 2001 From: Travis Bartley Date: Wed, 13 Mar 2024 10:36:51 -0700 Subject: [PATCH 1/5] jenkins fix Signed-off-by: Travis Bartley --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d671a53c0..f9a225b27 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -252,7 +252,7 @@ pipeline { parallel { stage('L0: MR ITN grammars') { steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य" --cache_dir ${MR_ITN_CACHE}' + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य " --cache_dir ${MR_TN_CACHE}' } } stage('L0: HY TN grammars') { @@ -262,7 +262,7 @@ pipeline { } stage('L0: HY ITN grammars') { steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց" --cache_dir ${HY_TN_CACHE}' + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}' } } } From 1e561f841b030b81d68cf2d45d5edc59344ff952 Mon Sep 17 00:00:00 2001 From: Travis Bartley Date: Wed, 13 Mar 2024 10:59:27 -0700 Subject: [PATCH 2/5] removing armenian to troubleshoot jenkins Signed-off-by: Travis Bartley --- Jenkinsfile | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f9a225b27..f5887ac80 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -250,16 +250,16 @@ pipeline { } failFast true parallel { - stage('L0: MR ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य " --cache_dir ${MR_TN_CACHE}' - } - } - stage('L0: HY TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python 
nemo_text_processing/text_normalization/normalize.py --lang=hy --text="6" --cache_dir ${HY_TN_CACHE}' - } - } + // stage('L0: MR ITN grammars') { + // steps { + // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य " --cache_dir ${MR_TN_CACHE}' + // } + // } + // stage('L0: HY TN grammars') { + // steps { + // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hy --text="6" --cache_dir ${HY_TN_CACHE}' + // } + // } stage('L0: HY ITN grammars') { steps { sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}' @@ -339,11 +339,11 @@ pipeline { sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/mr/ -m "not pleasefixme" --cpu --tn_cache_dir ${MR_TN_CACHE}' } } - stage('L1: Run all HY TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}' - } - } + // stage('L1: Run all HY TN/ITN tests (restore grammars from cache)') { + // steps { + // sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}' + // } + // } } } From c3915a0622b118fb710b7f035de3061b671da172 Mon Sep 17 00:00:00 2001 From: Travis Bartley Date: Wed, 13 Mar 2024 11:03:47 -0700 Subject: [PATCH 3/5] removing armenian to troubleshoot jenkins Signed-off-by: Travis Bartley --- Jenkinsfile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f5887ac80..7f7a27270 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -250,21 +250,21 @@ pipeline { } failFast true parallel { - // stage('L0: MR ITN grammars') { + stage('L0: MR ITN grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python 
nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य " --cache_dir ${MR_TN_CACHE}' + } + } + // stage('L0: HY TN grammars') { // steps { - // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य " --cache_dir ${MR_TN_CACHE}' + // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hy --text="6" --cache_dir ${HY_TN_CACHE}' // } // } - // stage('L0: HY TN grammars') { + // stage('L0: HY ITN grammars') { // steps { - // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hy --text="6" --cache_dir ${HY_TN_CACHE}' + // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}' // } // } - stage('L0: HY ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}' - } - } } } From f944fcb77317b454709429c91e2eced47e0e9811 Mon Sep 17 00:00:00 2001 From: Travis Bartley Date: Wed, 13 Mar 2024 11:09:20 -0700 Subject: [PATCH 4/5] missing _init_ for python Signed-off-by: Travis Bartley --- Jenkinsfile | 30 +++++++++---------- .../hy/data/numbers/__init__.py | 13 ++++++++ .../hy/data/ordinals/__init__.py | 13 ++++++++ .../hy/data/time/__init__.py | 13 ++++++++ .../hy/data/numbers/__init__.py | 13 ++++++++ .../hy/data/ordinal/__init__.py | 13 ++++++++ .../hy/data/time/__init__.py | 13 ++++++++ 7 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/hy/data/numbers/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/hy/data/ordinals/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/hy/data/time/__init__.py create mode 100644 
nemo_text_processing/text_normalization/hy/data/numbers/__init__.py create mode 100644 nemo_text_processing/text_normalization/hy/data/ordinal/__init__.py create mode 100644 nemo_text_processing/text_normalization/hy/data/time/__init__.py diff --git a/Jenkinsfile b/Jenkinsfile index 7f7a27270..f9a225b27 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -255,16 +255,16 @@ pipeline { sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य " --cache_dir ${MR_TN_CACHE}' } } - // stage('L0: HY TN grammars') { - // steps { - // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hy --text="6" --cache_dir ${HY_TN_CACHE}' - // } - // } - // stage('L0: HY ITN grammars') { - // steps { - // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}' - // } - // } + stage('L0: HY TN grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hy --text="6" --cache_dir ${HY_TN_CACHE}' + } + } + stage('L0: HY ITN grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}' + } + } } } @@ -339,11 +339,11 @@ pipeline { sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/mr/ -m "not pleasefixme" --cpu --tn_cache_dir ${MR_TN_CACHE}' } } - // stage('L1: Run all HY TN/ITN tests (restore grammars from cache)') { - // steps { - // sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}' - // } - // } + stage('L1: Run all HY TN/ITN tests (restore grammars from cache)') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}' + } + } } } diff --git 
a/nemo_text_processing/inverse_text_normalization/hy/data/numbers/__init__.py b/nemo_text_processing/inverse_text_normalization/hy/data/numbers/__init__.py new file mode 100644 index 000000000..9df65818d --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/hy/data/numbers/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/inverse_text_normalization/hy/data/ordinals/__init__.py b/nemo_text_processing/inverse_text_normalization/hy/data/ordinals/__init__.py new file mode 100644 index 000000000..9df65818d --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/hy/data/ordinals/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/nemo_text_processing/inverse_text_normalization/hy/data/time/__init__.py b/nemo_text_processing/inverse_text_normalization/hy/data/time/__init__.py new file mode 100644 index 000000000..9df65818d --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/hy/data/time/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/text_normalization/hy/data/numbers/__init__.py b/nemo_text_processing/text_normalization/hy/data/numbers/__init__.py new file mode 100644 index 000000000..9df65818d --- /dev/null +++ b/nemo_text_processing/text_normalization/hy/data/numbers/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/nemo_text_processing/text_normalization/hy/data/ordinal/__init__.py b/nemo_text_processing/text_normalization/hy/data/ordinal/__init__.py new file mode 100644 index 000000000..9df65818d --- /dev/null +++ b/nemo_text_processing/text_normalization/hy/data/ordinal/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/text_normalization/hy/data/time/__init__.py b/nemo_text_processing/text_normalization/hy/data/time/__init__.py new file mode 100644 index 000000000..9df65818d --- /dev/null +++ b/nemo_text_processing/text_normalization/hy/data/time/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
From f7a13e8a4fa5a6f93410f6d7490b5713c21f2419 Mon Sep 17 00:00:00 2001 From: Travis Bartley Date: Wed, 13 Mar 2024 11:58:48 -0700 Subject: [PATCH 5/5] mislabeled cache Signed-off-by: Travis Bartley --- .../text_normalization/hy/taggers/tokenize_and_classify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_text_processing/text_normalization/hy/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/hy/taggers/tokenize_and_classify.py index 08e121f86..a4562b436 100644 --- a/nemo_text_processing/text_normalization/hy/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/hy/taggers/tokenize_and_classify.py @@ -61,7 +61,7 @@ def __init__( far_file = None if cache_dir is not None and cache_dir != "None": os.makedirs(cache_dir, exist_ok=True) - far_file = os.path.join(cache_dir, f"_hy_itn_{input_case}.far") + far_file = os.path.join(cache_dir, f"_hy_tn_{input_case}.far") if not overwrite_cache and far_file and os.path.exists(far_file): self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"] logger.info(f"ClassifyFst.fst was restored from {far_file}.")