Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/source/starthere/tutorials.rst
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ To run a tutorial:
* - Text Processing
- Inverse Text Normalization for ASR
- `Inverse Text Normalization <https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/text_processing/Inverse_Text_Normalization.ipynb>`_
* - Text Processing
- Inverse Text Normalization for ASR - Thutmose Tagger
- `Inverse Text Normalization with Thutmose Tagger <https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/text_processing/ITN_with_Thutmose_Tagger.ipynb>`_
* - Text Processing
- Constructing Normalization Grammars with WFSTs
- `WFST Tutorial <https://github.com/NVIDIA/NeMo/blob/main/tutorials/text_processing/WFST_Tutorial.ipynb>`_
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
from collections import Counter
from typing import Dict, Optional, TextIO, Tuple

from examples.nlp.text_normalization_as_tagging.dataset_preparation.utils import get_src_and_dst_for_alignment

from nemo.collections.nlp.data.text_normalization_as_tagging.utils import get_src_and_dst_for_alignment
from nemo.utils import logging

parser = ArgumentParser(description="Produce data for the ThutmoseTaggerModel")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from os.path import isdir, join
from shutil import rmtree

from examples.nlp.text_normalization_as_tagging.dataset_preparation.utils import get_src_and_dst_for_alignment
from nemo.collections.nlp.data.text_normalization_as_tagging.utils import get_src_and_dst_for_alignment

parser = ArgumentParser(description='Split corpus to subcorpora for giza alignment')
parser.add_argument('--data_dir', type=str, required=True, help='Path to folder with data')
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Clone the giza-pp sources into ./giza-pp and build them with make.
# The clone result is deliberately not checked: if ./giza-pp already exists
# from an earlier run, git clone fails but the build below still proceeds.
git clone https://github.com/moses-smt/giza-pp.git giza-pp

# Build inside a subshell so the caller's working directory is never changed,
# and only run make when the cd succeeded. Previously a failed cd let make run
# in the current directory and the trailing "cd .." moved to the parent.
# The subshell's status (cd/make result) becomes the script's exit status.
(cd giza-pp && make)
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,10 @@ def realize_output(self, tags: List[Tag], semiotic_labels: List[str]) -> Tuple[s
output_tokens.append(frag.replace(" ", "").replace("_", ""))
else:
output_tokens.append(frag.strip().replace("_", ""))

output_str = " ".join(output_tokens)
output_str = re.sub(r" +", " ", output_str)
return (
" ".join(output_tokens),
output_str,
" ".join(self.source_tokens),
" ".join(out_tags_without_swap),
output_tags_with_swap_str,
Expand Down
Loading