Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pipeline {
VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-27-23-0'
IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-26-23-0'
DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
}
stages {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ def __init__(self, deterministic: bool = True):
# double digit
graph_tens = teen
graph_tens |= tens + (pynutil.delete('0') | graph_digit)
graph_tens |= tens_one
graph_tens |= tens_eight
graph_tens |= pynutil.add_weight(tens_one, -0.01)
graph_tens |= pynutil.add_weight(tens_eight, -0.01)

self.tens = graph_tens.optimize()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@
3544~tremila cinquecento quarantaquattro
1000~mille
1034500~un milione trentaquattromila cinquecento
3453243534~tremila quattrocento cinquantatre milioni duecento quarantatremila cinquecento trentaquattro
3453243534~tremila quattrocento cinquantatre milioni duecento quarantatremila cinquecento trentaquattro
38~trentotto
7 giorni sono una settimana~sette giorni sono una settimana
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
2,01 ₽~due rubli un copeca
3,23€~tre euro ventitre centesimi
4,2 £~quattro sterline venti penny
1 eur~un euro
1 eur~un euro
1 eur per il caffè~un euro per il caffè
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
12:30~dodici e mezza
05:15~cinque e un quarto
17:15:26~diciassette e un quarto e ventisei secondi
23:45~ventitre e quarantacinque minuti
12:30~dodici e trenta minuti~dodici e mezza
05:15~cinque e quindici minuti~cinque e un quarto
17:15:26~diciassette e quindici minuti e ventisei secondi~diciassette e un quarto e ventisei secondi
23:45~ventitre e quarantacinque minuti
03:38~tre e trentotto minuti
l'evento inizia alle 16:00~l'evento inizia alle sedici
10 changes: 7 additions & 3 deletions tests/nemo_text_processing/it/test_sparrowhawk_normalization.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ runtest () {

# read test file
while read testcase; do
IFS='~' read written spoken <<< $testcase
# replace non breaking space with breaking space
denorm_pred=$(echo $written | normalizer_main --config=sparrowhawk_configuration.ascii_proto 2>&1 | tail -n 1)
IFS='~' read -a testcase_tokenized <<< $testcase
written=${testcase_tokenized[0]}
# only tests against first possible option when there are multiple shortest paths
spoken=${testcase_tokenized[1]}

# replace non breaking space with breaking space
denorm_pred=$(echo $written | normalizer_main --config=sparrowhawk_configuration.ascii_proto 2>&1 | tail -n 1 | sed 's/\xC2\xA0/ /g')

# trim white space
spoken="$(echo -e "${spoken}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
denorm_pred="$(echo -e "${denorm_pred}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
Expand Down
2 changes: 1 addition & 1 deletion tests/nemo_text_processing/it/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ class TestChar:
@pytest.mark.unit
def test_norm_char(self, test_input, expected):
preds = self.normalizer.normalize(test_input, punct_post_process=True)
assert expected == preds
assert preds in expected