Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pipeline {
DE_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-18-23-0'
ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-27-23-0'
ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-13-23-1'
ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-13-23-2'
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/12-05-23-0'
HU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
PT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
Expand Down Expand Up @@ -381,6 +381,39 @@ pipeline {
}
}

stage('L2: Sparrowhawk Tests') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
stages {
stage('L2: EN ITN Run Sparrowhawk test - Lower Cased Input') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_itn_grammars" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_ITN --LANGUAGE="en"'
sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_inverse_text_normalization.sh `pwd`'

}
}
stage('L2: EN ITN Run Sparrowhawk test - Cased Input') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_itn_grammars" --INPUT_CASE="cased" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_ITN_cased --LANGUAGE="en"'
sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_inverse_text_normalization_cased.sh `pwd`'

}
}
stage('L2: EN TN Run Sparrowhawk test') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_tn_grammars" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_TN --GRAMMARS="tn_grammars" --LANGUAGE="en" '
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should INPUT_CASE be "cased" for the TN tests, or is that already the default there?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be cased, but neither the current pynini TN tests nor the export script set INPUT_CASE to "cased". Also, no separate test cases appear to be defined for explicitly cased TN.

sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_normalization.sh `pwd`'
}
}

}
}

stage('L2: NeMo text processing') {
when {
anyOf {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ eight hundred kilo watt hours~800 kWh
eight hundred kilowatts~800 kW
eight hundred megahertz~800 mhz
eight hundred ninety four c c~894 cc
eight hundred ninety kilowatts~890 kW`
eight hundred ninety kilowatts~890 kW
eight hundred ninety millimeters~890 mm
eight hundred ninety two square kilometers~892 km²
eight hundred seventy horsepower~870 hp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ s and p five hundred~S&P 500
seven eleven stores~7-eleven stores
r t x~RTX
cat five e~CAT5e
nvidia a one hundred~Nvidia A100
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this one removed?

Copy link
Collaborator Author

@anand-nv anand-nv Nov 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Sparrowhawk output, even though correct (the entry is in whitelist.tsv), was failing shtest2. Pytest passes, but the Sparrowhawk (SH) tests do not, so this case has been removed for now.

c u d n n~cuDNN
p c i e x eight~PCIe x8
l g a eleven fifty~LGA 1150
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#! /bin/sh

PROJECT_DIR=/workspace/tests
TEST_DIR=${1:-"/workspace/tests/en"}

runtest () {
input=$1
echo "INPUT is $input"
cd /workspace/sparrowhawk/documentation/grammars

# read test file
Expand All @@ -21,59 +22,63 @@ runtest () {
}

testITNCardinal() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_cardinal.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_cardinal.txt
runtest $input
}

testITNDate() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_date.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_date.txt
runtest $input
}

testITNDecimal() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_decimal.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_decimal.txt
runtest $input
}

testITNElectronic() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_electronic.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_electronic.txt
runtest $input
}

testITNOrdinal() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_ordinal.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_ordinal.txt
runtest $input
}

testITNTime() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_time.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_time.txt
runtest $input
}

testITNMeasure() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_measure.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_measure.txt
runtest $input
}

testITNMoney() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_money.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_money.txt
runtest $input
}

testITNWhitelist() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_whitelist.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_whitelist.txt
runtest $input
}

testITNTelephone() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_telephone.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_telephone.txt
runtest $input
}

testITNWord() {
input=$PROJECT_DIR/en/data_inverse_text_normalization/test_cases_word.txt
input=$TEST_DIR/data_inverse_text_normalization/test_cases_word.txt
runtest $input
}


# Remove all command-line arguments
shift $#

# Load shUnit2
. $PROJECT_DIR/../shunit2/shunit2
. /workspace/shunit2/shunit2
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#! /bin/sh

PROJECT_DIR=/workspace/tests
TEST_DIR=${1:-"/workspace/tests/en"}

runtest () {
input=$1
echo "INPUT is $input"

cd /workspace/sparrowhawk/documentation/grammars

# read test file
Expand All @@ -21,59 +23,63 @@ runtest () {
}

testITNCardinal() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_cardinal.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_cardinal_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_cardinal.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_cardinal_cased.txt
}

testITNDate() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_date.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_date_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_date.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_date_cased.txt
}

testITNDecimal() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_decimal.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_decimal_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_decimal.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_decimal_cased.txt
}

testITNElectronic() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_electronic.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_electronic_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_electronic.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_electronic_cased.txt
}

testITNOrdinal() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_ordinal.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_ordinal_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_ordinal.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_ordinal_cased.txt
}

testITNTime() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_time.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_time_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_time.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_time_cased.txt
}

testITNMeasure() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_measure.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_measure_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_measure.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_measure_cased.txt
}

testITNMoney() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_money.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_money_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_money.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_money_cased.txt
}

testITNWhitelist() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_whitelist.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_whitelist_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_whitelist.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_whitelist_cased.txt
}

testITNTelephone() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_telephone.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_telephone_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_telephone.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_telephone_cased.txt
}

testITNWord() {
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_word.txt
runtest $PROJECT_DIR/en/data_inverse_text_normalization/test_cases_word_cased.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_word.txt
runtest $TEST_DIR/data_inverse_text_normalization/test_cases_word_cased.txt
}


# Remove all command-line arguments
shift $#

# Load shUnit2
. $PROJECT_DIR/../shunit2/shunit2
. /workspace/shunit2/shunit2
44 changes: 23 additions & 21 deletions tests/nemo_text_processing/en/test_sparrowhawk_normalization.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#! /bin/sh

PROJECT_DIR=/workspace/tests
TEST_DIR=${1:-"/workspace/tests/en"}

runtest () {
input=$1
Expand All @@ -22,94 +21,97 @@ runtest () {
}

testTNSpecialText() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_special_text.txt
input=$TEST_DIR/data_text_normalization/test_cases_special_text.txt
runtest $input
}

testTNCardinal() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_cardinal.txt
input=$TEST_DIR/data_text_normalization/test_cases_cardinal.txt
runtest $input
}

testTNDate() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_date.txt
input=$TEST_DIR/data_text_normalization/test_cases_date.txt
runtest $input
}

testTNDecimal() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_decimal.txt
input=$TEST_DIR/data_text_normalization/test_cases_decimal.txt
runtest $input
}

testTNRange() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_range.txt
input=$TEST_DIR/data_text_normalization/test_cases_range.txt
runtest $input
}

testTNSerial() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_serial.txt
input=$TEST_DIR/data_text_normalization/test_cases_serial.txt
runtest $input
}

#testTNRoman() {
# input=$PROJECT_DIR/en/data_text_normalization/test_cases_roman.txt
# input=$TEST_DIR/data_text_normalization/test_cases_roman.txt
# runtest $input
#}

testTNElectronic() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_electronic.txt
input=$TEST_DIR/data_text_normalization/test_cases_electronic.txt
runtest $input
}

testTNFraction() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_fraction.txt
input=$TEST_DIR/data_text_normalization/test_cases_fraction.txt
runtest $input
}

testTNMoney() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_money.txt
input=$TEST_DIR/data_text_normalization/test_cases_money.txt
runtest $input
}

testTNOrdinal() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_ordinal.txt
input=$TEST_DIR/data_text_normalization/test_cases_ordinal.txt
runtest $input
}

testTNTelephone() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_telephone.txt
input=$TEST_DIR/data_text_normalization/test_cases_telephone.txt
runtest $input
}

testTNTime() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_time.txt
input=$TEST_DIR/data_text_normalization/test_cases_time.txt
runtest $input
}

testTNMeasure() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_measure.txt
input=$TEST_DIR/data_text_normalization/test_cases_measure.txt
runtest $input
}

testTNWhitelist() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_whitelist.txt
input=$TEST_DIR/data_text_normalization/test_cases_whitelist.txt
runtest $input
}

testTNWord() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_word.txt
input=$TEST_DIR/data_text_normalization/test_cases_word.txt
runtest $input
}

testTNAddress() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_address.txt
input=$TEST_DIR/data_text_normalization/test_cases_address.txt
runtest $input
}

testTNMath() {
input=$PROJECT_DIR/en/data_text_normalization/test_cases_math.txt
input=$TEST_DIR/data_text_normalization/test_cases_math.txt
runtest $input
}

# Remove all command-line arguments
shift $#

# Load shUnit2
. $PROJECT_DIR/../shunit2/shunit2
. /workspace/shunit2/shunit2
10 changes: 6 additions & 4 deletions tools/text_processing_deployment/docker/launch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

MODE=${1:-"export"}
MODE=${1:-"interactive"}
LANGUAGE=${2:-"en"}
INPUT_CASE=${3:-"lower_cased"}
SCRIPT_DIR=$(cd $(dirname $0); pwd)
: ${CLASSIFY_DIR:="$SCRIPT_DIR/../$LANGUAGE/classify"}
: ${VERBALIZE_DIR:="$SCRIPT_DIR/../$LANGUAGE/verbalize"}
: ${CMD:=${4:-"/bin/bash"}}
GRAMMAR_DIR=${4:-${SCRIPT_DIR}"/.."}

: ${CLASSIFY_DIR:="$GRAMMAR_DIR/$LANGUAGE/classify"}
: ${VERBALIZE_DIR:="$GRAMMAR_DIR/$LANGUAGE/verbalize"}
: ${CMD:=${5:-"/bin/bash"}}

MOUNTS=""
MOUNTS+=" -v $CLASSIFY_DIR:/workspace/sparrowhawk/documentation/grammars/en_toy/classify"
Expand Down
Loading