diff --git a/data/downloadInputs.sh b/data/downloadInputs.sh
index 0929f63..113db3b 100755
--- a/data/downloadInputs.sh
+++ b/data/downloadInputs.sh
@@ -1,52 +1,20 @@
 #!/bin/bash -e
 
-OLD_DIR=`pwd`
-DIR=`dirname $0`
-cd $DIR
+this_dir=$(dirname "${0}")
+input_dir="${this_dir}"/inputs
 
-echo "Note that unzipping is slow."
+echo "Downloading dictionary files."
 
-L=en
-echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-latest-pages-articles.xml
-curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
+# Downloading wikimedia dump files
+for l in en fr it de es pt; do
+  download_url="http://dumps.wikimedia.org/${l}wiktionary/latest/${l}wiktionary-latest-pages-articles.xml.bz2"
+  echo "Downloading data for language '${l}' from: ${download_url}"
+  wget --no-verbose --show-progress "${download_url}" --output-document="${input_dir}/${l}wiktionary-pages-articles.xml.bz2"
+done
 
-echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
-CHEMNITZ=de-en.txt
-curl -L --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
-mv ${CHEMNITZ}.gz inputs/de-en_chemnitz.txt.gz
+# Downloading de-en from chemnitz.de
+download_url='http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/de-en.txt.gz'
+echo "Downloading from: ${download_url}"
+wget --no-verbose --show-progress "${download_url}" --output-document="${input_dir}/de-en_chemnitz.txt.gz"
 
-L=fr
-echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-latest-pages-articles.xml
-curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
-
-L=it
-echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-latest-pages-articles.xml
-curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
-
-L=de
-echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-latest-pages-articles.xml
-curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
-
-L=es
-echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-latest-pages-articles.xml
-curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
-
-L=pt
-echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-latest-pages-articles.xml
-curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
-
-echo "Done. Now run WiktionarySplitter to split apart enwiktionary."
-
-cd $OLD_DIR
+echo "Done. Now run './WiktionarySplitter.sh' to split apart wiktionary dump files."