From 7b31998cc4cbf4080b15c534e7901f66af8573f7 Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Thu, 7 Nov 2019 04:48:10 +0900 Subject: [PATCH 01/11] new scripts --- .gitignore | 2 ++ Dockerfile | 3 ++- README.md | 38 ++++++++++++++++++++++++-------------- run.sh | 42 ++++++++++++++++++++++++++++++++++++++++++ run_docker.sh | 8 ++++++++ 5 files changed, 78 insertions(+), 15 deletions(-) create mode 100644 .gitignore create mode 100755 run.sh create mode 100755 run_docker.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6e07b6d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +ghrepos +result.txt diff --git a/Dockerfile b/Dockerfile index 0641309..a3becb2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,5 @@ +# TODO: Install git + FROM python:3.7.3-alpine3.9 # install java etc @@ -15,7 +17,6 @@ RUN mkdir -p /user/local/redpen RUN mv redpen-distribution-1.10.1 /usr/local/redpen # RUN mv redpen-redpen-1.10.2 /usr/local/redpen - # add redpen to PATH ENV PATH="/usr/local/redpen/bin:${PATH}" diff --git a/README.md b/README.md index ddb9a0b..4818153 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,35 @@ # Usage +Basic usage is below: + +```bash +./run.sh ${REPOSITORY} ${BRANCH} ``` -$ git clone https://github.com/tensorflow/docs -$ cd docs/ -$ git clone https://github.com/tfug/proofreading proofreading -$ cd proofreading -$ bin/run-check # run text lint on the Docker container -$ bin/clear-output # remove temporary files +For example: + +```bash +./run.sh tensorflow/docs master ``` -If you would like to check one specific translated file, -please give the relative path from tensorflow/docs as argument of `bin/run-check` command as below. +The above command works + +1. Clone GitHub repository +2. Convert `*.ipynb` to `*.md` with `jupyter nbconvert` +3. Apply RedPen to `*.md` +We recommend you use the command as + +```bash +./run.sh tensorflow/docs master > result.txt ``` -$ bin/run-check site/ja/tutorials/keras/index.md + +to write the result to a text file. + +If you would like to use Docker, you can also execute the proofreading as + +```bash +./run_docker.sh tensorflow/docs master > result.txt ``` # Why use RedPen? @@ -26,8 +41,3 @@ We are working on translation with more than one person. So It is expected that [Redpen](http://redpen.cc/) is a proofreading tool to help writing documents that need to adhere to a writing standard. We can guarantee the quality of documents without lose writing speed while distributing translation tasks among multiple people. RedPen officially support English and Japanese, but we can use some of the functions with another language. - - -checking process consists of the following two parts. -1. run `jupyter nbconvert` to convert jupyter notebook to markdown -2. run `redpen` to read proofs diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..8394a39 --- /dev/null +++ b/run.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Check the number of arguments +if [ $# -ne 2 ]; then + echo "Error: Invalid arguments" 1>&2 + echo "Usage: ./bin/run.sh " + exit 1 +fi + +GITHUB_REPOSITORY=${1} +GITHUB_REPOSITORY_URL="https://github.com/${GITHUB_REPOSITORY}" +BRANCH=${2} + +echo "GITHUB_REPOSITORY: ${GITHUB_REPOSITORY}" +echo "GITHUB_REPOSITORY_URL: ${GITHUB_REPOSITORY_URL}" +echo "BRANCH: ${BRANCH}" + +TEMP_DIR="ghrepos" + +# Remove +rm -rf ${TEMP_DIR} +mkdir ${TEMP_DIR} + +# Clone GitHub repository +git clone -b ${BRANCH} ${GITHUB_REPOSITORY_URL} ${TEMP_DIR}/${GITHUB_REPOSITORY} + +# Convert all notebooks to markdowns +notebooks=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .ipynb` +for notebook in ${notebooks}; do + jupyter nbconvert \ + --to markdown \ + ${notebook} +done + +# Apply RedPen to all markdowns +files=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .md` +for file in ${files}; do + dir=`dirname ${file}` + filename=`basename ${file}` + echo "[${file}]" + redpen --result-format plain2 ${file} +done diff --git a/run_docker.sh b/run_docker.sh new file mode 100755 index 0000000..15bf149 --- /dev/null +++ b/run_docker.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +docker run \ + -it \ + --rm \ + -v $(PWD):/usr/local/documents \ + tfug/proofreading \ + /bin/ash ./run.sh ${1} ${2} From 38814557b6beda1d4924098af0fdb5942bf8398e Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Thu, 7 Nov 2019 04:54:40 +0900 Subject: [PATCH 02/11] fix script --- run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.sh b/run.sh index 8394a39..cbf83a4 100755 --- a/run.sh +++ b/run.sh @@ -2,7 +2,7 @@ # Check the number of arguments if [ $# -ne 2 ]; then - echo "Error: Invalid arguments" 1>&2 + echo "Error: Invalid arguments" echo "Usage: ./bin/run.sh " exit 1 fi From 9e95bbc5af7d2d44a2d2ed7544f2f6b1d82428e4 Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Thu, 7 Nov 2019 05:00:36 +0900 Subject: [PATCH 03/11] fix a comment --- run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.sh b/run.sh index cbf83a4..0ff8e85 100755 --- a/run.sh +++ b/run.sh @@ -17,7 +17,7 @@ echo "BRANCH: ${BRANCH}" TEMP_DIR="ghrepos" -# Remove +# Remove temporary directory rm -rf ${TEMP_DIR} mkdir ${TEMP_DIR} From b3018ebc0411a42526ef237d22bb1ef3b466edd2 Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Thu, 7 Nov 2019 05:01:54 +0900 Subject: [PATCH 04/11] rm redundant lines --- run.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/run.sh b/run.sh index 0ff8e85..9c3248a 100755 --- a/run.sh +++ b/run.sh @@ -27,16 +27,12 @@ git clone -b ${BRANCH} ${GITHUB_REPOSITORY_URL} ${TEMP_DIR}/${GITHUB_REPOSITORY} # Convert all notebooks to markdowns notebooks=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .ipynb` for notebook in ${notebooks}; do - jupyter nbconvert \ - --to markdown \ - ${notebook} + jupyter nbconvert --to markdown ${notebook} done # Apply RedPen to all markdowns files=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .md` for file in ${files}; do - dir=`dirname ${file}` - filename=`basename ${file}` echo "[${file}]" redpen --result-format plain2 ${file} done From 992dc0ee7f60863c62b067404c5c52e767b5f40d Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Fri, 8 Nov 2019 00:43:04 +0900 Subject: [PATCH 05/11] fix to install git in docker image --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a3becb2..43d0ca8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM python:3.7.3-alpine3.9 # install java etc RUN apk update -RUN apk --no-cache add tar wget openjdk8 gcc pkgconfig zeromq zeromq-dev musl-dev +RUN apk --no-cache add tar wget openjdk8 gcc pkgconfig zeromq zeromq-dev musl-dev git # install python package RUN pip install jupyter click html2text From 3bbb5ca71593f6d188e39393034bd051b400660f Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Fri, 8 Nov 2019 00:43:34 +0900 Subject: [PATCH 06/11] fix scripts --- README.md | 30 +++++++++++++----------------- bin/run | 45 +++++++++++++++++++++++++++++++++++++++++++++ bin/run-docker | 8 ++++++++ 3 files changed, 66 insertions(+), 17 deletions(-) create mode 100755 bin/run create mode 100755 bin/run-docker diff --git a/README.md b/README.md index 4818153..3f3e26a 100644 --- a/README.md +++ b/README.md @@ -2,37 +2,33 @@ [tensorflow/docs](https://github.com/tensorflow/docs)の日本語訳の表記ゆれ等をチェックするツールです。 -# Usage +## Usage -Basic usage is below: - -```bash -./run.sh ${REPOSITORY} ${BRANCH} -``` -For example: - -```bash -./run.sh tensorflow/docs master -``` - -The above command works +This tool works to 1. Clone GitHub repository 2. Convert `*.ipynb` to `*.md` with `jupyter nbconvert` 3. Apply RedPen to `*.md` +4. Output the result to a text file + +Basic usage is as below: + +```bash +./bin/run ${REPOSITORY} ${BRANCH} ${OUTPUT_FILE} +``` -We recommend you use the command as +### Without Docker ```bash -./run.sh tensorflow/docs master > result.txt +./bin/run tensorflow/docs master result.txt ``` -to write the result to a text file. +### With Docker If you would like to use Docker, you can also execute the proofreading as ```bash -./run_docker.sh tensorflow/docs master > result.txt +./bin/run-docker tensorflow/docs master result.txt ``` # Why use RedPen? diff --git a/bin/run b/bin/run new file mode 100755 index 0000000..67b8baa --- /dev/null +++ b/bin/run @@ -0,0 +1,45 @@ +#!/bin/bash + +# Check the number of arguments +if [ $# -ne 3 ]; then + echo "Error: Invalid arguments" + echo "Usage: ./bin/run.sh " + exit 1 +fi + +GITHUB_REPOSITORY=${1} +GITHUB_REPOSITORY_URL="https://github.com/${GITHUB_REPOSITORY}" +BRANCH=${2} +OUTPUT_FILE=${3} + +echo "GITHUB_REPOSITORY: ${GITHUB_REPOSITORY}" +echo "GITHUB_REPOSITORY_URL: ${GITHUB_REPOSITORY_URL}" +echo "BRANCH: ${BRANCH}" +echo "OUTPUT_FILE: ${OUTPUT_FILE}" + +TEMP_DIR="ghrepos" + +# Remove temporary directory +rm -rf ${TEMP_DIR} +mkdir ${TEMP_DIR} + +# Clone GitHub repository +git clone -b ${BRANCH} ${GITHUB_REPOSITORY_URL} ${TEMP_DIR}/${GITHUB_REPOSITORY} + +# Convert all notebooks to markdowns +notebooks=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .ipynb` +for notebook in ${notebooks}; do + jupyter nbconvert --to markdown ${notebook} +done + +# Create output file +echo "GITHUB_REPOSITORY: ${GITHUB_REPOSITORY}" > "${OUTPUT_FILE}" +echo "BRANCH: ${BRANCH}" >> "${OUTPUT_FILE}" +echo "" >> "${OUTPUT_FILE}" + +# Apply RedPen to all markdowns +files=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .md` +for file in ${files}; do + echo "[${file}]" >> "${OUTPUT_FILE}" + redpen --result-format plain2 ${file} >> "${OUTPUT_FILE}" +done diff --git a/bin/run-docker b/bin/run-docker new file mode 100755 index 0000000..b11c21d --- /dev/null +++ b/bin/run-docker @@ -0,0 +1,8 @@ +#!/bin/bash + +docker run \ + -it \ + --rm \ + -v $(PWD):/usr/local/documents \ + tfug/proofreading \ + /bin/ash ./bin/run ${1} ${2} ${3} From 480ebe2eb2c7711711147e03bff389b18c552b21 Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Fri, 8 Nov 2019 00:43:51 +0900 Subject: [PATCH 07/11] fix .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6e07b6d..6308183 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ +# Temporary directory to clone GitHub repository ghrepos -result.txt From a30bd9721d6a7979e15ff0c7ba0030b2298b919d Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Fri, 8 Nov 2019 00:44:31 +0900 Subject: [PATCH 08/11] fix line breaks --- bin/build-docker | 2 +- bin/clear-output | 2 ++ bin/run-check | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/build-docker b/bin/build-docker index d8b1457..960c1fa 100755 --- a/bin/build-docker +++ b/bin/build-docker @@ -1,3 +1,3 @@ #!/bin/bash -docker build --no-cache -t tfug/proofreading . +docker build --no-cache -t tfug/proofreading . diff --git a/bin/clear-output b/bin/clear-output index 562a95f..96657a9 100755 --- a/bin/clear-output +++ b/bin/clear-output @@ -1 +1,3 @@ +#!/bin/bash + rm -r output diff --git a/bin/run-check b/bin/run-check index fcc6270..013b2f2 100755 --- a/bin/run-check +++ b/bin/run-check @@ -1,4 +1,5 @@ #!/bin/bash + docker run \ -it \ --rm \ From 5e65aa76ddc9f6ebe3d9712edaf26ab3b97d7e64 Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Fri, 8 Nov 2019 00:45:16 +0900 Subject: [PATCH 09/11] rename --- run.sh | 38 -------------------------------------- run_docker.sh | 8 -------- 2 files changed, 46 deletions(-) delete mode 100755 run.sh delete mode 100755 run_docker.sh diff --git a/run.sh b/run.sh deleted file mode 100755 index 9c3248a..0000000 --- a/run.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# Check the number of arguments -if [ $# -ne 2 ]; then - echo "Error: Invalid arguments" - echo "Usage: ./bin/run.sh " - exit 1 -fi - -GITHUB_REPOSITORY=${1} -GITHUB_REPOSITORY_URL="https://github.com/${GITHUB_REPOSITORY}" -BRANCH=${2} - -echo "GITHUB_REPOSITORY: ${GITHUB_REPOSITORY}" -echo "GITHUB_REPOSITORY_URL: ${GITHUB_REPOSITORY_URL}" -echo "BRANCH: ${BRANCH}" - -TEMP_DIR="ghrepos" - -# Remove temporary directory -rm -rf ${TEMP_DIR} -mkdir ${TEMP_DIR} - -# Clone GitHub repository -git clone -b ${BRANCH} ${GITHUB_REPOSITORY_URL} ${TEMP_DIR}/${GITHUB_REPOSITORY} - -# Convert all notebooks to markdowns -notebooks=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .ipynb` -for notebook in ${notebooks}; do - jupyter nbconvert --to markdown ${notebook} -done - -# Apply RedPen to all markdowns -files=`find ${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja -type f | grep .md` -for file in ${files}; do - echo "[${file}]" - redpen --result-format plain2 ${file} -done diff --git a/run_docker.sh b/run_docker.sh deleted file mode 100755 index 15bf149..0000000 --- a/run_docker.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -docker run \ - -it \ - --rm \ - -v $(PWD):/usr/local/documents \ - tfug/proofreading \ - /bin/ash ./run.sh ${1} ${2} From eb48a5b114470213db47b4cf07b729621bab19af Mon Sep 17 00:00:00 2001 From: sfujiwara Date: Fri, 8 Nov 2019 01:04:34 +0900 Subject: [PATCH 10/11] fix readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3f3e26a..3d58626 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,13 @@ This tool works to Basic usage is as below: ```bash -./bin/run ${REPOSITORY} ${BRANCH} ${OUTPUT_FILE} +$ ./bin/run ${REPOSITORY} ${BRANCH} ${OUTPUT_FILE} ``` ### Without Docker ```bash -./bin/run tensorflow/docs master result.txt +$ ./bin/run tensorflow/docs master result.txt ``` ### With Docker @@ -28,12 +28,12 @@ Basic usage is as below: If you would like to use Docker, you can also execute the proofreading as ```bash -./bin/run-docker tensorflow/docs master result.txt +$ ./bin/run-docker tensorflow/docs master result.txt ``` -# Why use RedPen? +## Why use RedPen? We are working on translation with more than one person. So It is expected that a lot of orthographical variants will occur. [Redpen](http://redpen.cc/) is a proofreading tool to help writing documents that need to adhere to a writing standard. We can guarantee the quality of documents without lose writing speed while distributing translation tasks among multiple people. -RedPen officially support English and Japanese, but we can use some of the functions with another language. +RedPen officially support English and Japanese, but we can use some of the functions with another language. \ No newline at end of file From 1cd699d358ab47956f9c1e4d1fc4569be4965d92 Mon Sep 17 00:00:00 2001 From: shuhei-a-fujiwara Date: Mon, 18 Nov 2019 19:38:01 +0900 Subject: [PATCH 11/11] rm some files --- README.md | 2 +- bin/clear-output | 3 -- bin/run-check | 8 ----- proofreading.sh | 80 ------------------------------------------- src/html_converter.py | 32 ----------------- 5 files changed, 1 insertion(+), 124 deletions(-) delete mode 100755 bin/clear-output delete mode 100755 bin/run-check delete mode 100644 proofreading.sh delete mode 100644 src/html_converter.py diff --git a/README.md b/README.md index 3d58626..3ebc31b 100644 --- a/README.md +++ b/README.md @@ -36,4 +36,4 @@ $ ./bin/run-docker tensorflow/docs master result.txt We are working on translation with more than one person. So It is expected that a lot of orthographical variants will occur. [Redpen](http://redpen.cc/) is a proofreading tool to help writing documents that need to adhere to a writing standard. We can guarantee the quality of documents without lose writing speed while distributing translation tasks among multiple people. -RedPen officially support English and Japanese, but we can use some of the functions with another language. \ No newline at end of file +RedPen officially support English and Japanese, but we can use some of the functions with another language. diff --git a/bin/clear-output b/bin/clear-output deleted file mode 100755 index 96657a9..0000000 --- a/bin/clear-output +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -rm -r output diff --git a/bin/run-check b/bin/run-check deleted file mode 100755 index 013b2f2..0000000 --- a/bin/run-check +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -docker run \ --it \ ---rm \ --v $(PWD)/..:/usr/local/documents \ -tfug/proofreading \ -/bin/ash proofreading/proofreading.sh $@ diff --git a/proofreading.sh b/proofreading.sh deleted file mode 100644 index aa9f3cb..0000000 --- a/proofreading.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -# convert one ipynb to markdown and save to output directory -function create_markdown() { - dir=`dirname ${file}` - output_dir=${dir//site/proofreading\/output} - echo $output_dir - mkdir -p ${output_dir} - jupyter nbconvert --to markdown ${file} --output-dir ${output_dir} -} - -# find ipynb files, convert to markdown and save to output directory -function create_markdowns() { - files=`find site/ja -maxdepth 5 -type f |grep .ipynb` - for file in ${files}; do - create_markdown - done -} - -# convert one html to markdown and save to output directory -function create_markdown_from_html() { - dir=`dirname ${file}` - output_dir=${dir//site/proofreading\/output} - echo $output_dir - mkdir -p ${output_dir} - python proofreading/src/html_converter.py --input_file ${file} --output_dir ${output_dir} -} - -# find html files, convert to markdown and save to output directory -function create_markdowns_from_html() { - files=`find site/ja -maxdepth 5 -type f |grep .html` - for file in ${files}; do - create_markdown_from_html - done -} -function copy_markdown() { - files=`find site/${lang} -maxdepth 5 -type f |grep .md` - for file in ${files}; do - dir=`dirname ${file}` - output_dir=${dir//site/proofreading\/output} - echo $output_dir - mkdir -p ${output_dir} - cp ${file} ${output_dir}/ - done -} - -# execute redpen check to markdown files in output directory -function exec_redpen() { - docs=`find proofreading/output/${lang} -maxdepth 3 -type f |grep .md` - redpen --conf proofreading/redpen-conf.xml ${docs} -} - -lang=ja - -# 引数の数が1つあったらその引数で与えられたファイルのみチェックする -if [ $# -eq 1 ]; then - file=$1 - echo "check 1 file: ${file}" - if [ ${file##*.} = "ipynb" ]; then - create_markdown - base_filename=${file##*/} - redpen --conf proofreading/redpen-conf.xml ${output_dir}/${base_filename%.*}.md - elif [ ${file##*.} = "html" ]; then - create_markdown_from_html - base_filename=${file##*/} - redpen --conf proofreading/redpen-conf.xml ${output_dir}/${base_filename%.*}.md - elif [ ${file##*.} = "md" ]; then - redpen --conf proofreading/redpen-conf.xml ${file} - else - echo "invalid file type" - exit 1 - fi -# 引数が1つ以外だったら全ファイルチェックする -else - echo "check all files" - create_markdowns - create_markdowns_from_html - copy_markdown - exec_redpen -fi diff --git a/src/html_converter.py b/src/html_converter.py deleted file mode 100644 index 46d4f83..0000000 --- a/src/html_converter.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import sys -import click -import html2text - -@click.command() -@click.option('--input_file', '-i', default=None) -@click.option('--output_dir', '-o', default=None) -def main(input_file, output_dir): - if input_file is None or output_dir is None: - print('invalid arguments') - sys.exit(1) - else: - try: - with open(input_file, 'r') as f: - html = f.read() - except: - print(f'{input_file} does not exist') - sys.exit(1) - text = html2text.html2text(html) - output_file = '{}.md'.format(os.path.basename(input_file).split('.')[0]) - output = os.path.join(output_dir, output_file) - try: - with open(output, 'w') as f: - f.write(text) - except: - print('output path does not exist') - print(f'converted {input_file} to {output_dir}') - sys.exit(0) - -if __name__ == '__main__': - main()