diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6308183 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# Temporary directory to clone GitHub repository +ghrepos diff --git a/Dockerfile b/Dockerfile index 0641309..43d0ca8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,10 @@ +# NOTE: git is installed below because bin/run clones the target repository + FROM python:3.7.3-alpine3.9 # install java etc RUN apk update -RUN apk --no-cache add tar wget openjdk8 gcc pkgconfig zeromq zeromq-dev musl-dev +RUN apk --no-cache add tar wget openjdk8 gcc pkgconfig zeromq zeromq-dev musl-dev git # install python package RUN pip install jupyter click html2text @@ -15,7 +17,6 @@ RUN mkdir -p /user/local/redpen RUN mv redpen-distribution-1.10.1 /usr/local/redpen # RUN mv redpen-redpen-1.10.2 /usr/local/redpen - # add redpen to PATH ENV PATH="/usr/local/redpen/bin:${PATH}" diff --git a/README.md b/README.md index ddb9a0b..3ebc31b 100644 --- a/README.md +++ b/README.md @@ -2,32 +2,38 @@ [tensorflow/docs](https://github.com/tensorflow/docs)の日本語訳の表記ゆれ等をチェックするツールです。 -# Usage +## Usage -``` -$ git clone https://github.com/tensorflow/docs -$ cd docs/ -$ git clone https://github.com/tfug/proofreading proofreading -$ cd proofreading -$ bin/run-check # run text lint on the Docker container -$ bin/clear-output # remove temporary files +This tool performs the following steps: + +1. Clone the GitHub repository +2. Convert `*.ipynb` to `*.md` with `jupyter nbconvert` +3. Apply RedPen to `*.md` +4. Output the result to a text file + +Basic usage is as follows: + +```bash +$ ./bin/run ${REPOSITORY} ${BRANCH} ${OUTPUT_FILE} ``` -If you would like to check one specific translated file, -please give the relative path from tensorflow/docs as argument of `bin/run-check` command as below. 
+### Without Docker +```bash +$ ./bin/run tensorflow/docs master result.txt ``` -$ bin/run-check site/ja/tutorials/keras/index.md + +### With Docker + +If you would like to use Docker, you can also execute the proofreading as + +```bash +$ ./bin/run-docker tensorflow/docs master result.txt ``` -# Why use RedPen? +## Why use RedPen? We are working on translation with more than one person. So It is expected that a lot of orthographical variants will occur. [Redpen](http://redpen.cc/) is a proofreading tool to help writing documents that need to adhere to a writing standard. We can guarantee the quality of documents without lose writing speed while distributing translation tasks among multiple people. RedPen officially support English and Japanese, but we can use some of the functions with another language. - - -checking process consists of the following two parts. -1. run `jupyter nbconvert` to convert jupyter notebook to markdown -2. run `redpen` to read proofs diff --git a/bin/build-docker b/bin/build-docker index d8b1457..960c1fa 100755 --- a/bin/build-docker +++ b/bin/build-docker @@ -1,3 +1,3 @@ #!/bin/bash -docker build --no-cache -t tfug/proofreading . +docker build --no-cache -t tfug/proofreading . 
diff --git a/bin/clear-output b/bin/clear-output deleted file mode 100755 index 562a95f..0000000 --- a/bin/clear-output +++ /dev/null @@ -1 +0,0 @@ -rm -r output diff --git a/bin/run b/bin/run new file mode 100755 index 0000000..67b8baa --- /dev/null +++ b/bin/run @@ -0,0 +1,45 @@ +#!/bin/bash + +# Require exactly three arguments: repository, branch and output file +if [ $# -ne 3 ]; then + echo "Error: Invalid arguments" + echo "Usage: ./bin/run REPOSITORY BRANCH OUTPUT_FILE" + exit 1 +fi + +GITHUB_REPOSITORY=${1} +GITHUB_REPOSITORY_URL="https://github.com/${GITHUB_REPOSITORY}" +BRANCH=${2} +OUTPUT_FILE=${3} + +echo "GITHUB_REPOSITORY: ${GITHUB_REPOSITORY}" +echo "GITHUB_REPOSITORY_URL: ${GITHUB_REPOSITORY_URL}" +echo "BRANCH: ${BRANCH}" +echo "OUTPUT_FILE: ${OUTPUT_FILE}" + +TEMP_DIR="ghrepos" + +# Recreate the temporary directory so every run starts from a clean clone +rm -rf "${TEMP_DIR}" +mkdir "${TEMP_DIR}" + +# Clone the requested branch; stop here if the clone fails +git clone -b "${BRANCH}" "${GITHUB_REPOSITORY_URL}" "${TEMP_DIR}/${GITHUB_REPOSITORY}" || exit 1 + +# Convert every *.ipynb under site/ja to Markdown +notebooks=`find "${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja" -type f -name '*.ipynb'` +for notebook in ${notebooks}; do + jupyter nbconvert --to markdown "${notebook}" +done + +# Start the output file with a header naming the repository and branch +echo "GITHUB_REPOSITORY: ${GITHUB_REPOSITORY}" > "${OUTPUT_FILE}" +echo "BRANCH: ${BRANCH}" >> "${OUTPUT_FILE}" +echo "" >> "${OUTPUT_FILE}" + +# Run RedPen on every *.md under site/ja and append its report to the output file +files=`find "${TEMP_DIR}/${GITHUB_REPOSITORY}/site/ja" -type f -name '*.md'` +for file in ${files}; do + echo "[${file}]" >> "${OUTPUT_FILE}" + redpen --result-format plain2 "${file}" >> "${OUTPUT_FILE}" +done diff --git a/bin/run-check b/bin/run-check deleted file mode 100755 index fcc6270..0000000 --- a/bin/run-check +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -docker run \ --it \ ---rm \ --v $(PWD)/..:/usr/local/documents \ -tfug/proofreading \ -/bin/ash proofreading/proofreading.sh $@ diff --git a/bin/run-docker b/bin/run-docker new file mode 100755 index 0000000..b11c21d --- /dev/null +++ b/bin/run-docker @@ -0,0 +1,8 @@ +#!/bin/bash + +docker run \ + -it \ 
--rm \ + -v "$(pwd)":/usr/local/documents \ + tfug/proofreading \ + /bin/ash ./bin/run "${1}" "${2}" "${3}" diff --git a/proofreading.sh b/proofreading.sh deleted file mode 100644 index aa9f3cb..0000000 --- a/proofreading.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -# convert one ipynb to markdown and save to output directory -function create_markdown() { - dir=`dirname ${file}` - output_dir=${dir//site/proofreading\/output} - echo $output_dir - mkdir -p ${output_dir} - jupyter nbconvert --to markdown ${file} --output-dir ${output_dir} -} - -# find ipynb files, convert to markdown and save to output directory -function create_markdowns() { - files=`find site/ja -maxdepth 5 -type f |grep .ipynb` - for file in ${files}; do - create_markdown - done -} - -# convert one html to markdown and save to output directory -function create_markdown_from_html() { - dir=`dirname ${file}` - output_dir=${dir//site/proofreading\/output} - echo $output_dir - mkdir -p ${output_dir} - python proofreading/src/html_converter.py --input_file ${file} --output_dir ${output_dir} -} - -# find html files, convert to markdown and save to output directory -function create_markdowns_from_html() { - files=`find site/ja -maxdepth 5 -type f |grep .html` - for file in ${files}; do - create_markdown_from_html - done -} -function copy_markdown() { - files=`find site/${lang} -maxdepth 5 -type f |grep .md` - for file in ${files}; do - dir=`dirname ${file}` - output_dir=${dir//site/proofreading\/output} - echo $output_dir - mkdir -p ${output_dir} - cp ${file} ${output_dir}/ - done -} - -# execute redpen check to markdown files in output directory -function exec_redpen() { - docs=`find proofreading/output/${lang} -maxdepth 3 -type f |grep .md` - redpen --conf proofreading/redpen-conf.xml ${docs} -} - -lang=ja - -# 引数の数が1つあったらその引数で与えられたファイルのみチェックする -if [ $# -eq 1 ]; then - file=$1 - echo "check 1 file: ${file}" - if [ ${file##*.} = "ipynb" ]; then - create_markdown - base_filename=${file##*/} - redpen --conf 
proofreading/redpen-conf.xml ${output_dir}/${base_filename%.*}.md - elif [ ${file##*.} = "html" ]; then - create_markdown_from_html - base_filename=${file##*/} - redpen --conf proofreading/redpen-conf.xml ${output_dir}/${base_filename%.*}.md - elif [ ${file##*.} = "md" ]; then - redpen --conf proofreading/redpen-conf.xml ${file} - else - echo "invalid file type" - exit 1 - fi -# 引数が1つ以外だったら全ファイルチェックする -else - echo "check all files" - create_markdowns - create_markdowns_from_html - copy_markdown - exec_redpen -fi diff --git a/src/html_converter.py b/src/html_converter.py deleted file mode 100644 index 46d4f83..0000000 --- a/src/html_converter.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import sys -import click -import html2text - -@click.command() -@click.option('--input_file', '-i', default=None) -@click.option('--output_dir', '-o', default=None) -def main(input_file, output_dir): - if input_file is None or output_dir is None: - print('invalid arguments') - sys.exit(1) - else: - try: - with open(input_file, 'r') as f: - html = f.read() - except: - print(f'{input_file} does not exist') - sys.exit(1) - text = html2text.html2text(html) - output_file = '{}.md'.format(os.path.basename(input_file).split('.')[0]) - output = os.path.join(output_dir, output_file) - try: - with open(output, 'w') as f: - f.write(text) - except: - print('output path does not exist') - print(f'converted {input_file} to {output_dir}') - sys.exit(0) - -if __name__ == '__main__': - main()