Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ RUN apk update
RUN apk --no-cache add tar wget openjdk8 gcc pkgconfig zeromq zeromq-dev musl-dev

# install python package
RUN pip install jupyter
RUN pip install jupyter click html2text

# download redpen
# RUN wget https://github.com/redpen-cc/redpen/archive/redpen-1.10.2.tar.gz
Expand Down
2 changes: 1 addition & 1 deletion bin/build-docker
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash
docker build --no-cache -t chie8842/proofreading .
docker build --no-cache -t tfug/proofreading .

2 changes: 1 addition & 1 deletion bin/run-check
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ docker run \
-it \
--rm \
-v $(PWD)/..:/usr/local/documents \
chie8842/tensorflow_docs_proofreading \
tfug/proofreading \
/bin/ash proofreading/proofreading.sh $@
21 changes: 21 additions & 0 deletions proofreading.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,22 @@ function create_markdowns() {
done
}

#######################################
# Convert one HTML file to Markdown and save it under the output directory.
# Globals:
#   file       (read)    path of the HTML file to convert
#   dir        (written) directory part of ${file}
#   output_dir (written) ${dir} with every occurrence of "site" replaced by
#                        "proofreading/output"; left global on purpose so the
#                        caller can locate the generated .md afterwards
# Outputs:
#   prints the output directory to stdout
# Returns:
#   1 if the output directory cannot be created, otherwise the exit status
#   of the converter
#######################################
function create_markdown_from_html() {
  # Every expansion is quoted so paths containing whitespace or glob
  # characters reach dirname/mkdir/python as a single argument.
  dir=$(dirname "${file}")
  # NOTE(review): this replaces *all* "site" substrings, not only the leading
  # path component — confirm that is intended for nested directory names.
  output_dir=${dir//site/proofreading\/output}
  printf '%s\n' "${output_dir}"
  # Without the target directory the converter cannot write its result.
  mkdir -p "${output_dir}" || return 1
  python proofreading/src/html_converter.py \
    --input_file "${file}" --output_dir "${output_dir}"
}

#######################################
# Find HTML files under site/ja, convert each one to Markdown and save it to
# the output directory.
# Globals:
#   files (written) newline-separated list of the HTML paths that were found
#   file  (written) path handed to create_markdown_from_html on each pass
#######################################
function create_markdowns_from_html() {
  # -name '*.html' tests the file-name suffix only; a bare `grep .html` is an
  # unanchored regex whose dot matches any character, so it also accepted
  # paths such as "site/ja/xhtml/readme.txt".
  files=$(find site/ja -maxdepth 5 -type f -name '*.html')
  # Read one path per line from fd 3: paths with spaces stay intact, the loop
  # runs in the current shell, and the converter keeps the caller's stdin.
  while IFS= read -r file <&3; do
    # An empty result still yields one blank here-document line; skip it.
    [ -n "${file}" ] || continue
    create_markdown_from_html
  done 3<<EOF
${files}
EOF
}
function copy_markdown() {
files=`find site/${lang} -maxdepth 5 -type f |grep .md`
for file in ${files}; do
Expand Down Expand Up @@ -44,6 +60,10 @@ if [ $# -eq 1 ]; then
create_markdown
base_filename=${file##*/}
redpen --conf proofreading/redpen-conf.xml ${output_dir}/${base_filename%.*}.md
elif [ ${file##*.} = "html" ]; then
create_markdown_from_html
base_filename=${file##*/}
redpen --conf proofreading/redpen-conf.xml ${output_dir}/${base_filename%.*}.md
elif [ ${file##*.} = "md" ]; then
redpen --conf proofreading/redpen-conf.xml ${file}
else
Expand All @@ -54,6 +74,7 @@ if [ $# -eq 1 ]; then
else
echo "check all files"
create_markdowns
create_markdowns_from_html
copy_markdown
exec_redpen
fi
32 changes: 32 additions & 0 deletions src/html_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import sys
import click
import html2text

@click.command()
@click.option('--input_file', '-i', default=None)
@click.option('--output_dir', '-o', default=None)
def main(input_file, output_dir):
    """Convert one HTML file to Markdown.

    The result is written to ``<output_dir>/<name>.md`` where ``<name>`` is
    the input file name with only its last extension removed, so it matches
    the ``${base_filename%.*}.md`` path the calling shell script looks for.

    Args:
        input_file: Path of the HTML file to read (``--input_file`` / ``-i``).
        output_dir: Existing directory to write the Markdown file into
            (``--output_dir`` / ``-o``).

    Exits with status 0 on success and 1 when an argument is missing, the
    input cannot be read, or the output cannot be written.
    """
    if input_file is None or output_dir is None:
        print('invalid arguments', file=sys.stderr)
        sys.exit(1)

    # Read as UTF-8 explicitly so the result does not depend on the locale of
    # the environment the script runs in.
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            html = f.read()
    except (OSError, UnicodeDecodeError) as err:
        # Report the real cause instead of always claiming the file is missing.
        print(f'cannot read {input_file}: {err}', file=sys.stderr)
        sys.exit(1)

    text = html2text.html2text(html)

    # splitext strips only the final extension ("a.b.html" -> "a.b"), unlike
    # split('.')[0], which would collapse it to "a".
    stem = os.path.splitext(os.path.basename(input_file))[0]
    output = os.path.join(output_dir, '{}.md'.format(stem))
    try:
        with open(output, 'w', encoding='utf-8') as f:
            f.write(text)
    except OSError as err:
        # A failed write must not fall through to the success message below.
        print(f'cannot write {output}: {err}', file=sys.stderr)
        sys.exit(1)

    print(f'converted {input_file} to {output_dir}')
    sys.exit(0)


if __name__ == '__main__':
    main()