From 714b94818f91c9ae34e9fc3063649946c4523b63 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 17 Sep 2013 14:45:41 -0400 Subject: [PATCH 1/9] refs #62: Added a simple entry point for docx2html --- pydocx/parsers/Docx2Html.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 71a2ebc6..aa65eb20 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -1,4 +1,5 @@ import base64 +import sys import xml.sax.saxutils from pydocx.DocxParser import DocxParser @@ -203,3 +204,18 @@ def indent(self, text, just='', firstLine='', left='', right=''): def break_tag(self): return '
' + + +def main(): + try: + path_to_docx = sys.argv[1] + path_to_html = sys.argv[2] + except IndexError: + print 'Must specific the file to convert and the name of the resulting file.' # noqa + sys.exit() + html = Docx2Html(path_to_docx).parsed + with open(path_to_html, 'w') as f: + f.write(html) + +if __name__ == '__main__': + main() From 1eb7a81c4b99ec0d0a8945eb18a4b4752abc87ed Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 17 Sep 2013 14:46:36 -0400 Subject: [PATCH 2/9] refs #62: Added a simple entry point for the markdown converter. --- pydocx/parsers/Docx2Markdown.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pydocx/parsers/Docx2Markdown.py b/pydocx/parsers/Docx2Markdown.py index d023df7a..49c86a59 100644 --- a/pydocx/parsers/Docx2Markdown.py +++ b/pydocx/parsers/Docx2Markdown.py @@ -1,3 +1,5 @@ +import sys + from pydocx.DocxParser import DocxParser @@ -24,3 +26,18 @@ def italics(self, text): def underline(self, text): return '***' + text + '***' + + +def main(): + try: + path_to_docx = sys.argv[1] + path_to_html = sys.argv[2] + except IndexError: + print 'Must specific the file to convert and the name of the resulting file.' # noqa + sys.exit() + html = Docx2Markdown(path_to_docx).parsed + with open(path_to_html, 'w') as f: + f.write(html) + +if __name__ == '__main__': + main() From 186f8f77b30336f1cd059e77b05779618f652378 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 17 Sep 2013 14:50:02 -0400 Subject: [PATCH 3/9] refs #62: Added a readme note --- README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.rst b/README.rst index 531a3ec2..d5494698 100644 --- a/README.rst +++ b/README.rst @@ -231,3 +231,8 @@ Optional Arguments ################## You can pass in `convert_root_level_upper_roman=True` to the parser and it will convert all root level upper roman lists to headings instead. 
+ +Command Line Execution +###################### + +It is possible to run the conversion from command line without using any python. This can be done by calling `pydocx.docx2html path/to/file.docx path/to/output.html`. Simply change `pydocx.docx2html` to `pydocx.docx2markdown` in order to convert to markdown instead. From 2cb9eb84a469c914eaeea8c1c310f9080c873d83 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 17 Sep 2013 14:50:14 -0400 Subject: [PATCH 4/9] refs #62: Added the command line entry points. --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a613f6b5..257c595d 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,6 @@ from ez_setup import use_setuptools use_setuptools() from setuptools import setup, find_packages # noqa - rel_file = lambda *args: os.path.join( os.path.dirname(os.path.abspath(__file__)), *args) @@ -55,4 +54,10 @@ def get_description(): "Topic :: Text Processing :: Markup :: XML", ], long_description=get_description(), + entry_points={ + 'console_scripts': [ + 'pydocx.docx2html = pydocx.parsers.Docx2Html:main', + 'pydocx.docx2markdown = pydocx.parsers.Docx2Markdown:main', + ], + }, ) From 04bb89ad6931dc7036759b7290e2d2529753f155 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 17 Sep 2013 14:50:48 -0400 Subject: [PATCH 5/9] refs #62: Added an update note. --- CHANGELOG | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index fbe1aac2..829d1041 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,9 @@ Changelog ========= +* 0.3.12 + * Added command line support to convert from docx to either html or + markdown. * 0.3.11 * The non breaking hyphen tag was not correctly being imported. This issue has been fixed. From 375c03a379eaf817a9bbf41ee419b7cb7857611a Mon Sep 17 00:00:00 2001 From: Damjan Cvetko Date: Wed, 18 Sep 2013 13:30:41 +0200 Subject: [PATCH 6/9] Added output encoding to utf8. 
--- pydocx/parsers/Docx2Html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index aa65eb20..04933498 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -215,7 +215,7 @@ def main(): sys.exit() html = Docx2Html(path_to_docx).parsed with open(path_to_html, 'w') as f: - f.write(html) + f.write(html.encode('utf-8')) if __name__ == '__main__': main() From 877a3b48f944041779bff7ea522bf37089c6e6a1 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 18 Sep 2013 11:26:05 -0400 Subject: [PATCH 7/9] refs #62: Small refactor, only one entry point now. --- pydocx/__init__.py | 23 +++++++++++++++++++++++ pydocx/parsers/Docx2Html.py | 16 ---------------- pydocx/parsers/Docx2Markdown.py | 17 ----------------- setup.py | 3 +-- 4 files changed, 24 insertions(+), 35 deletions(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index 075fe444..e17b9f8e 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -1,3 +1,4 @@ +import sys from .parsers import Docx2Html, Docx2Markdown @@ -9,3 +10,25 @@ def docx2markdown(path): return Docx2Markdown(path).parsed VERSION = '0.3.11' + + +def main(): + try: + parser_to_use = sys.argv[1] + path_to_docx = sys.argv[2] + path_to_html = sys.argv[3] + except IndexError: + print 'Must specify which parser as well as the file to convert and the name of the resulting file.' 
# noqa + sys.exit() + if parser_to_use == '--html': + html = Docx2Html(path_to_docx).parsed + elif parser_to_use == '--markdown': + html = Docx2Markdown(path_to_docx).parsed + else: + print 'Only valid parsers are --html and --markdown' + sys.exit() + with open(path_to_html, 'w') as f: + f.write(html) + +if __name__ == '__main__': + main() diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index aa65eb20..71a2ebc6 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -1,5 +1,4 @@ import base64 -import sys import xml.sax.saxutils from pydocx.DocxParser import DocxParser @@ -204,18 +203,3 @@ def indent(self, text, just='', firstLine='', left='', right=''): def break_tag(self): return '
' - - -def main(): - try: - path_to_docx = sys.argv[1] - path_to_html = sys.argv[2] - except IndexError: - print 'Must specific the file to convert and the name of the resulting file.' # noqa - sys.exit() - html = Docx2Html(path_to_docx).parsed - with open(path_to_html, 'w') as f: - f.write(html) - -if __name__ == '__main__': - main() diff --git a/pydocx/parsers/Docx2Markdown.py b/pydocx/parsers/Docx2Markdown.py index 49c86a59..d023df7a 100644 --- a/pydocx/parsers/Docx2Markdown.py +++ b/pydocx/parsers/Docx2Markdown.py @@ -1,5 +1,3 @@ -import sys - from pydocx.DocxParser import DocxParser @@ -26,18 +24,3 @@ def italics(self, text): def underline(self, text): return '***' + text + '***' - - -def main(): - try: - path_to_docx = sys.argv[1] - path_to_html = sys.argv[2] - except IndexError: - print 'Must specific the file to convert and the name of the resulting file.' # noqa - sys.exit() - html = Docx2Markdown(path_to_docx).parsed - with open(path_to_html, 'w') as f: - f.write(html) - -if __name__ == '__main__': - main() diff --git a/setup.py b/setup.py index 257c595d..3ce0276f 100644 --- a/setup.py +++ b/setup.py @@ -56,8 +56,7 @@ def get_description(): long_description=get_description(), entry_points={ 'console_scripts': [ - 'pydocx.docx2html = pydocx.parsers.Docx2Html:main', - 'pydocx.docx2markdown = pydocx.parsers.Docx2Markdown:main', + 'pydocx = pydocx.__init__:main', ], }, ) From c672b9f00b86865b16dca7578bd475807991827b Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 18 Sep 2013 11:28:37 -0400 Subject: [PATCH 8/9] refs #62: Update note --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index d5494698..7640b8d1 100644 --- a/README.rst +++ b/README.rst @@ -235,4 +235,4 @@ You can pass in `convert_root_level_upper_roman=True` to the parser and it will Command Line Execution ###################### -It is possible to run the conversion from command line without using any python. 
This can be done by calling `pydocx.docx2html path/to/file.docx path/to/output.html`. Simply change `pydocx.docx2html` to `pydocx.docx2markdown` in order to convert to markdown instead. +It is possible to run the conversion from command line without using any python. This can be done by calling `pydocx --html path/to/file.docx path/to/output.html`. Simply change `pydocx --html` to `pydocx --markdown` in order to convert to markdown instead. From d20a7dd785dafa0f53ff69a50a4c2ab67ebe8307 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 18 Sep 2013 11:31:34 -0400 Subject: [PATCH 9/9] refs #62: Better update note --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 7640b8d1..fe21f717 100644 --- a/README.rst +++ b/README.rst @@ -235,4 +235,4 @@ You can pass in `convert_root_level_upper_roman=True` to the parser and it will Command Line Execution ###################### -It is possible to run the conversion from command line without using any python. This can be done by calling `pydocx --html path/to/file.docx path/to/output.html`. Simply change `pydocx --html` to `pydocx --markdown` in order to convert to markdown instead. +First you have to install pydocx; this can be done by running the command `pip install pydocx`. From there you can simply call the command `pydocx --html path/to/file.docx path/to/output.html`. Change `pydocx --html` to `pydocx --markdown` in order to convert to markdown instead.