From 7d6dd2ea3d42029d1c12a7563be5e007971ef6e1 Mon Sep 17 00:00:00 2001 From: badbye Date: Thu, 2 Mar 2017 17:09:58 +0800 Subject: [PATCH 01/11] extension for links --- .gitignore | 2 + docx2txt/__init__.py | 3 +- docx2txt/docx2txt.py | 145 +++++++++++++++++++++++++------------------ setup.py | 7 ++- 4 files changed, 94 insertions(+), 63 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c10666e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea +*.pyc diff --git a/docx2txt/__init__.py b/docx2txt/__init__.py index 778804c..5266222 100644 --- a/docx2txt/__init__.py +++ b/docx2txt/__init__.py @@ -1,4 +1,5 @@ from .docx2txt import process from .docx2txt import process_args +from .docx2txt import DOCReader -VERSION = '0.6' +VERSION = '0.7' diff --git a/docx2txt/docx2txt.py b/docx2txt/docx2txt.py index d48f9e5..3aea6a4 100755 --- a/docx2txt/docx2txt.py +++ b/docx2txt/docx2txt.py @@ -1,14 +1,11 @@ #! /usr/bin/env python import argparse -import re import xml.etree.ElementTree as ET import zipfile import os import sys - - -nsmap = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} +import re def process_args(): @@ -42,69 +39,99 @@ def qn(tag): example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``. Source: https://github.com/python-openxml/python-docx/ """ + nsmap = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} prefix, tagroot = tag.split(':') uri = nsmap[prefix] return '{{{}}}{}'.format(uri, tagroot) -def xml2text(xml): - """ - A string representing the textual content of this run, with content - child elements like ```` translated to their Python - equivalent. - Adapted from: https://github.com/python-openxml/python-docx/ - """ - text = u'' - root = ET.fromstring(xml) - for child in root.iter(): - if child.tag == qn('w:t'): - t_text = child.text - text += t_text if t_text is not None else '' - elif child.tag == qn('w:tab'): - text += '\t' - elif child.tag in (qn('w:br'), qn('w:cr')): - text += '\n' - elif child.tag == qn("w:p"): - text += '\n\n' - return text +class DOCReader(object): + def __init__(self, docx, img_dir=None): + if not os.path.exists(docx): + raise Exception('Can not file document: %s' % docx) + self.file = docx + self.img_dir = img_dir + self.data = {'links': []} # save header, footer, document, links + self.links = {} + + # read file + self.zipf = zipfile.ZipFile(self.file) + self.filelist = self.zipf.namelist() + + # parse hyperlinks + hyperlink_document = 'word/_rels/document.xml.rels' + if hyperlink_document in self.filelist: + self.process_hyperlink(self.zipf.read(hyperlink_document)) + + def process_hyperlink(self, doc): + """ + external hyperlink from a string of xml document(typically the `word/_rels/document.xml.rels` file) + """ + root = ET.fromstring(doc) + nodes = [node.attrib for node in root] + nodes = filter(lambda x: x.get('TargetMode', '') == 'External', nodes) + self.links = {node['Id']: node['Target'] for node in nodes} + + def xml2text(self, xml): + """ + A string representing the textual content of this run, with content + child elements like ```` translated to their Python + equivalent. + Adapted from: https://github.com/python-openxml/python-docx/ + """ + text = u'' + root = ET.fromstring(xml) + for child in root.iter(): + attr = child.attrib + for k, v in attr.iteritems(): + if k.endswith('id') and v in self.links: + self.data['links'].append({ET.tostring(child, method='text'): self.links[v]}) + if child.tag == qn('w:t'): + t_text = child.text + text += t_text if t_text is not None else '' + elif child.tag == qn('w:tab'): + text += '\t' + elif child.tag in (qn('w:br'), qn('w:cr')): + text += '\n' + elif child.tag == qn("w:p"): + text += '\n\n' + return text + + def process(self): + text = u'' + # get header text + # there can be 3 header files in the zip + header_xmls = re.compile('word/header[0-9]*.xml') + self.data['header'] = [self.xml2text(self.zipf.read(fname)) for fname in self.filelist if header_xmls.match(fname)] + text += '\n'.join(self.data['header']) + + # get main text + doc_xml = 'word/document.xml' + self.data['document'] = self.xml2text(self.zipf.read(doc_xml)) + text += self.data['document'] + + # get footer text + # there can be 3 footer files in the zip + footer_xmls = re.compile('word/footer[0-9]*.xml') + self.data['footer'] = [self.xml2text(self.zipf.read(fname)) for fname in self.filelist if footer_xmls.match(fname)] + text += '\n'.join(self.data['footer']) + + if self.img_dir is not None: + # extract images + for fname in self.filelist: + _, extension = os.path.splitext(fname) + if extension in [".jpg", ".jpeg", ".png", ".bmp"]: + dst_fname = os.path.join(self.img_dir, os.path.basename(fname)) + with open(dst_fname, "w") as dst_f: + dst_f.write(self.zipf.read(fname)) + self.zipf.close() + return text.strip() def process(docx, img_dir=None): - text = u'' - - # unzip the docx in memory - zipf = zipfile.ZipFile(docx) - filelist = zipf.namelist() - - # get header text - # there can be 3 header files in the zip - header_xmls = 'word/header[0-9]*.xml' - for fname in filelist: - if re.match(header_xmls, fname): - text += xml2text(zipf.read(fname)) - - # get main text - doc_xml = 'word/document.xml' - text += xml2text(zipf.read(doc_xml)) - - # get footer text - # there can be 3 footer files in the zip - footer_xmls = 'word/footer[0-9]*.xml' - for fname in filelist: - if re.match(footer_xmls, fname): - text += xml2text(zipf.read(fname)) - - if img_dir is not None: - # extract images - for fname in filelist: - _, extension = os.path.splitext(fname) - if extension in [".jpg", ".jpeg", ".png", ".bmp"]: - dst_fname = os.path.join(img_dir, os.path.basename(fname)) - with open(dst_fname, "w") as dst_f: - dst_f.write(zipf.read(fname)) - - zipf.close() - return text.strip() + obj = DOCReader(docx, img_dir=img_dir) + res = obj.process() + return res if __name__ == '__main__': diff --git a/setup.py b/setup.py index f0c5c10..7a33d3e 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import glob from distutils.core import setup +from docx2txt import VERSION # get all of the scripts scripts = glob.glob('bin/*') @@ -7,14 +8,14 @@ setup( name='docx2txt', packages=['docx2txt'], - version='0.6', - description='A pure python-based utility to extract text and images ' + version=VERSION, + description='A pure python-based utility to extract text, links and images' 'from docx files.', author='Ankush Shah', author_email='ankush.shah.nitk@gmail.com', url='https://github.com/ankushshah89/python-docx2txt', download_url='https://github.com/ankushshah89/python-docx2txt/tarball/0.6', - keywords=['python', 'docx', 'text', 'images', 'extract'], + keywords=['python', 'docx', 'text', 'links', 'images', 'extract'], scripts=scripts, classifiers=[], ) From 7ddc7423fc65b15f6167680074fa354510181eae Mon Sep 17 00:00:00 2001 From: badbye Date: Thu, 2 Mar 2017 17:30:00 +0800 Subject: [PATCH 02/11] dict->tuple --- docx2txt/docx2txt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docx2txt/docx2txt.py b/docx2txt/docx2txt.py index 3aea6a4..b1be275 100755 --- a/docx2txt/docx2txt.py +++ b/docx2txt/docx2txt.py @@ -85,7 +85,7 @@ def xml2text(self, xml): attr = child.attrib for k, v in attr.iteritems(): if k.endswith('id') and v in self.links: - self.data['links'].append({ET.tostring(child, method='text'): self.links[v]}) + self.data['links'].append((ET.tostring(child, method='text'), self.links[v])) if child.tag == qn('w:t'): t_text = child.text text += t_text if t_text is not None else '' From 793a4c43632382a0c1ec1ad8427ba08a41760e03 Mon Sep 17 00:00:00 2001 From: badbye Date: Thu, 2 Mar 2017 22:47:46 +0800 Subject: [PATCH 03/11] fix encoding peoblem --- .gitignore | 1 + docx2txt/docx2txt.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c10666e..e9af22b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .idea *.pyc +build diff --git a/docx2txt/docx2txt.py b/docx2txt/docx2txt.py index b1be275..63dd380 100755 --- a/docx2txt/docx2txt.py +++ b/docx2txt/docx2txt.py @@ -85,7 +85,7 @@ def xml2text(self, xml): attr = child.attrib for k, v in attr.iteritems(): if k.endswith('id') and v in self.links: - self.data['links'].append((ET.tostring(child, method='text'), self.links[v])) + self.data['links'].append((ET.tostring(child, encoding='utf-8', method='text'), self.links[v])) if child.tag == qn('w:t'): t_text = child.text text += t_text if t_text is not None else '' From 9facf376aac100134bc57e6775289a20d400c241 Mon Sep 17 00:00:00 2001 From: badbye Date: Sun, 5 Mar 2017 22:13:07 +0800 Subject: [PATCH 04/11] pydocx --- README.md | 25 +++++++++++++----- bin/docx2txt | 6 ++--- docx2txt/__init__.py | 5 ---- pydocx/__init__.py | 5 ++++ docx2txt/docx2txt.py => pydocx/docxreader.py | 0 setup.py | 13 +++++----- tests/Hello.docx | Bin 0 -> 28305 bytes tests/__init__.py | 0 tests/test-hello.py | 26 +++++++++++++++++++ 9 files changed, 58 insertions(+), 22 deletions(-) delete mode 100644 docx2txt/__init__.py create mode 100644 pydocx/__init__.py rename docx2txt/docx2txt.py => pydocx/docxreader.py (100%) create mode 100644 tests/Hello.docx create mode 100644 tests/__init__.py create mode 100644 tests/test-hello.py diff --git a/README.md b/README.md index d1b360b..bb6e5c5 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ -# python-docx2txt # +# pydocx # -A pure python-based utility to extract text from docx files. +This project is forked from [ankushshah89/python-docx2txt](https://github.com/ankushshah89/python-docx2txt/pull/10/files). +A new feature is added: extract the hyperlinks and its corresponding texts. -The code is taken and adapted from [python-docx](https://github.com/python-openxml/python-docx). It can however also extract text from header, footer and hyperlinks. __It can now also extract images.__ +It is a pure python-based utility to extract text from docx files. The code is taken and adapted from [python-docx](https://github.com/python-openxml/python-docx). It can however also extract **text** from header, footer and **hyperlinks**. It can now also extract **images**. ## How to install? ## ```bash -pip install docx2txt +pip install pydocx ``` ## How to run? ## @@ -18,13 +19,23 @@ docx2txt file.docx # extract text and images docx2txt -i /tmp/img_dir file.docx ``` + + b. From python: ```python -import docx2txt +import pydocx + +c = 'file.docx' # extract text -text = docx2txt.process("file.docx") +text = pydocx.process(file) # extract text and write images in /tmp/img_dir -text = docx2txt.process("file.docx", "/tmp/img_dir") +text = pydocx.process(file, "/tmp/img_dir") + + +# if you want the hyperlinks +doc = pydocx.DOCReader(file) +doc.process() # process file +hyperlinks = doc.data['links'] ``` diff --git a/bin/docx2txt b/bin/docx2txt index 62157c2..af03687 100755 --- a/bin/docx2txt +++ b/bin/docx2txt @@ -1,9 +1,9 @@ #! /usr/bin/env python -import docx2txt +import pydocx if __name__ == '__main__': import sys - args = docx2txt.process_args() - text = docx2txt.process(args.docx, args.img_dir) + args = pydocx.process_args() + text = pydocx.process(args.docx, args.img_dir) sys.stdout.write(text.encode('utf-8')) diff --git a/docx2txt/__init__.py b/docx2txt/__init__.py deleted file mode 100644 index 5266222..0000000 --- a/docx2txt/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .docx2txt import process -from .docx2txt import process_args -from .docx2txt import DOCReader - -VERSION = '0.7' diff --git a/pydocx/__init__.py b/pydocx/__init__.py new file mode 100644 index 0000000..0d11c2d --- /dev/null +++ b/pydocx/__init__.py @@ -0,0 +1,5 @@ +from .docxreader import process +from .docxreader import process_args +from .docxreader import DOCReader + +VERSION = '0.1' diff --git a/docx2txt/docx2txt.py b/pydocx/docxreader.py similarity index 100% rename from docx2txt/docx2txt.py rename to pydocx/docxreader.py diff --git a/setup.py b/setup.py index 7a33d3e..c831cc6 100644 --- a/setup.py +++ b/setup.py @@ -6,15 +6,14 @@ scripts = glob.glob('bin/*') setup( - name='docx2txt', - packages=['docx2txt'], + name='pydocx', + packages=['pydocx'], version=VERSION, - description='A pure python-based utility to extract text, links and images' + description='A pure python-based utility to extract text, hyperlinks and images' 'from docx files.', - author='Ankush Shah', - author_email='ankush.shah.nitk@gmail.com', - url='https://github.com/ankushshah89/python-docx2txt', - download_url='https://github.com/ankushshah89/python-docx2txt/tarball/0.6', + author='Ankush Shah, Yalei Du', + author_email='yaleidu@163.com', + url='https://github.com/badbye/python-docx2txt', keywords=['python', 'docx', 'text', 'links', 'images', 'extract'], scripts=scripts, classifiers=[], diff --git a/tests/Hello.docx b/tests/Hello.docx new file mode 100644 index 0000000000000000000000000000000000000000..3b2f14a6d7d578d4fe2d33a8a1304f5a6dc9902f GIT binary patch literal 28305 zcmeFZgLh}!mNp#Qwr#Uw+qP{x72B@Zww;P?r{bhyn{UoJeYJ@@7w?WW%!TI|K<3lBR3;mR-TwEH=f@!jAX3lPoPLU$1}iv6IlCueO-#;i^EO7F0D7>89M5ZP-AT0t3r`*fx2WPKj8jFsdIE{IbNhx+rw`#D?L_{Qc>)it zz};vQQZa;|cLqVNpy?tAzz)oCh0BeIh96U0ZJ69V%Ii=sho+}nJ#b+Q<8cB%_apmR z<_B+1>nHI3;|1UEvh-j=2)3mx1Sf0+70vfpi?+!Vco(#Sh9b^unD=pq7lXC>%t>XN z@TuKqB3=`rTaPDTUz-N{1h>r+gm}60WBjpAyc(Mu9``~(J&|8H18Cqqy2@pWhCE7YOCut3kz#M+6T?vLaD zVZi_5!2hSNS0>0v0W%_mTn2m+PIPgvQxVPm(iJ|EF@6UWVtN%|$0t#;`RoEj)hV@t z=P|vRN;Laf$qvq4&K_fNRUKfW5MWr+g{iI9XLZ}y3Z5L4nK#Q_y*&cSc7MP45-gUm zk@P}Hl1xdbG7}jDK2Ye@oCJlxa`0nGOe!I^uAkpi40o@b=91yVl%L0-ysC`Ab`rMq z5aS7_*=}9nwqV(7TvqnmcW(sOkV&F}4SbW4`O*->Va4y)FPy2!iVz~Az}al=U4`+R z>(lPR|2A9* zXgKg9zf$MNS0=>)fBScyMRIK0FZPs|XbMVoT0KIa}zIrOaKQZ9Og zQ$!6ABe?_K1&kF@yrLnp`%zNCd7WhX&7f!-As!7ygO;8ijb5H9I#gMAVN3|Unkd0a zXG^Jk7l*3YTLE}DS$QPFg2qJD3hh!I%oz!UnMww}{_XI%Dlu%t1u0};y^-~8Si8^y zu|F{E5!L=(3_b%YPavN-5%Va_`F=o*Sa2FZS{rD;7YRAHqD0mI_=#R0yww%D_ddi% ziUBhwA$fO*R74f3D9!qnhdj+Um~5DodSQYzX{ceS!8B^@GGI<99o|&L=nqFP0dC!gp$;#`s|Ml3a#>n$iPbBNN zi=j)qPu;+4zm^@qQi zm9=p}vjebGKFfV9CYX65yC0su9N4Wf%3-FZhA8A0U{a5sclYCqTvO`pm5eHI{PLJJ z9vQ7gir*f-rKhxt-gJ_c=_eeVd^2epzR4_H(PEDG$isg!9RiiAOWM9fn?+5q1y*a= zo3Tu4o}YOHuO(~j=P(}ND6UOsRv-qIQt%H}r+LQIcPt#qR2H^jQSAxTAiYqWY3<`U zbvOTk0Wna~ZkfF5)t+L`+=#Y|k+8I1Ezy?u8&Vt4K-5IyiOnuYByh4G)PsFHOMm%W zm@M^67QYonv;XkSbftoh#^EI~*asr&EJD^GBg%tMv$X3EyZ5$*GZOaJCk*~4EPwP8 z)ZYrVq+9v%^D|`5vj)c%`@rfl@oqwiCGqLWVf~Pm^X`iBWHX&WqVi1O^%qCYu449s zlN!yl#NZAnP9!rVQX5wvwlnx*?A7s9Z~2XP_0Qi~LQYw{B(+;(LuP~d&V=1K#Kt9$ zvr5r_>BdB(^P-xjmB|DPxCECEyobbh2LUf*>v&|vc$CgEE1)hJny>|{a2l_9VZaUb zM_!Q>zjDtTjrfr(zH2Wz(XhO`NT)Wb5HFTnCcoL*C7h3TPUXVBZOz^Tf09LjwR{%9 z?fBx3!4BsWIq!s}`71GwM)e~BnHd7xDF&KbEhX9FH-nnnW0O+Q@^f5?$^JH~nYmhl zJu_YDIg9S-E#6g;#&`1lXfNS{dtj*z_SKj|L4vAv-tKmlQGhM^{YZ>mu%K`2a#>bg zwP?*$+CnP3=u971EJ)TC=SY)n9|P(gX}$zyPC}<^(5llB1YcRw(#Wi}hZ~s3I)bSr z)(bA<*IncrhM(5|!d8$8(NrZ-C!l;-;a_}WU@jVS;RCxq%%eZsW_?-Q5Azo z?IR%r%^YesC??=c&Ck3;sX)naalf8jd#+#nFm~UZlUmU8x8XJ_NaWJ{l?^IE0RRyG z5pJe-cFrb_%zq?}!XyQ|9}Eb?Tjb~51U=;Z8I`n9W#z_MH*8hE!0n?URc&%iu+6*Q z;edi{6os+zYJIw2uOGA8?)y6u1O}^+bCr;$iIMCl36Njn61M2&8n}@-(NI8k%n%hBZ%i2k}lrU$%Lv)Q8Q5u0$FKd0M>YQ;54-n`OOmo`g zmFl0&57qv5byV9Fp1O?cV`dTEkI5n8>k6fKQ~1Wj7(=F9C;)?~MDq4NsXO{~vzQ|_ zNzrzFV&p&A1K`wJ&A~^LbSRCyJp1rsqtB|d4_sM2vRBY{I`9pBVH>|xy`Im!_D&_A zh(^O-13tq_hHd2+3|(q?Udly7(#B?Gw_QtYOJS4e$Ay;FV=ikST*kX3)b8)ThPz&MH2s*mA^J(lXaRZN(E*FY|A_0D?)4beyxO&YM31gE9ruc9Bv>#^Z z!gl+0OuUa!1k4J)w!yqOTSVRAih)aIGx=l;eFuV#=Y%D2A&A)Z=jJS(v1rTI5?2)H zZ5mQumaa@IM%9K2%`2iQ9ZWfORfrV{ypBf{r?Y1UN@fZy*VGU49NmXBlr-J-$Q&@W zsiVV6Q^_%vd_oFvyM=+*DFV6iJNv?{^hgj}8_eob(dXM0eo%-X1Lo zdFyNfVjRrAYGkjjN&Lc<@4vCr%n(1vIW#j{MyAlvQHF?UOjPKAilGRo z_Mo&dXP)xBVSjFCwPXw!PWz0abU@eTXS3rpb-~eAhV)=roh$P-zC8Ir;J>2it!{d( zpR*QYqM&c{7~`Ez`DwGH9=AW(^fQMJ8N%j-pr;u4?WIA9{ah zFQ+PXhzVx141qbfxN0&6Y?mBlRil;#H|ENWf+Y0wjeot~GxmPHa_7rLjGPtj7TSo{ zby~;m=VMq|z1m<8Oj+DCTS2ZP=0x8itFckznKBJ3(yV;K#e4hm(tlkXLfgnp{2Rn# z{9D9wg@h_A7RkC%Z94rQh~@Inh&B2CH;9$W2j74SnOVYBfW#s<8qe+6^ZsF@MBdf{ zC~B3u5K>flwMk-dq~3?>;zXJT3f`8(EKj)KrvI(7&vb_3&`WrRx6=v0T{o%af8+$#IR?j?4{_P~wy&AExX_A4wXqUm&0QE0wih zRdvnVg!^)J2q!r0^on*&XNBnS+?`j}2F6b#x*HmYc3?7z__#qSJrsS@F~pFm<_JQ< zD3LziO>2z3pHFIvOp`XA9-RCK#r!$7)iSrxB%O-GPtCpF*=jTBZXi|F_OE9*Tnw|J zEwA8}X;$%B)L*C+5L0M+E5Rq5$h6LE!BWQu7iHJ)Fc#U>P3iI_;@0eovA*|7aNRe-t9{;+ng_qT|(o^XBo zFSBiKVqk3I_!nX&P1+WG<)_Q!2i&+zNQ5F8qu_mn1zXz+n^%Bp#=i0%V{G$HUvQwH zU&%yk_Pr18hr?UUrfeN@lmQVM!%dR%-(ux^Q5G@PhC#Zzcqj1`w!bN0B5KxwWTgpp zoX2*^L`P15Ayna1!^+6-boCh=nfH-6IE|Jq%KrA1ZGv&hb{hMsxlW@k?65EuwV1>S zp0kFmf*8t`xiD{@Fyq@EmM^C6#LDoYRtT$9n{GuN9$LgB81zmt36mD${xJpmSQ2v2 zpkEG>thy7!i0#b6hyk)K8L_FMcgq8P9>V)m7SRJvf|Zt3sXG~6lZ?MswR-z7q2+Q; zzu|~b4gMRKwaxB!(vg}%#%%H;0uPj4Ei%r1@+C7`2`yzJt*T9xMT#pSP`uUdM<<{ z8vjz@p=JTCGdu7&DceATP)hEB?|ob~8l`1bw$IB@`w3pH;Zg4DC5QmNCjDi>y}qP| ztmKXuH|=|>O(?n_5{x!A5z<{^e3S-?ExW<**{E}D&+`ZPznyJ(u_4lbGu!_W+rJbm z{+R8oYuvPr7XGT?3`!T#itJ5JF&}>n!O^rD380l0w>*AMu6coRPzkmUzK>Pdj?U*e zB8qeu?Fx{=c?eKxg)GM=nVHk^e&g>zzr!WNXCg?=hjH39vT|stGfUX~%@6EN%mW-R zVc~?toXEm|NXk~Za@{JGo0K+0DWR#Z74OhQ6quS;7Ef?KE#NAUI+PAd@WhZoo)tpRcW20|eS}x__DUp@dLbXIm)D*<@5dfqr zI#0c%UFw>1J0rSAjrfdc&I>XfV0+}50D)K?INHK_ywbw5-NHHE^#=BrZvBYe~yI<+dTdwB)VwnGm__|K)DiLJ5i7yESj1Ij0o zwrqbepbTA-pMM26vb1DEfFKo8Qkk1l2ai8ASshff$&Z=N_ZY&s%INw=iz0_Tzu#|9 zQ_sB(9jW#9&B_*J?m?Y|qHqUgBN%#OM~^j&+i09?2-)$1_E>qii$}H}@9ULtl+&#Y z%H00jT$LtRdC`na&N^AP<`Q?c#$`yF`=)6TbEqCuiYH5gCKnK1t?3e^ish`Gb`no1 z=8eP2yG)uAbGBSRpU{?~9-~?!f(miqobn_M^~;vx^0KKp0g6dNcC)fm& zcXzTGk6czV5kP|0>yH8k+g4PMe2)&A!EH9_u3WZDJnV)g3@%UY6JWzRnTiM+>SiEpC#$prgz|+iM`A!BC$$?S;jpH1( z8}V7O@M$~9;$~z80mzUl3_t|yN17mFxyxh*vG6sTJ=StUs%~;zAupU^y%?TdIDf7v zx7s#@LKxtHCmv7cZ%LZVmjA@bV!6v1gVRhvNfM$Ng$>pMUhfQ8)s1=cy_R296gL5J z{LS1VXNRc+cph&FESrQ-gC>FGCkc6>_2$lapC`h0%6w&exj;Fu*ay3(;-tdc&4!S3 zW!Db1`(u#e3I3g1W1O!OXLyuJJYiJLqb-W@q3#n@7w^#X<8RqQUYiyP_m_{=e)$;E z7v}saC;YL#`EO44r6>I9WJFs+9|#1|5U8XK_qLWT;P%Mim1NB(+=X9G1_@~)5QRoY z%N}#?azQ)jXE^$)pj8e6r#<2jB#y73Oe6D*+4}GaYoL0hL-Dp`O$%aB*)&_e|zV zl|i)T1esuw@lN)vl}p+mUiO2?F5tOsMf1qloN1qw=rk#f+(aO4bF_hURDi+EQf6>x zqgyg?(gm1P%f4hqvh~BIco<2I0mnaeMDDbZs3tjwynPBZ21EFVLwL~RmUWUdz21FO z!7)xX_Zv>{D+EHt>BKXk2PGVfC+>NCS2NAL6mQ?!j}2NG9r8nuXwbLsB1pW|7^zRB z;3E3zpe45HR?)>-cxEbjn^0l$!usK?sSqjVSb5nU&?s@gLrpIc(Xv+3=|zdn2~1TI z*=|Ro!ZxS{UNx(cc1K!C z^`iaYRt!2z3GSuu=dJ0r-_iD{=Ev^g6(Jti?aHT^ZTsHN1TKxOi^gW#YI^DMzfrD; z$^Mm?{zX;(V=4WQm zdiL>T#(sA5qVomJOAb{?D00Lo3?eRLi);d)dKYBSC>(Oil0m980^El)_b1b<^*=o< zfx}ec)I!K89yhlcoLS5gTFdqqEK8XAiZnyG#yLz3ifpi^@;NL`hb+f31Lb}ps33)E zW-jb~Q59RR$b8VswiC=PREvHTYtuE!T?3021cNOoX24>?>>kIy_i9s}yEG<&V3k%A zX-`}i7%;%rqM-T4Vxs>QpDEk##$Z+H|+L0&qN> zW~S@6#{3%EGJOt!;Ssq<0mZXq{4c5!7O@b*rZ^W|P03or@TIiD~LuTTu@ z^AS#VE(UNczF0N|o1Ut_M0|TKBfovi;Fg(il7)Hup0GQqY9XZyN)eC%8h4-Dy<`sh zYqHld2DZ*Pk<8q;;|W{mtwT6^vB6DGi+&xv5`)Y9C5hljNbUCasGPk@LlkNUoE!Fi zmaV^G3lglF6cN%>pTq<$xWZ(`bHV5~M7QJRZ>fs3>&+H27yv*T761V1KY+#A+{DI& z{?Fr|eC13-FA|#_!H@oqJIu$evtzSqSZ}gJhqM(qrL#tRtd{nt0UBOfZ)u zHdxR?^F`4-PhLC^WNq0u!%AuGPDr`pJ|~%$z0iRmDRYM=+L!so!jxsslU|Q=8iGS4 z$|%E)=68Y(V3HWikjGrz1s=TBchFXU4;aWvh2acBTb>c0Wah27Ol&=Ss<>DK;BE(pLB9r`zWr zu3yYi+3L~Oqe83`X~}0-_H*iL6a40yj3*_V65+b-Uft4I10Za@3)Z#U&{zV%5V7wl zMW<4^ZXMD!xPdCa%Cdq7W;;H6h58x|+qregs#zVPmRo;pv-a2@!aj2arOfsY`!)j! zR8zKtT58S4ekHJO=jsDn!%x}w{Iu285EMnxW*THQq*m%lIw<+BZ4QYpiKfH`tX3xO zQ0Pb(2v316nG3~)!U1l`CP0i~v)c!9m?YJ9z9!T`_=%1$*QDIu%L77vQbe(fogaX) zjo6PoTURYrp$h7Fz26ZzyruWe!}ap0H>-&LMJ5wyLD+9qAzxQzDAJs^`NiydwoerY*4&F2xHdacMHd z*$sD8S*5nu{M>AO`6`;uOv&3W&?l{duf+LR|T;~pmt`7pFn})K` z6~#2T3noL!SME3WB#kpmB+Z#x-E(xX)Odz_+Ug=uIO~N9n=oYw_w(mBA#?`>X|Bqu zH^R~c{CrbIJF&t;ZTkeE@!h%VN{F9e7nj!_C|l4IcKaQ_c_{6(T5aHQ9&l<)R25%Z z_(L{7A%rq&w%vWW&bCIde3n|fPND#{Gg=*8#JGwrPW$pZbK7sMZWiP|iD9hK*F0-e zWQ$mI7vuzzOZE6P?w20r6v(}Ncn{5=3-WCrFwg@4ed!bTmKfsQyN9h+hS8T4ZO!i& z9YmI)l?fZv z^wr4~&#DW)71}lm;$)PWITVYAx<6ICBueVx2AzFWJpH=U5(^>buyfjLRH?wIM4ZL} z6*DsqU#=hEo}nsKel4pf7kPCpHK8}zu2Wtgf(r((Su#ay_O0;SkS>;;UkS>m+oor% zPlf5^fSqb+;UGkM6`k00mgJJd8y`L%xVT7ys4363d5c1z^Ub%H;*g-M^(>!)}gsz|eT znCwQGt(0BlX{}CLtiTxl^+5%uf1DmF-fqob0T6NH7yKLnW_R#e z1K1eZ*KSwI7d`z-B!AIUGiy61Cj&|g~!WouIGam0u;2nqa99%-DYH*yzi7EcA)01s)^>U z>|vg{jym>DX~OWg7N1TF%qe97kX8CfHod|n_3RR2DjSq|*x6I+|t) zNA>%hjXC0ETBk^2LszL*C$8@Y{ zvT0l;rZJUEofty!#3SMe)O5D5nZ@Ml9w}V4^4&xZ`@i$qnE77W!HN2DG8m>xVjYE; zjnwW)XoXB9+Cdk^n*^v!9NbY990i%T?bcq*hAIFYXopj{-T+CeLUWsGSh}FH#E1rh zO{KzU*aFT+94s_JSJ7A^-a)k3gq&92U9~>q`z6zM*H9?Y6U}B#KSHu^V4$0_AG& zYk5vR7Y9%>Xeq64Ql|N(mu)Xb5nH|$Lt%}&K@W{1#R1uy8i+dYmM(WstMx)@7T85$ z+s#L$I2!!i(Y3->1`mwvzn(SuQ6{7(xJwSt2=*FQR&M~)o{O~$-m$?kMY?TUrT zo9%>p1!In9fGu%K?@+92HP&G$~ujs9J1$NHJaSD!!#n3Rn#Xr3$_|ysj^W$^H^pz%~f}&?W=8@=I2@i zUH%!@9tvc7mcC<+^sfJY+L1yDr1Ut(V5G7bN2&!w9={z~gv-S&)e_+Q^hT-`rD?ri zrx5rIJ+4z%_zRC~m13m%?jg{+CowwEdSetW;4C*07i={zk=8<5!Tw&n6IP6zz6rq- zc2Fgh4>|Ze!-+_sJzk@Qm|ew zgS_W&1%@#q!9Ap}JEdQS^N;BIt1#l9L+j7Nh^ROLDPRGZ;ID!b1I()i3h-#S+Q69R=AFWr^H`zjioo} zFbKF$@zhJ453~Gzu+p_z!NLr^{NynFqyMnmEu`6c6IkS#7)Tx@a?2H^n5pFF5KA0O zxmz~WuLc1=M>LMZ8Ku7#RR8f%3sult0=JZ_rC zC8@6VK;Y~n)3UmSO_MU(@n`DK-e&D5XEaH~t^ilqo{x3kujTT;iZ#Wr( zjrmb)HVH+?9t&;p1*Nn3N7@6KOPW}1wNr(0k)Tx)kLWaalnz>n+#2ke5U0ds84M!= zP!0KCDD`I61=tEDyR=1ZIH@tSem9Ri_Deikk4E+jr6_2v3}Lhp%m@XjT)*F{#poL5 z2%J?5UqVg1mg$|F@xC#Fuf_xS%a#=e<5?8N+j`6;_nq$gQWlD$0;>vg@{CZl18Vvv zCp@wR6SaqQ0LqqFByj-93e!nO(g3bd{7fVFn1lcd{b5*!2<{0M-XgF>Bn~(YsWzc; z60oaKcGX}fZ1h&?i8WeUxa!Lz^`9PZklQ8;oU*TB61FVx;eqwxGV|QAOLcvEh0n+g^|eJ4AjU7t<{f z(vbDAwL9#fw?RLc%1xE6$}T}e@Sbh@jtm7@Kyqg|ne~Yl9rvc6c?2*vGxef=oXtb% zGMuGfpyhn86JV9O72s{dXi4X~L)pKf!Y}U5i-}*axQ+bYBB7gq^Haue_{{ZyVqvdp z5ae1?Y2ioPBcTIUvM-hr;#EX%FY&ARsxn~I9vTA)^G&S2_~MB&c@lLAfWf_Td^MCBKcO$|i@d`fu7d1aGYB!>LQT+!`VtMh3K zw{Jgw{zQF6^AgUc(oGF}k~*eKB`~ss6)8B&ql$o-aF$^-HV_I6et0|n>QlE*!9lUA*@O9kiy|cHzJM=IhITe2B+1Wl>r}=88C&v}8 z(=Wj5QyR_47~-N5J`<{sW8ulj;1MYPHIQamzoew1PhZ(|l;jedHzgttAQ9m)-$Z z@%Tzvkmd@jKD+bd2J-rxcMi1kn9a6ro41z2Gf>!1ca9s z%=aKrqc9A#wDoLsXU%1E4EdfYkb?6AvRC;eFi-KSV$yHLkBeOTEM<+33YYIgFXXFl z-vmM^fg_lUS6HEbR=0sF#Nd7QyV`IsJe{3%iR?mC8w(#gYe-Auav6h$l1S0SrJ`P> zC_nu?IgcGA$wY3`Q!I2DD5{3ZW{H{29I%vtTw8|{YJVkhh&m)$ifj%L3!ImIn+mEy~PtJEN#pdl0a687@%lT73sDUmjz{$>I(w{F4I9m z;4P#iUZ~+G|Db{h9GA8rSio?*q)s7aD;xJr{FO19eZ2*Nl5EQiN?Yq9 zmDL8}L$&ae5w-bjgrnhi#{Hbp7zV1QD6P?~*8*caa+qXaM?ZsWGono>ouFL2wP1UK z-krBh8#0)MV{x@hN9y6%_6SqP6~U@xu-t7%_j0RaKPra^pG5$O-NbOX0Ye@!Ykq86 z{?+T$l=0J3`RkW(X#3=it$&Z6TkkRHu`FLo9X(lqyI*Fu*pq~q*x|L|`&6GNqc;_ykc3UWY< zFaXn*?UjMZeqHy%FxQ}4OHJ={#fhujFNWaZ zxWSsyAw^UK>XzE-%P03HysavrX2@83l>Ai>y_RfUooUW0;1#6%G|AdwNUefe6*5zG za~bDgGgom$AQH_m0Q~v&Josn+IKZsRz+q497UZYfBpL1RWv}ZI{cC zona{Mdqkh+zc@9;%EH5G7{+;zdU#mqaMEWN@gcHg1{MzUXq?hYOA39IAL#FTdhb8| zuMzpRc&dO&8*Utd(hCu!We&hl8iXDJog^Q_xk3tix}zT#c_ip52kZ%yp3uIl5q+vIX4q5pkr6%H<_X5fNICcsEV z_juQnpUmFXbEl05d}^U$h^|SnfbBjA6aoIjT-Ae%g0o2Z7;l8BC`rH0oo@Wi;IbC!y_?l_ykD&#Bv9SLp9P>kImn3>$!c z=C#c|%o9Q3mF}zLqj?JV! zrc-Wv91eSI^xZ4RJ|Pq%yXizK2@!iAFT2qwqofpcwb!Hb*HJd{Yv8@S9ndrYNPM3! zKA7NJ?kQpxj=Wq?7JevKB+_OIj()r^o^JOi&M5n!Ql>Pr$iBRuyU$02r!+zYdXw?gPPh32v(0HlW=msA<4n~+` z4JuqWvTttj;bEp(Lr8Mvh!Ohz2stH{gCgxoy;dhEmgo*s^$36906AxzG_9b4^~jDC zr8kOhF;-CQ!@&W{H9;}8$exPK5jK*JMoB{8TT%O@-Gl8#gwxjsr0HK?ew4_lv!H3D zw?u-douwCiZ(#8r|80$n-u9OE%lNS1x$r}~NFp9QwnhU_bT*4Z!=Hxu`{M`EXhD2y zzlN^%KH_O-TimDfCgg3t>$40?q^MW@07LY=^v37V?(OVs`dhr7nqGI(|)3(qKd)|d(Na#JfTgD0*kYb$pS(G{vkuBbsU9qOj*$dri zH2IjHK7ENboH;htpRPqwzqBFdn-(7To%JvkGN(6v;GZSiV_JVONF0+>B8=PB6v*sI zzVVT+d^gCK^6g8H!v9Xx2A5YJ8-O7d-nq19JK<7sexg+xKsK#1v*U#7e5VbH%cmwl zKCKexc>bG0yzgx!Am4bX$4V?{5MaTE1IDUPx)%WZ#yQ{?tecm&TmLV-sbBUjVs>Ab z`mkavLN)uE>>CQN9_l?Z&?Gdk_~GihX4Q#ml@&z-$+>y5VS^ySTU(q-2= zei16=v}naT>OX3tGl{{>c`(FYET@mm`%q6M+I>UAbm}!_{w@ZQc5RrY!Ln<3iJ5a7 zMNU}4;>jhSP6{^a$%$)S=30r`LEgVR8l~<-N+i)+3ZuEd1bs|`_+5csBdzE|MG8~H z`w8LTqdkG3?}rPHQ?N=bHF!U>TkuWUa4SHiSF57Qvl$B15MQ1=!^d+_z_lE?rL|(& zNG_tqSS}*vL@wg7%sPGYCJa{mUzX9cd67@l4DtzxXE)pxrNmiwq0_T|#A{$Uvb`mc zj=8cOVsgf7R3So8GSb41t2On*`2l4^;Vu9bNhKv$`nO^um*mn1lufPzr%tfgzAGwT z{GN*YNA;ruOyxn`1rHhb$#v3wBgWwb+L6BgqtEzm6CTCuD+%@6x&0oQnnFq4$HQ{_=fsbr zAS7U{3rL8LV^KGIp5|FhR-?eq$@V;_W%|Mq>{cC3un%*6qy|>R5R)K=^q?-cXd?GF zP?ME@#rrQ7v)$g`!Q;Jzw#!-YGkM*6=#+9QvbL(uIop2-#eijg+6&cDwu4*rfJEE$ zKC?FiiY=~;ryY10>j1e96)ijxi`B}j*wckhyR97Ci2 z_7jZy+sOk`M$}K@G#q93&yjpgOiZY#2uM&-kwBn=UHfn$r=x3oD;SaB==8h*k#<&*Xmb@f8)+cP?0fISc=qlO&)Z} zK$@(w;ld$Z-SBFWk>(lM)d~KQ7VI5A#GI|MW}J%pd{gCHS=fQKu7(rX*Fk-OE@IzN zx`ckRGW@DVI^Nh#*xc1iXBsoiV~GDuqe(v=@l#ARhP*%ce!xZN$-d*tv~xD@8x{rR zIEx(9y@c)19PDV~_k`Mso3>J$gs~sk2|l{FDJJk)A@*}-d@5w-jNOU6)a60kk>|{X zlJ(;taxayoJ5+rl7-OuUBU21Hb>r|(cpcw{BF`u=-0BNp9;ms6a{_&Kwun|gjRY&G zYStF(SUY7GR@QmCKbmE2tN`vw>86L>st4t1M?DxS8^eDGXcAYireou5pCJSiXW<&! zk2DCuMEIyWC?hl$z{G^OhV>cD!hMg4t%d(NC`J(*87sd@$}5PH`gFv2Q7Z(4=f6TY z69e^SCyZ=$Lb*7oVceWl!UEh>LHvIi&dm-E>e$|wb#PD#J2)tZ1-L3heE;t@Z2gn! zV!spt^t==S=r8l+Ke;HC$NOC>zd5Kt7!g?@h|*Xf2=PBHsQ}1L%8s?-*a{Kz)?(TnU_8-k703DR_Z;UDsLaiwfg8XHRM8%;6f*zS)hW^zgT?~}M zUw!+dfB$9jF;D|pIH>u#xTu1E*xhQ01{dyITrc1(%*j;=(l_)?Y2cnaK1Drh`&|wF zRYBqKY&rK%iTP0d72*QqEDVE`$N~$TB1@#LdY(MEn&y;Tv^YM!2R;`SvByT9Tj_93 zFR<-UA<{ma8H1e2B~k%J&z0ZlE)ooqyl1DhCkeUhUw=uIaVXgz!!D2^9)kxnuD*+O z*0`003!y&VQ?d$_?sR7tl0l$uEc-2h!~(6RHx|W$ogBBWl|UoBOw@(g8;Tt>)l98?dAoM|LD)ZyZ!aN~Tc#&>7?dsg7(H;R^c-2%BvL0Qws&= zu6uiX(XUA^tCbu8DJpV@^lfGzQj9AjqBOGzQ;#kut3I_|`a* zBHWfZWUKe();ZeYxWBQ|TI1N1Ru0ZHk*SU|K!38NFk=4{jqr;Hzxgme2PnIv}>k}Ul4SY#VqGwHq-%B$Nv&O4!shxN*_=JHD6 z@+>$^l&xxS-2}v3omUsIr_%yta@%1H%BA|Pf;Lm#rjMkIrP;pzv{^o8)^!+7Id*6C zqAXd5`8$9A#c_2*g|!F!icQor#kZ%4vAWrBu)}5@Q+4%6Iykp&40uo`4Ktlh8%(Sw zQ{3gcQ3bNxmg@lr@+8o)2c=HXu@9c;juF)*QRSF!zq!&O z{l%PSx~h-QxaZ|}KS*QuVx|4Kbyj<@ZrhyE9Uk@dx+?2k{K}_wYRAkh4OgLRmn)+_ zR_w@M8b`$WuJyBiCN-4co~Xt`KU=PmT0%D1E{hJ)*zQWn^;&|HW#5O8E@&^2t<|<& zeG|%yVq)U1;q}%S-UZS^x8T{A4BxI5m?&5U0=VdS-pD_9bi4Xe_y z)(uownv%*q_u1AcdaJ{vwW+^ui1Fwy_LUAc8tg;Z6lyNIPc`IFrn4(4k}Cbj3ynV^NqD zJ1DeT$y!o{<1?NrQ{bV18J+KNt=`5wQk(!muUF|so4rvpE^-lwh;i-FwU4Ye@ z?r1r^7R$9keNQ(Xw`h~~=8LiT-ZXO(ebo%TG4XAWCDVB6V-sa;C1j%FiLuL>SnY5wA^DMfHI6aChSM-yudLbRQb(9Hj>$ zB#w3AsQFB7;76l%LmC{M1P7uuIK2fnV6?4-EVbw(R~vmK3vHO$?~xkMyO8R2etV{m z72h%wWu-b{(pSSjn&!^6_+qNwUsN>@t z221Lwio{!(e{;|85fZ|4sI|=Z&`NJA+V-*8q;<`xT(w1Bc)uAki=dIY$g4RT-#lSR z?+68yj^@gCB2M51D5^sCX_6vcd54B7vzxLzYw!ZJRbXItH+7Wrog1^5ns4fN)JTWC z4jL%F&%$~*hqq#ZRi%jsYp)swNw+Pq%x)7h(7xY&v9{ht$>gz_0J#sz=1Fij90w^s zH5p%)$9DZrY~j|pisfyi3%#a_>#js8>wsH+eQYVz%p^m-khh3IO0 zx~;Y3$IhAyB+>zM4ZlQQ+h$Q7;k!y`^+aZ|nVqsL-)q|7bJcrK_C~52TiJ;~Tig8T zhvjg4xsuL06xeY0%UZt4r8XwqX|_i@y`T5Q;l*uHdNKfPMGs&s*vz84UAy0BkkgYxXEOpssRhEB<{k<-(e9dTRzi?^r|y0k zM^hc;``eDccDxO0m3)hV66eRDID(ut$qf8P`Kym=gp4BazP&&0g}9}kK~hD_V`+>a zHdx)s1FSKl%@5~&$Aoz^;;gFE8r<%D7pP{!ZwDHaCB{d9@3gS|RG5T@w-7Z_xY6ub z=w)C)G|U}eO{SgOTVa=Ol99A{e&-I?|L1S6E7J+GN4TT$=5S#Q#MskJy$a4xCzzdk zg#-4lUjM7~6+U!wO8^4^VB-P+fbsRjpVHTVZX5lV2G*IS<+R?8IK0I@f7v^$^t!Fy zysvf0VP-sRxpBCQ%sI&{EQl5>A$UOA{CW)Sp9JCo83Aq0;}Ire=?75vs;BRP==b{0 zDHEXgjovU`2QFq4794O|5OHv||6qIg{chVE0fls&C?zaP4^G#e@#(!^@AGDT{x#oQ z!W~hZU$s-Nf4r}rM(q+FKGJ?vNwpK1I(sw=I=-Yb3I8^3&mSS zG&AgpSq%RNmVH^+b3hJejBmg<t*oZho*8sSn}PG0nwrsp z`_%{fZI;P6jdtPGzUi|mgoIAkEq-E)b*_1r_uN~$8#|Avr!NvxKi&+QA@bgstB&EO z2Ig5x+P*n>r4AFN8hCKFwwkj|xnuc76oi=#p*NV|T<1t-zf-t3{`weJ4pnU+L{TC8 z#LI$j8c!f<_keqr1TEkA!N}^Vmn@Lqtuwej)b#APR9j#=v38c4N!bJ@R)g67N-vDM`)N%vMf9N!jmQRE`pSXE zn$L+=m&b`l`(i6Olbwh|0$4m{Jt=%91D*cS$tTz-{_FRxqKeLUb&$xXyfKr5({+dJ_ucSH0bn$d z1TlyS4~>HqM2GO>OnUdJhBzfKHe8l)W;|mRF7mKRR4)RfYZ@ZFZq|4bN^77Dl>^0a z5;BOBq~Nxz%M5+v=N%{Qr`Y5G5H*zqd!nV~7zE~orFgpcUx_KU<2|l37@uCwtt*pWqh{&8aBx<0id|j)$EoH;-WnN z6|3zA7}OI?kVEXgi!S9A)!cQkcGvXE3vcP@(G8B}{v~Af{o= zkKfH)d3Wt9Y`i@O(6>d>=eb;m&-45L_fYK!` z-CdGONXLKz(kUg4bayvMOQ&=QA|TyJ3%p}IqI2Kh^Syt-%XQ7Zn8U2~+vm*QhaJDQ zPUfx<^oW+qBkfOYnkDksz*I@G|kkW(WP<4LC(M$-wmP3 zGFCH0gPHy-uRUyhGmckHyi(f9a867?if0Co5YLQr`jt4@INF$$<0xrNxXe&)Poqa} z6~^0xnWL%*GP6=JQyz_#ZzmW=6-ZN3QuaN4mfS_{n7?-Yx^)nKfQoode|Vc;N=89E zxxz`El@FpWV* zEyvTgZ+Q%`YeMowY_}HuUKfh02!r74>n1;ccZR5-b$kktGj&3V){LGm%yhonqVaJ# zv+t~fV{rfbQ7+ zpXmZqN%it6Wf3nU%`8K+V>I9czw|8Dh84USoL;TZX}uNvu6`w6cD*T6gt`~tgB!1* zWV$~JmU+g@V%#017$xt9XeA1GpjwbHDjwyis6Q%OvQmbiLjLHYM1JHlrJ9t;&*Mnd zN)#|ZJ~nA@ZOiF`&6xd@M-)yc|7G(Ag_?N;4r8?o7Grij5}pc}=??LO_@A*#($bs> z7^{1b@#dPq5IsK>2J?*XbeP`wjN<=cm&tGYD4iea^ZU@2EF!6>|0z#~s2f3sXvpio zs{cDEGF}2d^6z?NGCfM#-l{bd3qLRuEQv_`+UhjvwAFNR1xIT+K~}RDn=*L|{g#P> z)ORZHsbu5g24e8KG&m>Ba-f-& zepDZ06Vb(Z+Vn4G*aVQ(99~&_j5#JNaC1w*ifZx@`?2$9FfnJ8O>GjQWR;EY__Zun z;aoO;P!PTt4{7J03Kw;o33qVhj3Jtu+!L+ADK8Cxqtk>9Z7QPZpPj_~vSyh;SOGp( zT(l5fm9n&I>1FT2DbCwOocU1?J?~+05e4+NrBqwvoTF&1@kf1h<>Ar*dT!0mF2K}e zq?a=%wV^&bvO*~Wp^K(iOcj`PXfj>qaT782_dsBDd*39~%ARGA1Bg@Hxj`pQV%DCs zzcYZ>2J0E&GOcMRW_&WbSfFKJIjA@e&Bvq$&ZFj(w^Zk0$}_sJC#fl;(_tW({xl7= z#R?GaTqh_dsmOj(C8=n^-;&NyKddOOxU^ecHsyB#(p)Lb;rdxL(V6js!`E9DjQ^+T z+q2ZGnOOUy7(t#50iSWH&XyCnsB9wE1lBbCRcLx9~TF=)za3Sh%RI0dv6 z1B)YLto?@+UunO!eAVc3Oc-(Vd2t$D_ths&7gK(?ZCh@!#%5d{PuulMGqYA%&Q*ZN z6wBuQthwj4_nvSV)t3cU3LR&^GE4oo!u6VZso({w?!x#+;v{{;?OVD&0u4{y;vEC| z^UUHiQuGt5>ax?3Z8Bi>+&Z7K7Ia`!N`Vj;Yfn|E%k^8|HI!OT>R4i(0NqatpZUj5 zTXuVCicxqubhOFC^XOK_$G1PMt{kjdaipeVM6l9kZD)OO5=co+Q81L-XW#f#B2PD= z#38hC+sKw`-tR(HM)?@r|JIvwCx!cvKCFtciQS92p_7CFFm5HZNrKU&si+;8+OTerf_DqbosXD42Ua!YaHd>AI7KH*xCh)g}?NDMCxFU^}KSV`Kh z8b6%uvUA|%OG0BU4Ii-$AIWQLcS8p)TU=`%e)jy=S%vsnDrx~ZQD+7`vmp7$tl}Bi zc~s5F*y`!`$>_&8McZ*U!tQ-3_dxnHpl4i(ehp}=5uSqL9^3oblaJk$wN&%^n>Edj zYfy;P3elK|k@Y+C;&aXh-rNe8bh~^y=swkIvCGq^I6|!g_6|S`y9O*f8|KJH(@Pn@t#$Qna5&|DsyLLYMf58^Rc2`33I}Ty6@*)fJF+&eaX*3 zY}p_QQ68sLKT|0&)l)#gYLiH%FT8AIu0FAG4dvfIQN4hEO?A}IRJ9zH3jd`}-I8l1Wa&)yXhiYs@f=h(g6 z&~^iYxsmhw+m+L#`~_z?0E7*>9=W!bjskw+|e;V3EG`O$v}CM4f`~C<0g|L zt)j*4)QxoTM&47iJZaN~c5o%b9RDhsRmB?_PADZ~8eobZN({xrq4bbY1dq)DhlbEi zz0DOB!?m`efM1zekXp5(=OW$7aQ%Mon-jT>pk>YvJP+O^+}>Dl3`SkQv>rC;yABXI zI{h5?L5j{`>~Px2AvU}WEsQ4mq&41i+F`@urowVP|Cw%Q2wC2n80rdEi8M`71pFSWv$>%#B!TQ zuj((pHe61m1<Jrka0}o2Tlf5D!;TgC?xWTNUR?lCev(a&6czRlseHIw zDK>3kW^&&c2yWNj_SQmu*(sMb=ag>tFu3$UbCN*s$er7&+ZX%sW{%F+Vy26s7M-y> z8R_0{dZ^WLSn>qFp1g9xq8ibnq@z}iIvMpLa$4}b-LqVjkE!nVy&iLd-_4kqd$)ZR zkSm8duEZOSDHntPzC%sXL*(ljWox?3>ieSyBlY^{6x~3z`3)02wJ5v(NAx;Kv?cTV z&*QwaqG-P~Xg3T+*e5Xw_SIw#66R~0#-@?$m0J6lk4V`S#wrI}3qNpvJd9F{I1?sb z`=DN6h+K~ZY3`aMRwmjW;eY}MV^FCzcYmlU3JJ+wJE)>RITfb>4V9dON*lw79?@XA z1mkGHpMz$ud@SpURv$8!fNrsxikA8&ZykqzAA9DDL>0V;?kL_Jp$}$?Q+xoO(Hv-STXvTH67ejso6^U7Ahm$UTfNF%>jF!FjHBi`w z;)|Uf{~Q#N&scbtA%@gCC%FxAX!6LRa6sW`@+Nj4G@&)rsJ)U5;RjwrH3_SB^HkVw zZl0wtUC*9~S2#*(JiA>NA?g^?-ta$Do{brYEwNwGF+RS|EBUN)Z{;+gN8v>BD1R*l zN7v-y-0|Yz`Nes)yQ>2SC?xdu$?eL)i`zPc&QtShjeg1n67&x45Hv+r2Pg}`Wpuqa z{{z11rbow|N*Hpt|K7lglfE)=fb;WKz<$mDyoK60TN^)h0Be`N-$LJM&f6sYlr9NN zdC1ihnppXZgH$vN#tJRRFtj@7iYp1Ki|xOirF>bF1rU_S5U}nHCJUN9BRm*dAjx_h z{`4^%hWz>*7y2L-7ETMwkPz!}Q5(lYf7j6bB_57C9`=13AH9Q*ClYLac zR!U6D_Y4M(mN%=Wif3IHyCjZJw3b3UKp@C`h)ok-P}Ug54}JtYYVT-@06kP1-9h6Z zbw+;6H6HI;Bom#Pzm4nb%EO=!Q5P@7du)`k&_9S*XV|$u8!~OZ;JQ|+iq|unL2na8 zkkX9DTTjB>b_#sVBIdSQh8~wn11K+~+?O3-p*l`Z$4wQ+mnYTKld4W{Ly~zWJcYh6 zwFVsIsqT#te{(<6fZZ_!xd|Ziv9=`{VJ}AJ^Av|X}vdA=@HuaMNuP+}KN-op4qfPWuFUt>VeF4jV^BNo3#JhCUMC-a=<uJn@O8X}(N(J@f~f7}lsKjA3*nC9*M6UTC|>EbFU7z}YxG@V>-7l5EWrV( z(ZXqFj*Ss5kytzLKfe)74K@*cvK~?|GMYuX7@WIBx85qxwZn{z`!!PSkx`&L$UB=| ze%-~Ej}R#McK9|Sm&Cbrbaam$;W62?lZcCSR#wHRjLLzLJonzBQ;Q1+@uCIa_7X`} z<}=*ZOW!Oty8}FxG=olifJW(;w5s{me;uzxU!#|0g44h6GymaY@mC*P)fuo^;lf=Y z*bpLwX_RB$dOAyJwq*8(G8$h#r1WWGt^XS-@66pp(5u`uwlvOGrH~jahL>6xc~t4z z;tyH(XzXzx0(?$ zuH0jK7f$)NHa|sG# zZ|ru~BEfDtdmsi#sJuV5Z(zws??;Y?n>bsR;$QoXA(PnNnY48zX?+?lRkIyI!f=Gd zw}cqg@d5Xsf^UgMj$z>1yl)Avce6Pf*Y>7p8ZcJD7ntI$3WT`@Fo+-6g1PX;I8eH?IH(m^DOp6Tq3x^uJMgK;Hy+yW;@`4h zn&Dy*2qF<<)ouk-vc&gct|&f%@XN7wu5=$f6Br2%0L#9sbf+%YUgSn^+F zJ)zYzIHf0ZKxVmwy)9=<-R_x%$0`ph`AdSTZgA&5x*r9|8V;_yAI$(y^piL_5krvC z4)sbT^KL~I_Zoy06@>F{g(}C}d{E0PX)&H^YrH@fpU51B?!LNLvW8v?vnubmfVLkD zE4@!dinH;Kp~ly$>t(+EluG_)o#gJ-wCC1xu2&>7r&)MKj^jcR+I)^{pD^iesLCCOx8D#E|Rl z+Swf5@;V|?Kt$#>n+BuN$5m5iLBG&S_;Eh5a~wDPelqX3+%U^lE}W~(Jo}7sXcR?) z@L4AWrDN74C0ta~@69Bdd!q_x?Mi7ApSVd)o0>>G=!q)sij;{O&QEh9lF($>tUTJ* zkFrrp%6NWRlxh01HJg1KV>85|FyCwuxEXK~%|c~~62(#^jHyXVe-S1n6Yzfb7&!T; zEYTjvfPb~|Q2!W)+ukN^n~UClz+KD9ekCNgO+MLct#Oma#yE{_TAs-sCr_xs=G(VC z@bO2hja@!8Bl5j|dUaZ$)dmTYiiTJAm#dettH|`$<N$cQpMx={|BhapAhg=e1){`HQ&^txZ247>%31s%w;j1nt@sT@yh%ku}nKR9quqw*(yv}xCdtKZ;(UzCxY~|*o`wJ(oZSp7) zgOENR0wUM2n!_uVcg>}bF(|XL-AkBX(sv!-*NU8->J8uOGI;p;&mF@K{I^aZ@RfxI{}t=64Q5-1A2a&@Q;&e#8~7(QN?FnO zyS1WneryIVtox>X`;fIek0)6X}M~ zBp)kG`Nst-B<2aP1v1<##%hf=RGV@G4uQI`DP4y|)=-Itz20V;4WYW;Tt%w2CaJNAxV*->eD}^{ql{Q) z@Ozvrkv@{ z)Xbr*JJTli!137Rh}>p&#`JmTJzuliOskI~F9ms`e?tZ}<3BPz)9lZt>*n}GBE*bL zc@Xf_w>hw1^`%FZPNV}q-MUe7z@d9=_N;qc*P(3PdiE{-%nreMt1+Lt01}#pypZ(; zc=-0Su|mTD!3wc|f9}oCZ|1K2m#5$;$^K37_ro&opiod~V7c~BVdW2nJ0$S$#|Hcf z;`|D9sFyDg0x7u zV@hZE#q=i!WJMqq(s%GBAmj=CFFPirboh>vm=%)pzx7Z^aqAsV8{01)h#?E)$^9+q zyu*XF&hDU4P#Y3|gucr^gCu~=LAs+L zkb=C5cNs~LERcH+cP!RwkSu@gKtK{fF74hCDQf;A`mcD%mAgB9xi(}){#s*ztO(?) z#2pbp{}<7J#Y2vP@9_J^kQMp=G_Su0eRo7qP|~Jyrkv9O!AN Date: Sun, 5 Mar 2017 23:12:38 +0800 Subject: [PATCH 05/11] docxpy --- .gitignore | 2 ++ MANIFEST | 7 +++++++ bin/docx2txt | 11 ++++++----- {pydocx => docxpy}/__init__.py | 2 +- {pydocx => docxpy}/docxreader.py | 4 ++-- setup.py | 13 ++++++++++--- tests/test-hello.py | 4 ++-- 7 files changed, 30 insertions(+), 13 deletions(-) create mode 100644 MANIFEST rename {pydocx => docxpy}/__init__.py (86%) rename {pydocx => docxpy}/docxreader.py (98%) diff --git a/.gitignore b/.gitignore index e9af22b..3d00690 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .idea *.pyc build +dist +upload.sh diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..b42dce8 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,7 @@ +# file GENERATED by distutils, do NOT edit +LICENSE.txt +setup.cfg +setup.py +bin/docx2txt +docxpy/__init__.py +docxpy/docxreader.py diff --git a/bin/docx2txt b/bin/docx2txt index af03687..6a4ba27 100755 --- a/bin/docx2txt +++ b/bin/docx2txt @@ -1,9 +1,10 @@ #! /usr/bin/env python -import pydocx +import docxpy + if __name__ == '__main__': - import sys - args = pydocx.process_args() - text = pydocx.process(args.docx, args.img_dir) - sys.stdout.write(text.encode('utf-8')) + args = docxpy.process_args() + text = docxpy.process(args.docx, args.img_dir) + print(text.encode('utf-8')) + diff --git a/pydocx/__init__.py b/docxpy/__init__.py similarity index 86% rename from pydocx/__init__.py rename to docxpy/__init__.py index 0d11c2d..029c5f9 100644 --- a/pydocx/__init__.py +++ b/docxpy/__init__.py @@ -2,4 +2,4 @@ from .docxreader import process_args from .docxreader import DOCReader -VERSION = '0.1' +VERSION = '0.7' diff --git a/pydocx/docxreader.py b/docxpy/docxreader.py similarity index 98% rename from pydocx/docxreader.py rename to docxpy/docxreader.py index 63dd380..3eac2e0 100755 --- a/pydocx/docxreader.py +++ b/docxpy/docxreader.py @@ -83,7 +83,7 @@ def xml2text(self, xml): root = ET.fromstring(xml) for child in root.iter(): attr = child.attrib - for k, v in attr.iteritems(): + for k, v in attr.items(): if k.endswith('id') and v in self.links: self.data['links'].append((ET.tostring(child, encoding='utf-8', method='text'), self.links[v])) if child.tag == qn('w:t'): @@ -137,4 +137,4 @@ def process(docx, img_dir=None): if __name__ == '__main__': args = process_args() text = process(args.docx, args.img_dir) - sys.stdout.write(text.encode('utf-8')) + print(text.encode('utf-8')) diff --git a/setup.py b/setup.py index c831cc6..985a6a4 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,18 @@ +import os import glob from distutils.core import setup -from docx2txt import VERSION +from pydocx import VERSION # get all of the scripts scripts = glob.glob('bin/*') +def read(fname): + return open(os.path.join(os.path.dirname(__file__), fname)).read() + + setup( - name='pydocx', - packages=['pydocx'], + name='docxpy', + packages=['docxpy'], version=VERSION, description='A pure python-based utility to extract text, hyperlinks and images' 'from docx files.', @@ -16,5 +21,7 @@ url='https://github.com/badbye/python-docx2txt', keywords=['python', 'docx', 'text', 'links', 'images', 'extract'], scripts=scripts, + test_suite='nose.collector', + tests_require=['nose'], classifiers=[], ) diff --git a/tests/test-hello.py b/tests/test-hello.py index 37559e9..80c81e0 100644 --- a/tests/test-hello.py +++ b/tests/test-hello.py @@ -1,5 +1,5 @@ import unittest -from pydocx import DOCReader +from docxpy import DOCReader class Test(unittest.TestCase): @@ -15,7 +15,7 @@ def test_file_data(self): def test_hyperlinks(self): links = self.file.data['links'] - self.assertEqual(links, [('This is a hyperlink.', 'https://www.google.com/')]) + self.assertEqual(links, [('This is a hyperlink.'.encode('utf-8'), 'https://www.google.com/')]) def test_text(self): text = self.file.data['document'].replace('\n', '') From 78935b61ba65d33b471c827cc4b5f17de86a44ea Mon Sep 17 00:00:00 2001 From: badbye Date: Sun, 5 Mar 2017 23:26:08 +0800 Subject: [PATCH 06/11] travis --- .travis.yaml | 13 +++++++++++++ README.md | 12 ++++++------ docxpy/__init__.py | 2 +- setup.py | 5 +++-- 4 files changed, 23 insertions(+), 9 deletions(-) create mode 100644 .travis.yaml diff --git a/.travis.yaml b/.travis.yaml new file mode 100644 index 0000000..ab2259e --- /dev/null +++ b/.travis.yaml @@ -0,0 +1,13 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "3.4" + - "3.5" +# command to install dependencies +install: + - python setup.py -q install +# command to run tests +script: cd tests && python test-hello.py \ No newline at end of file diff --git a/README.md b/README.md index bb6e5c5..f3003db 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# pydocx # +# docxpy # This project is forked from [ankushshah89/python-docx2txt](https://github.com/ankushshah89/python-docx2txt/pull/10/files). A new feature is added: extract the hyperlinks and its corresponding texts. @@ -7,7 +7,7 @@ It is a pure python-based utility to extract text from docx files. The code is t ## How to install? ## ```bash -pip install pydocx +pip install docxpy ``` ## How to run? ## @@ -23,19 +23,19 @@ docx2txt -i /tmp/img_dir file.docx b. From python: ```python -import pydocx +import docxpy c = 'file.docx' # extract text -text = pydocx.process(file) +text = docxpy.process(file) # extract text and write images in /tmp/img_dir -text = pydocx.process(file, "/tmp/img_dir") +text = docxpy.process(file, "/tmp/img_dir") # if you want the hyperlinks -doc = pydocx.DOCReader(file) +doc = docxpy.DOCReader(file) doc.process() # process file hyperlinks = doc.data['links'] ``` diff --git a/docxpy/__init__.py b/docxpy/__init__.py index 029c5f9..97c0050 100644 --- a/docxpy/__init__.py +++ b/docxpy/__init__.py @@ -2,4 +2,4 @@ from .docxreader import process_args from .docxreader import DOCReader -VERSION = '0.7' +VERSION = '0.8' diff --git a/setup.py b/setup.py index 985a6a4..7147fc3 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,12 @@ import os import glob from distutils.core import setup -from pydocx import VERSION +from docxpy import VERSION # get all of the scripts scripts = glob.glob('bin/*') + def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() @@ -18,7 +19,7 @@ def read(fname): 'from docx files.', author='Ankush Shah, Yalei Du', author_email='yaleidu@163.com', - url='https://github.com/badbye/python-docx2txt', + url='https://github.com/badbye/docxpy', keywords=['python', 'docx', 'text', 'links', 'images', 'extract'], scripts=scripts, test_suite='nose.collector', From 3c671caf31019f8f2caa39a923629307a7395871 Mon Sep 17 00:00:00 2001 From: badbye Date: Sun, 5 Mar 2017 23:34:11 +0800 Subject: [PATCH 07/11] build status --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index f3003db..2012bb3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +![](https://travis-ci.org/badbye/docxpy.svg?branch=master) + + # docxpy # This project is forked from [ankushshah89/python-docx2txt](https://github.com/ankushshah89/python-docx2txt/pull/10/files). From 9e4e9a6dcc3460f45d1afc690b59302849a10e51 Mon Sep 17 00:00:00 2001 From: badbye Date: Sun, 5 Mar 2017 23:37:16 +0800 Subject: [PATCH 08/11] ... --- .travis.yaml => .travis.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .travis.yaml => .travis.yml (100%) diff --git a/.travis.yaml b/.travis.yml similarity index 100% rename from .travis.yaml rename to .travis.yml From ea4da1b1003c4dadf0503df825bd9b98dcb006af Mon Sep 17 00:00:00 2001 From: badbye Date: Sun, 5 Mar 2017 23:41:26 +0800 Subject: [PATCH 09/11] py 2.7 or py3.3 --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index ab2259e..f815681 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,6 @@ language: python python: - - "2.6" - "2.7" - - "3.2" - "3.3" - "3.4" - "3.5" From 2391aed7fe3ad89da11f9208eba69c3600b6d9b9 Mon Sep 17 00:00:00 2001 From: badbye Date: Tue, 7 Mar 2017 15:05:17 +0800 Subject: [PATCH 10/11] ~ --- README.md | 44 ----------------------------------- README.rst | 57 ++++++++++++++++++++++++++++++++++++++++++++++ docxpy/__init__.py | 2 +- setup.py | 30 ++++++++++++++---------- 4 files changed, 76 insertions(+), 57 deletions(-) delete mode 100644 README.md create mode 100644 README.rst diff --git a/README.md b/README.md deleted file mode 100644 index 2012bb3..0000000 --- a/README.md +++ /dev/null @@ -1,44 +0,0 @@ -![](https://travis-ci.org/badbye/docxpy.svg?branch=master) - - -# docxpy # - -This project is forked from [ankushshah89/python-docx2txt](https://github.com/ankushshah89/python-docx2txt/pull/10/files). -A new feature is added: extract the hyperlinks and its corresponding texts. - -It is a pure python-based utility to extract text from docx files. The code is taken and adapted from [python-docx](https://github.com/python-openxml/python-docx). It can however also extract **text** from header, footer and **hyperlinks**. It can now also extract **images**. - -## How to install? ## -```bash -pip install docxpy -``` - -## How to run? ## - -a. From command line: -```bash -# extract text -docx2txt file.docx -# extract text and images -docx2txt -i /tmp/img_dir file.docx -``` - - -b. From python: -```python -import docxpy - -c = 'file.docx' - -# extract text -text = docxpy.process(file) - -# extract text and write images in /tmp/img_dir -text = docxpy.process(file, "/tmp/img_dir") - - -# if you want the hyperlinks -doc = docxpy.DOCReader(file) -doc.process() # process file -hyperlinks = doc.data['links'] -``` diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..017337e --- /dev/null +++ b/README.rst @@ -0,0 +1,57 @@ +docxpy +====== + +|image0| |PyPI| + +This project is forked from +`ankushshah89/python-docx2txt `__. +A new feature is added: extract the hyperlinks and its corresponding +texts. + +It is a pure python-based utility to extract text from docx files. The +code is taken and adapted from +`python-docx `__. It can +however also extract **text** from header, footer and **hyperlinks**. It +can now also extract **images**. + +How to install? +--------------- + +.. code:: bash + + pip install docxpy + +How to run? +----------- + +a. From command line: + +.. code:: bash + + # extract text + docx2txt file.docx + # extract text and images + docx2txt -i /tmp/img_dir file.docx + +b. From python: + +.. code:: python + + import docxpy + + c = 'file.docx' + + # extract text + text = docxpy.process(file) + + # extract text and write images in /tmp/img_dir + text = docxpy.process(file, "/tmp/img_dir") + + + # if you want the hyperlinks + doc = docxpy.DOCReader(file) + doc.process() # process file + hyperlinks = doc.data['links'] + +.. |image0| image:: https://travis-ci.org/badbye/docxpy.svg?branch=master +.. |PyPI| image:: https://img.shields.io/pypi/pyversions/scrapy-corenlp.svg?style=flat-square diff --git a/docxpy/__init__.py b/docxpy/__init__.py index 97c0050..9b080dd 100644 --- a/docxpy/__init__.py +++ b/docxpy/__init__.py @@ -2,4 +2,4 @@ from .docxreader import process_args from .docxreader import DOCReader -VERSION = '0.8' +VERSION = '0.8.2' diff --git a/setup.py b/setup.py index 7147fc3..2cb5490 100644 --- a/setup.py +++ b/setup.py @@ -12,17 +12,23 @@ def read(fname): setup( - name='docxpy', - packages=['docxpy'], - version=VERSION, - description='A pure python-based utility to extract text, hyperlinks and images' + name='docxpy', + packages=['docxpy'], + version=VERSION, + description='A pure python-based utility to extract text, hyperlinks and images' 'from docx files.', - author='Ankush Shah, Yalei Du', - author_email='yaleidu@163.com', - url='https://github.com/badbye/docxpy', - keywords=['python', 'docx', 'text', 'links', 'images', 'extract'], - scripts=scripts, - test_suite='nose.collector', - tests_require=['nose'], - classifiers=[], + long_description=open("README.rst").read(), + author='Ankush Shah, Yalei Du', + author_email='yaleidu@163.com', + url='https://github.com/badbye/docxpy', + keywords=['python', 'docx', 'text', 'links', 'images', 'extract'], + scripts=scripts, + test_suite='nose.collector', + tests_require=['nose'], + classifiers=[ + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5" + ] ) From df393f3298029bcd3d9ad92c1388ec9ba637fd98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Bardolle?= Date: Tue, 7 Mar 2017 15:39:29 +0100 Subject: [PATCH 11/11] Fix a typo in README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 017337e..63d3eed 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ b. From python: import docxpy - c = 'file.docx' + file = 'file.docx' # extract text text = docxpy.process(file)