diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3d00690
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.idea
+*.pyc
+build
+dist
+upload.sh
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..f815681
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,11 @@
+language: python
+python:
+  - "2.7"
+  - "3.3"
+  - "3.4"
+  - "3.5"
+# command to install dependencies
+install:
+  - python setup.py -q install
+# command to run tests
+script: cd tests && python test-hello.py
\ No newline at end of file
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..b42dce8
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,7 @@
+# file GENERATED by distutils, do NOT edit
+LICENSE.txt
+setup.cfg
+setup.py
+bin/docx2txt
+docxpy/__init__.py
+docxpy/docxreader.py
diff --git a/README.md b/README.md
deleted file mode 100644
index d1b360b..0000000
--- a/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# python-docx2txt #
-
-A pure python-based utility to extract text from docx files. 
-
-The code is taken and adapted from [python-docx](https://github.com/python-openxml/python-docx). It can however also extract text from header, footer and hyperlinks. __It can now also extract images.__ 
-
-## How to install? ##
-```bash
-pip install docx2txt
-```
-
-## How to run? ##
-
-a. From command line:
-```bash
-# extract text
-docx2txt file.docx
-# extract text and images
-docx2txt -i /tmp/img_dir file.docx
-```
-b. From python:
-```python
-import docx2txt
-
-# extract text
-text = docx2txt.process("file.docx")
-
-# extract text and write images in /tmp/img_dir
-text = docx2txt.process("file.docx", "/tmp/img_dir") 
-```
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..63d3eed
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,57 @@
+docxpy
+======
+
+|image0| |PyPI|
+
+This project is forked from
+`ankushshah89/python-docx2txt <https://github.com/ankushshah89/python-docx2txt/pull/10/files>`__.
+A new feature is added: extraction of hyperlinks together with their
+corresponding text.
+
+It is a pure python-based utility to extract text from docx files. The
+code is taken and adapted from
+`python-docx <https://github.com/python-openxml/python-docx>`__. It can
+however also extract **text** from header, footer and **hyperlinks**. It
+can now also extract **images**.
+
+How to install?
+---------------
+
+.. code:: bash
+
+    pip install docxpy
+
+How to run?
+-----------
+
+a. From command line:
+
+.. code:: bash
+
+    # extract text
+    docx2txt file.docx
+    # extract text and images
+    docx2txt -i /tmp/img_dir file.docx
+
+b. From python:
+
+.. code:: python
+
+    import docxpy
+
+    file = 'file.docx'
+
+    # extract text
+    text = docxpy.process(file)
+
+    # extract text and write images in /tmp/img_dir
+    text = docxpy.process(file, "/tmp/img_dir")
+
+
+    # if you want the hyperlinks
+    doc = docxpy.DOCReader(file)
+    doc.process()  # process file
+    hyperlinks = doc.data['links']
+
+.. |image0| image:: https://travis-ci.org/badbye/docxpy.svg?branch=master
+.. |PyPI| image:: https://img.shields.io/pypi/pyversions/docxpy.svg?style=flat-square
diff --git a/bin/docx2txt b/bin/docx2txt
index 62157c2..6a4ba27 100755
--- a/bin/docx2txt
+++ b/bin/docx2txt
@@ -1,9 +1,10 @@
 #! /usr/bin/env python
 
-import docx2txt
+import docxpy
+
 
 if __name__ == '__main__':
     import sys
-    args = docx2txt.process_args()
-    text = docx2txt.process(args.docx, args.img_dir)
-    sys.stdout.write(text.encode('utf-8'))
+    args = docxpy.process_args()
+    out = getattr(sys.stdout, 'buffer', sys.stdout)  # byte stream on Python 3
+    out.write(docxpy.process(args.docx, args.img_dir).encode('utf-8') + b'\n')
diff --git a/docx2txt/__init__.py b/docx2txt/__init__.py
deleted file mode 100644
index 778804c..0000000
--- a/docx2txt/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .docx2txt import process
-from .docx2txt import process_args
-
-VERSION = '0.6'
diff --git a/docx2txt/docx2txt.py b/docx2txt/docx2txt.py
deleted file mode 100755
index d48f9e5..0000000
--- a/docx2txt/docx2txt.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#! /usr/bin/env python
-
-import argparse
-import re
-import xml.etree.ElementTree as ET
-import zipfile
-import os
-import sys
-
-
-nsmap = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
-
-
-def process_args():
-    parser = argparse.ArgumentParser(description='A pure python-based utility '
-                                                 'to extract text and images '
-                                                 'from docx files.')
-    parser.add_argument("docx", help="path of the docx file")
-    parser.add_argument('-i', '--img_dir', help='path of directory '
-                                                'to extract images')
-
-    args = parser.parse_args()
-
-    if not os.path.exists(args.docx):
-        print('File {} does not exist.'.format(args.docx))
-        sys.exit(1)
-
-    if args.img_dir is not None:
-        if not os.path.exists(args.img_dir):
-            try:
-                os.makedirs(args.img_dir)
-            except OSError:
-                print("Unable to create img_dir {}".format(args.img_dir))
-                sys.exit(1)
-    return args
-
-
-def qn(tag):
-    """
-    Stands for 'qualified name', a utility function to turn a namespace
-    prefixed tag name into a Clark-notation qualified tag name for lxml. For
-    example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``.
-    Source: https://github.com/python-openxml/python-docx/
-    """
-    prefix, tagroot = tag.split(':')
-    uri = nsmap[prefix]
-    return '{{{}}}{}'.format(uri, tagroot)
-
-
-def xml2text(xml):
-    """
-    A string representing the textual content of this run, with content
-    child elements like ``<w:tab/>`` translated to their Python
-    equivalent.
-    Adapted from: https://github.com/python-openxml/python-docx/
-    """
-    text = u''
-    root = ET.fromstring(xml)
-    for child in root.iter():
-        if child.tag == qn('w:t'):
-            t_text = child.text
-            text += t_text if t_text is not None else ''
-        elif child.tag == qn('w:tab'):
-            text += '\t'
-        elif child.tag in (qn('w:br'), qn('w:cr')):
-            text += '\n'
-        elif child.tag == qn("w:p"):
-            text += '\n\n'
-    return text
-
-
-def process(docx, img_dir=None):
-    text = u''
-
-    # unzip the docx in memory
-    zipf = zipfile.ZipFile(docx)
-    filelist = zipf.namelist()
-
-    # get header text
-    # there can be 3 header files in the zip
-    header_xmls = 'word/header[0-9]*.xml'
-    for fname in filelist:
-        if re.match(header_xmls, fname):
-            text += xml2text(zipf.read(fname))
-
-    # get main text
-    doc_xml = 'word/document.xml'
-    text += xml2text(zipf.read(doc_xml))
-
-    # get footer text
-    # there can be 3 footer files in the zip
-    footer_xmls = 'word/footer[0-9]*.xml'
-    for fname in filelist:
-        if re.match(footer_xmls, fname):
-            text += xml2text(zipf.read(fname))
-
-    if img_dir is not None:
-        # extract images
-        for fname in filelist:
-            _, extension = os.path.splitext(fname)
-            if extension in [".jpg", ".jpeg", ".png", ".bmp"]:
-                dst_fname = os.path.join(img_dir, os.path.basename(fname))
-                with open(dst_fname, "w") as dst_f:
-                    dst_f.write(zipf.read(fname))
-
-    zipf.close()
-    return text.strip()
-
-
-if __name__ == '__main__':
-    args = process_args()
-    text = process(args.docx, args.img_dir)
-    sys.stdout.write(text.encode('utf-8'))
diff --git a/docxpy/__init__.py b/docxpy/__init__.py
new file mode 100644
index 0000000..9b080dd
--- /dev/null
+++ b/docxpy/__init__.py
@@ -0,0 +1,5 @@
+from .docxreader import process
+from .docxreader import process_args
+from .docxreader import DOCReader
+
+VERSION = '0.8.2'
diff --git a/docxpy/docxreader.py b/docxpy/docxreader.py
new file mode 100755
index 0000000..3eac2e0
--- /dev/null
+++ b/docxpy/docxreader.py
@@ -0,0 +1,140 @@
+#! /usr/bin/env python
+
+import argparse
+import xml.etree.ElementTree as ET
+import zipfile
+import os
+import sys
+import re
+
+
+def process_args():
+    """
+    Parse the command line and return the argparse namespace (docx, img_dir).
+    Exits with status 1 if docx does not exist or img_dir cannot be created.
+    """
+    parser = argparse.ArgumentParser(description='A pure python-based utility '
+                                                 'to extract text and images '
+                                                 'from docx files.')
+    parser.add_argument("docx", help="path of the docx file")
+    parser.add_argument('-i', '--img_dir', help='path of directory '
+                                                'to extract images')
+    args = parser.parse_args()
+
+    # sys.exit(msg) prints msg on stderr and exits 1, keeping stdout clean.
+    if not os.path.exists(args.docx):
+        sys.exit('File {} does not exist.'.format(args.docx))
+    if args.img_dir is not None and not os.path.exists(args.img_dir):
+        try:
+            os.makedirs(args.img_dir)
+        except OSError:
+            sys.exit("Unable to create img_dir {}".format(args.img_dir))
+    return args
+
+
+def qn(tag):
+    """
+    Stands for 'qualified name', a utility function to turn a namespace
+    prefixed tag name into a Clark-notation qualified tag name. For example,
+    ``qn('w:t')`` returns ``'{http://schemas.../main}t'``. Only the ``w``
+    prefix is mapped. Source: https://github.com/python-openxml/python-docx/
+    """
+    nsmap = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
+    prefix, tagroot = tag.split(':')
+    uri = nsmap[prefix]
+    return '{{{}}}{}'.format(uri, tagroot)
+
+
+class DOCReader(object):
+    """Extract text, external hyperlinks and images from one docx file."""
+
+    def __init__(self, docx, img_dir=None):
+        """Open *docx* (IOError if missing) and load its external links."""
+        if not os.path.exists(docx):
+            raise IOError('Can not find document: %s' % docx)
+        self.file = docx
+        self.img_dir = img_dir
+        self.data = {'links': []}  # save header, footer, document, links
+        self.links = {}  # relationship Id -> external Target
+        self.zipf = zipfile.ZipFile(self.file)
+        self.filelist = self.zipf.namelist()
+
+        # parse hyperlinks
+        hyperlink_document = 'word/_rels/document.xml.rels'
+        if hyperlink_document in self.filelist:
+            self.process_hyperlink(self.zipf.read(hyperlink_document))
+
+    def process_hyperlink(self, doc):
+        """
+        Store in ``self.links`` the Id -> Target mapping of every external
+        relationship in *doc*, a relationships XML string (typically the
+        `word/_rels/document.xml.rels` file).
+        """
+        nodes = (node.attrib for node in ET.fromstring(doc))
+        self.links = {node['Id']: node['Target'] for node in nodes
+                      if node.get('TargetMode', '') == 'External'}
+
+    def xml2text(self, xml):
+        """
+        Return the text of the XML string *xml*, with content child elements
+        like ``<w:tab/>`` translated to their Python equivalent. Elements
+        that reference a known link id are added to ``self.data['links']``.
+        Adapted from: https://github.com/python-openxml/python-docx/
+        """
+        text = u''
+        root = ET.fromstring(xml)
+        for child in root.iter():
+            attr = child.attrib
+            for k, v in attr.items():
+                if k.endswith('id') and v in self.links:
+                    self.data['links'].append((ET.tostring(child, encoding='utf-8', method='text'), self.links[v]))
+            if child.tag == qn('w:t'):
+                t_text = child.text
+                text += t_text if t_text is not None else ''
+            elif child.tag == qn('w:tab'):
+                text += '\t'
+            elif child.tag in (qn('w:br'), qn('w:cr')):
+                text += '\n'
+            elif child.tag == qn("w:p"):
+                text += '\n\n'
+        return text
+
+    def process(self):
+        """
+        Fill ``self.data`` with header, document and footer text, write the
+        images to ``img_dir`` if set, close the archive and return the text.
+        """
+        # escaped dots: match only real ``headerN.xml`` / ``footerN.xml`` parts
+        header_xmls = re.compile(r'word/header[0-9]*\.xml')
+        footer_xmls = re.compile(r'word/footer[0-9]*\.xml')
+        try:
+            self.data['header'] = [self.xml2text(self.zipf.read(fname)) for fname in self.filelist if header_xmls.match(fname)]
+            self.data['document'] = self.xml2text(self.zipf.read('word/document.xml'))
+            self.data['footer'] = [self.xml2text(self.zipf.read(fname)) for fname in self.filelist if footer_xmls.match(fname)]
+
+            if self.img_dir is not None:
+                # extract images
+                for fname in self.filelist:
+                    _, extension = os.path.splitext(fname)
+                    if extension in [".jpg", ".jpeg", ".png", ".bmp"]:
+                        dst_fname = os.path.join(self.img_dir, os.path.basename(fname))
+                        # image payloads are bytes, so write in binary mode
+                        with open(dst_fname, "wb") as dst_f:
+                            dst_f.write(self.zipf.read(fname))
+        finally:
+            # release the zip handle even when a part fails to parse
+            self.zipf.close()
+        text = '\n'.join(self.data['header']) + self.data['document']
+        return (text + '\n'.join(self.data['footer'])).strip()
+
+
+def process(docx, img_dir=None):
+    """Return the text of *docx*; images are written to img_dir if given."""
+    reader = DOCReader(docx, img_dir=img_dir)
+    return reader.process()
+
+
+if __name__ == '__main__':
+    args = process_args()
+    out = getattr(sys.stdout, 'buffer', sys.stdout)  # byte stream on Python 3
+    out.write(process(args.docx, args.img_dir).encode('utf-8') + b'\n')
diff --git a/setup.py b/setup.py
index f0c5c10..2cb5490 100644
--- a/setup.py
+++ b/setup.py
@@ -1,20 +1,34 @@
+import os
 import glob
 from distutils.core import setup
+from docxpy import VERSION
 
 # get all of the scripts
 scripts = glob.glob('bin/*')
 
+
+def read(fname):  # text of a file located next to this setup.py
+    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
+        return f.read()
+
 setup(
-  name='docx2txt',
-  packages=['docx2txt'],
-  version='0.6',
-  description='A pure python-based utility to extract text and images '
+    name='docxpy',
+    packages=['docxpy'],
+    version=VERSION,
+    description='A pure python-based utility to extract text, hyperlinks and images '
               'from docx files.',
-  author='Ankush Shah',
-  author_email='ankush.shah.nitk@gmail.com',
-  url='https://github.com/ankushshah89/python-docx2txt',
-  download_url='https://github.com/ankushshah89/python-docx2txt/tarball/0.6',
-  keywords=['python', 'docx', 'text', 'images', 'extract'],
-  scripts=scripts,
-  classifiers=[],
+    long_description=read("README.rst"),
+    author='Ankush Shah, Yalei Du',
+    author_email='yaleidu@163.com',
+    url='https://github.com/badbye/docxpy',
+    keywords=['python', 'docx', 'text', 'links', 'images', 'extract'],
+    scripts=scripts,
+    test_suite='nose.collector',
+    tests_require=['nose'],
+    classifiers=[
+        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5"
+    ]
 )
diff --git a/tests/Hello.docx b/tests/Hello.docx
new file mode 100644
index 0000000..3b2f14a
Binary files /dev/null and b/tests/Hello.docx differ
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test-hello.py b/tests/test-hello.py
new file mode 100644
index 0000000..80c81e0
--- /dev/null
+++ b/tests/test-hello.py
@@ -0,0 +1,26 @@
+import unittest
+from docxpy import DOCReader
+
+
+class Test(unittest.TestCase):
+    """Exercise DOCReader on the Hello.docx fixture (run from tests/)."""
+    def setUp(self):
+        self.file = DOCReader('Hello.docx')
+        self.file.process()
+
+    def test_file_data(self):
+        self.assertIsInstance(self.file.data, dict)
+        for key in ('header', 'footer', 'document'):
+            self.assertIn(key, self.file.data)
+
+    def test_hyperlinks(self):
+        expected = [(b'This is a hyperlink.', 'https://www.google.com/')]
+        self.assertEqual(self.file.data['links'], expected)
+
+    def test_text(self):
+        flattened = ''.join(self.file.data['document'].split('\n'))
+        self.assertEqual(flattened, 'TitleThis is a hyperlink.')
+
+
+if __name__ == '__main__':
+    unittest.main()