From dbdd85ac9028c5b54fb1806332509a3df0623eac Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Fri, 11 Jan 2013 13:42:25 -0500
Subject: [PATCH 01/10] refs #1: added tests for making sure exceptions are
 being raised and fall back is being used.

---
 docx2html/tests/test_docx.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
diff --git a/docx2html/tests/test_docx.py b/docx2html/tests/test_docx.py
index 259437f..00f1b9b 100644
--- a/docx2html/tests/test_docx.py
+++ b/docx2html/tests/test_docx.py
@@ -10,6 +10,9 @@
     _get_document_data,
     DETECT_FONT_SIZE,
 )
+from docx2html.errors import (
+    ConversionFailed,
+)
 
 
 def assert_html_equal(actual_html, expected_html):
@@ -693,3 +696,22 @@ def test_has_title():
     )
     actual_html = convert(file_path)
     assert_html_equal(actual_html, '''<html><p>Text</p></html>''')
+
+
+def test_missing_converter():
+    file_path = 'test.doc'
+    try:
+        convert(file_path)
+    except ConversionFailed:
+        pass
+    else:
+        raise AssertionError('ConversionFailed was not raised')
+
+
+def test_fall_back():
+    file_path = 'test.doc'
+
+    def fall_back(*args, **kwargs):
+        return 'success'
+    html = convert(file_path, fall_back=fall_back)
+    assert html == 'success'

From 58e437a694fb1eee61c3a8c5008cb11928c6a886 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Fri, 11 Jan 2013 13:43:02 -0500
Subject: [PATCH 02/10] refs #1: added custom exceptions

---
 docx2html/errors.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 docx2html/errors.py

diff --git a/docx2html/errors.py b/docx2html/errors.py
new file mode 100644
index 0000000..e94a093
--- /dev/null
+++ b/docx2html/errors.py
@@ -0,0 +1,14 @@
+class Docx2HtmlException(Exception):
+    pass
+
+
+class InvalidFileExtension(Docx2HtmlException):
+    pass
+
+
+class ConversionFailed(Docx2HtmlException):
+    pass
+
+
+class MissingConverter(Docx2HtmlException):
+    pass

From 3981dfd51e156e8cad7f5b4a775649ea2cb78222 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Fri, 11 Jan 2013 13:43:14 -0500
Subject: [PATCH 03/10] refs #1: refactor, used custom exceptions

---
 docx2html/core.py | 56 +++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 31 deletions(-)

diff --git a/docx2html/core.py b/docx2html/core.py
index 72f0370..5168110 100644
--- a/docx2html/core.py
+++ b/docx2html/core.py
@@ -8,29 +8,20 @@
 from collections import namedtuple, defaultdict
 from zipfile import ZipFile, BadZipfile
 
+from docx2html.errors import (
+    ConversionFailed,
+    InvalidFileExtension,
+    MissingConverter,
+)
+
 DETECT_FONT_SIZE = False
 EMUS_PER_PIXEL = 9525
-# Abiword supported formats
-VALID_EXTRACT_EXTENSIONS = [
-    '.doc', '.docx', '.dotx', '.docm', '.dotm', '.wri', '.rtf', '.txt',
-    '.text', '.wpd', '.wp', '.odt', '.ott', '.abw', '.atw', '.pdf', '.html',
-    '.dot',
-]
 
 ###
 # Help functions
 ###
 
 
-def is_extractable(path):
-    """
-    Determine if a file is something that we can extract.
-    """
-    _, extension = os.path.splitext(path)
-    extension = extension.lower()
-    return (extension in VALID_EXTRACT_EXTENSIONS)
-
-
 def replace_ext(file_path, new_ext):
     """
     >>> replace_ext('one/two/three.four.doc', '.html')
@@ -1177,15 +1168,9 @@ def get_zip_file_handler(file_path):
     return ZipFile(file_path)
 
 
-def convert(file_path, image_handler=None, fall_back=None):
+def convert(file_path, image_handler=None, fall_back=None, converter=None):
     file_base, extension = os.path.splitext(os.path.basename(file_path))
 
-    if not is_extractable(file_path):
-        #XXX create better exception, used to be InvalidFileExtension
-        raise Exception(
-            'The file type "%s" is not supported' % extension
-        )
-
     if extension == '.html':
         with open(file_path) as f:
             html = f.read()
@@ -1198,25 +1183,34 @@ def convert(file_path, image_handler=None, fall_back=None):
         # If the file is already html, just leave it in place.
         docx_path = file_path
     else:
-        # Convert the file to docx
-        # TODO make this configurable.
-        subprocess.call(
-            ['abiword', '--to=docx', '--to-name', docx_path, file_path],
-        )
+        if converter is None:
+            def converter(file_path):
+                subprocess.call(
+                    [
+                        'abiword',
+                        '--to=docx',
+                        '--to-name',
+                        docx_path,
+                        file_path,
+                    ],
+                )
+        else:
+            raise MissingConverter(
+                'pass in a converter for filetypes that are not docx.'
+            )
+
     try:
         # Docx files are actually just zip files.
         zf = get_zip_file_handler(docx_path)
     except BadZipfile:
         # If its a malformed zip file raise InvalidFileExtension
-        # XXX
-        raise Exception('This file is not a docx')
+        raise InvalidFileExtension('This file is not a docx')
     except IOError:
         # This means that the conversion from abiword failed.
         if fall_back is not None:
             return fall_back(file_path)
         else:
-            # XXX
-            raise Exception('Conversion to docx failed.')
+            raise ConversionFailed('Conversion to docx failed.')
 
     # Need to populate the xml based on word/document.xml
     tree, meta_data = _get_document_data(zf, image_handler)

From 6ae04a3dc5546d65efcb414f5263449bd024a803 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Fri, 11 Jan 2013 13:45:38 -0500
Subject: [PATCH 04/10] refs #1: removed MissingConverter, created a fall back
 instead

---
 docx2html/core.py   | 6 +-----
 docx2html/errors.py | 4 ----
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/docx2html/core.py b/docx2html/core.py
index 5168110..1664171 100644
--- a/docx2html/core.py
+++ b/docx2html/core.py
@@ -11,7 +11,6 @@
 from docx2html.errors import (
     ConversionFailed,
     InvalidFileExtension,
-    MissingConverter,
 )
 
 DETECT_FONT_SIZE = False
@@ -1194,10 +1193,7 @@ def converter(file_path):
                         file_path,
                     ],
                 )
-        else:
-            raise MissingConverter(
-                'pass in a converter for filetypes that are not docx.'
-            )
+        converter(file_path)
 
     try:
         # Docx files are actually just zip files.
diff --git a/docx2html/errors.py b/docx2html/errors.py
index e94a093..b2fd09b 100644
--- a/docx2html/errors.py
+++ b/docx2html/errors.py
@@ -8,7 +8,3 @@ class InvalidFileExtension(Docx2HtmlException):
 
 class ConversionFailed(Docx2HtmlException):
     pass
-
-
-class MissingConverter(Docx2HtmlException):
-    pass

From dbce7ae48bae3fb4c7989e97456087a2d79cbfe6 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Fri, 11 Jan 2013 16:01:48 -0500
Subject: [PATCH 05/10] Revert "backward compatibility for the win."

This reverts commit e1e209454e6efe35eb5552239266b963c3fc9b90.
---
 docx2html/core.py            | 1 -
 docx2html/tests/test_docx.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/docx2html/core.py b/docx2html/core.py
index 0b7bf38..1664171 100644
--- a/docx2html/core.py
+++ b/docx2html/core.py
@@ -1,4 +1,3 @@
-from __future__ import with_statement
 import cgi
 import os
 import subprocess
diff --git a/docx2html/tests/test_docx.py b/docx2html/tests/test_docx.py
index 87535c6..00f1b9b 100644
--- a/docx2html/tests/test_docx.py
+++ b/docx2html/tests/test_docx.py
@@ -1,4 +1,3 @@
-from __future__ import with_statement
 import tempfile
 import shutil
 from os import path

From 42e832500c688c82834220650f4a18df10bf7e91 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Fri, 11 Jan 2013 16:02:06 -0500
Subject: [PATCH 06/10] refs #1: dropped support for python 2.5

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4a31043..e969c4f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,5 @@
 language: python
 python:
-  - "2.5"
   - "2.6"
   - "2.7"
 script: ./run_tests.sh

From cece70c70942d3622708770036910331051d3dce Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Fri, 11 Jan 2013 17:13:50 -0500
Subject: [PATCH 07/10] refs #1: put in dummy converters in the test

---
 docx2html/tests/test_docx.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docx2html/tests/test_docx.py b/docx2html/tests/test_docx.py
index 00f1b9b..58b29f1 100644
--- a/docx2html/tests/test_docx.py
+++ b/docx2html/tests/test_docx.py
@@ -698,10 +698,16 @@ def test_has_title():
     assert_html_equal(actual_html, '''<html><p>Text</p></html>''')
 
 
-def test_missing_converter():
+def _converter(*args, **kwargs):
+    # Having a converter that does nothing is the same as if abiword fails to
+    # convert.
+    pass
+
+
+def test_converter_broken():
     file_path = 'test.doc'
     try:
-        convert(file_path)
+        convert(file_path, converter=_converter)
     except ConversionFailed:
         pass
     else:
@@ -713,5 +719,5 @@ def test_fall_back():
 
     def fall_back(*args, **kwargs):
         return 'success'
-    html = convert(file_path, fall_back=fall_back)
+    html = convert(file_path, fall_back=fall_back, converter=_converter)
     assert html == 'success'

From cab9512a0ec67f1c825a04b3f1df8b3e586e27f3 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Mon, 14 Jan 2013 12:09:33 -0500
Subject: [PATCH 08/10] refs #1: name change

---
 docx2html/{errors.py => exceptions.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename docx2html/{errors.py => exceptions.py} (100%)

diff --git a/docx2html/errors.py b/docx2html/exceptions.py
similarity index 100%
rename from docx2html/errors.py
rename to docx2html/exceptions.py

From 3623e0000a20ec99bd4bbd11a7640a9151481606 Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Mon, 14 Jan 2013 12:09:51 -0500
Subject: [PATCH 09/10] refs #1: made changes based on review notes

---
 docx2html/converters.py      | 17 +++++++++++++++++
 docx2html/core.py            | 34 ++++++++++++----------------------
 docx2html/exceptions.py      |  8 ++++++--
 docx2html/tests/test_docx.py | 13 ++++++-------
 4 files changed, 41 insertions(+), 31 deletions(-)
 create mode 100644 docx2html/converters.py

diff --git a/docx2html/converters.py b/docx2html/converters.py
new file mode 100644
index 0000000..36bc283
--- /dev/null
+++ b/docx2html/converters.py
@@ -0,0 +1,17 @@
+import subprocess
+
+
+def convert_with_abiword(docx_path, file_path):
+    """
+    This will convert ``file_path`` to docx and place the converted file at
+    ``docx_path``. The exit status of abiword is not checked.
+    """
+    subprocess.call(
+        [
+            'abiword',
+            '--to=docx',
+            '--to-name',
+            docx_path,
+            file_path,
+        ],
+    )
diff --git a/docx2html/core.py b/docx2html/core.py
index 1664171..1c5d8ca 100644
--- a/docx2html/core.py
+++ b/docx2html/core.py
@@ -1,6 +1,6 @@
 import cgi
 import os
-import subprocess
+import os.path
 from PIL import Image
 from lxml import etree
 from lxml.etree import XMLSyntaxError
@@ -8,9 +8,10 @@
 from collections import namedtuple, defaultdict
 from zipfile import ZipFile, BadZipfile
 
-from docx2html.errors import (
+from docx2html.exceptions import (
     ConversionFailed,
-    InvalidFileExtension,
+    FileNotDocx,
+    MalformedDocx,
 )
 
 DETECT_FONT_SIZE = False
@@ -1183,30 +1184,19 @@ def convert(file_path, image_handler=None, fall_back=None, converter=None):
         docx_path = file_path
     else:
         if converter is None:
-            def converter(file_path):
-                subprocess.call(
-                    [
-                        'abiword',
-                        '--to=docx',
-                        '--to-name',
-                        docx_path,
-                        file_path,
-                    ],
-                )
-        converter(file_path)
+            raise FileNotDocx('The file passed in is not a docx.')
+        converter(docx_path, file_path)
+        if not os.path.isfile(docx_path):
+            if fall_back is None:
+                raise ConversionFailed('Conversion to docx failed.')
+            else:
+                return fall_back(file_path)
 
     try:
         # Docx files are actually just zip files.
         zf = get_zip_file_handler(docx_path)
     except BadZipfile:
-        # If its a malformed zip file raise InvalidFileExtension
-        raise InvalidFileExtension('This file is not a docx')
-    except IOError:
-        # This means that the conversion from abiword failed.
-        if fall_back is not None:
-            return fall_back(file_path)
-        else:
-            raise ConversionFailed('Conversion to docx failed.')
+        raise MalformedDocx('This file is not a docx')
 
     # Need to populate the xml based on word/document.xml
     tree, meta_data = _get_document_data(zf, image_handler)
diff --git a/docx2html/exceptions.py b/docx2html/exceptions.py
index b2fd09b..c8e01bf 100644
--- a/docx2html/exceptions.py
+++ b/docx2html/exceptions.py
@@ -2,9 +2,13 @@ class Docx2HtmlException(Exception):
     pass
 
 
-class InvalidFileExtension(Docx2HtmlException):
+class ConversionFailed(Docx2HtmlException):
     pass
 
 
-class ConversionFailed(Docx2HtmlException):
+class FileNotDocx(Docx2HtmlException):
+    pass
+
+
+class MalformedDocx(Docx2HtmlException):
     pass
diff --git a/docx2html/tests/test_docx.py b/docx2html/tests/test_docx.py
index 58b29f1..6591ba4 100644
--- a/docx2html/tests/test_docx.py
+++ b/docx2html/tests/test_docx.py
@@ -3,6 +3,7 @@
 from os import path
 from zipfile import ZipFile
 from nose.plugins.skip import SkipTest
+from nose.tools import assert_raises
 
 from docx2html.tests import collapse_html
 from docx2html import convert
@@ -10,7 +11,7 @@
     _get_document_data,
     DETECT_FONT_SIZE,
 )
-from docx2html.errors import (
+from docx2html.exceptions import (
     ConversionFailed,
 )
 
@@ -706,12 +707,10 @@ def _converter(*args, **kwargs):
 
 def test_converter_broken():
     file_path = 'test.doc'
-    try:
-        convert(file_path, converter=_converter)
-    except ConversionFailed:
-        pass
-    else:
-        raise AssertionError('ConversionFailed was not raised')
+    assert_raises(
+        ConversionFailed,
+        lambda: convert(file_path, converter=_converter),
+    )
 
 
 def test_fall_back():

From 57a8086065a4d46539c827ad56b6b332c6f4eb4b Mon Sep 17 00:00:00 2001
From: Jason Ward <jason.ward@policystat.com>
Date: Mon, 14 Jan 2013 12:14:11 -0500
Subject: [PATCH 10/10] refs #1: added a docstring

---
 docx2html/core.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docx2html/core.py b/docx2html/core.py
index 1c5d8ca..a09803a 100644
--- a/docx2html/core.py
+++ b/docx2html/core.py
@@ -1169,6 +1169,19 @@ def get_zip_file_handler(file_path):
 
 
 def convert(file_path, image_handler=None, fall_back=None, converter=None):
+    """
+    ``file_path`` is a path to the file on the file system that you want to be
+        converted to html.
+    ``image_handler`` is a function that takes an image_id and a
+        relationship_dict to generate the src attribute for images. (see readme
+        for more details)
+    ``fall_back`` is a function that takes a ``file_path``. It is only called
+        when ``converter`` fails to produce a docx file.
+    ``converter`` is a function taking ``(docx_path, file_path)`` that converts
+        a non-docx file to docx (examples in docx2html.converters)
+
+    Returns html extracted from ``file_path``
+    """
     file_base, extension = os.path.splitext(os.path.basename(file_path))
 
     if extension == '.html':