From 855ce764ad63e1ffd9ae8c970719f3f1b081abc3 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 13:14:20 -0400 Subject: [PATCH 1/7] refs #29: updated the tests for expected output --- pydocx/tests/__init__.py | 23 ---- pydocx/tests/test_docx.py | 272 +++++++++++--------------------------- 2 files changed, 75 insertions(+), 220 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index e509c397..b636109d 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -92,29 +92,6 @@ def get_list_style(self, num_id, ilvl): def _parse_styles(self): return {} - def head(self): - return '' - - def table(self, text): - return '' + text + '
' - - def ordered_list(self, text, list_style): - list_type_conversions = { - 'decimal': 'decimal', - 'decimalZero': 'decimal-leading-zero', - 'upperRoman': 'upper-roman', - 'lowerRoman': 'lower-roman', - 'upperLetter': 'upper-alpha', - 'lowerLetter': 'lower-alpha', - 'ordinal': 'decimal', - 'cardinalText': 'decimal', - 'ordinalText': 'decimal', - } - return '
    {text}
'.format( - list_style=list_type_conversions.get(list_style, 'decimal'), - text=text, - ) - DEFAULT_NUMBERING_DICT = { '1': { diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index ab00db5d..a55df801 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -10,33 +10,19 @@ from pydocx.parsers.Docx2Html import Docx2Html -class TestDocx2HTML(Docx2Html): - def head(self): - return '' - - def table(self, text): - return '' + text + '
' - - def ordered_list(self, text, list_style): - list_type_conversions = { - 'decimal': 'decimal', - 'decimalZero': 'decimal-leading-zero', - 'upperRoman': 'upper-roman', - 'lowerRoman': 'lower-roman', - 'upperLetter': 'upper-alpha', - 'lowerLetter': 'lower-alpha', - 'ordinal': 'decimal', - 'cardinalText': 'decimal', - 'ordinalText': 'decimal', - } - return '
    {text}
'.format( - list_style=list_type_conversions.get(list_style, 'decimal'), - text=text, - ) +def convert(path): + return Docx2Html(path).parsed +STYLE = '' # noqa -def convert(path): - return TestDocx2HTML(path).parsed +BASE_HTML = ''' + + + %s + + %%s + +''' % STYLE def test_extract_html(): @@ -47,17 +33,16 @@ def test_extract_html(): 'simple.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

Simple text

-
    +
    1. one
    2. two
    3. three
    - +
    @@ -67,7 +52,6 @@ def test_extract_html():
    Cell1 Cell2Cell4
    - ''') @@ -79,17 +63,16 @@ def test_nested_list(): 'nested_lists.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
      + assert_html_equal(actual_html, BASE_HTML % ''' +
      1. one
      2. two
      3. three -
          +
          1. AAA
          2. BBB
          3. CCC -
              +
              1. alpha
              @@ -97,9 +80,9 @@ def test_nested_list():
            1. four
            -
              +
              1. xxx -
                  +
                  1. yyy
                  @@ -111,7 +94,6 @@ def test_nested_list(): - ''') @@ -123,15 +105,13 @@ def test_simple_list(): 'simple_lists.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
                    + assert_html_equal(actual_html, BASE_HTML % ''' +
                    1. One
                    • two
                    - ''') @@ -143,8 +123,8 @@ def test_inline_tags(): 'inline_tags.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

                    This sentence has some bold, some italics and some underline, as well as a hyperlink.

                    ''') # noqa + assert_html_equal(actual_html, BASE_HTML % ''' +

                    This sentence has some bold, some italics and some underline, as well as a hyperlink.

                    ''') # noqa def test_unicode(): @@ -167,8 +147,8 @@ def test_special_chars(): 'special_chars.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

                    & < > link

                    ''') # noqa + assert_html_equal(actual_html, BASE_HTML % ''' +

                    & < > link

                    ''') # noqa def test_table_col_row_span(): @@ -179,9 +159,8 @@ def test_table_col_row_span(): 'table_col_row_span.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
                    @@ -204,7 +183,7 @@ def test_table_col_row_span():
                    AAA
                    - +
                    @@ -227,7 +206,6 @@ def test_table_col_row_span():
                    1 213
                    - ''') @@ -239,16 +217,15 @@ def test_nested_table_rowspan(): 'nested_table_rowspan.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
                    AAA
                    BBB - +
                    @@ -260,7 +237,6 @@ def test_nested_table_rowspan():
                    CCC DDD
                    - ''') @@ -273,9 +249,8 @@ def test_nested_tables(): ) actual_html = convert(file_path) # Find out why br tag is there. - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
                    @@ -283,7 +258,7 @@ def test_nested_tables():
                    AAA BBB
                    CCC - +
                    @@ -296,7 +271,6 @@ def test_nested_tables():
                    DDD EEE
                    - ''') @@ -308,12 +282,11 @@ def test_list_in_table(): 'list_in_table.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
                    -
                      +
                      1. AAA
                      2. BBB
                      3. CCC
                      4. @@ -321,7 +294,6 @@ def test_list_in_table():
                    - ''') @@ -333,12 +305,11 @@ def test_tables_in_lists(): 'tables_in_lists.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
                      + assert_html_equal(actual_html, BASE_HTML % ''' +
                      1. AAA
                      2. BBB - +
                        @@ -351,7 +322,6 @@ def test_tables_in_lists():
                      3. GGG
                      4. - ''') @@ -363,8 +333,8 @@ def test_track_changes_on(): 'track_changes_on.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

                        This was some content.

                        + assert_html_equal(actual_html, BASE_HTML % ''' +

                        This was some content.

                        ''') @@ -376,8 +346,7 @@ def test_headers(): 'headers.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        This is an H1

                        This is an H2

                        This is an H3

                        @@ -388,7 +357,6 @@ def test_headers():
                        This is an H8
                        This is an H9
                        This is an H10
                        - ''') @@ -415,8 +383,8 @@ def test_split_headers(): new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' -

                        AAA

                        BBB

                        CCC

                        + assert_html_equal(actual_html, BASE_HTML % ''' +

                        AAA

                        BBB

                        CCC

                        ''') @@ -435,10 +403,8 @@ def test_has_image(): actual_html = convert(new_file_path) # Ignore height, width for now. - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        AAA

                        - ''') @@ -452,10 +418,8 @@ def test_local_dpi(): ) new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        - ''') @@ -476,45 +440,11 @@ def test_has_image_using_image_handler(): def image_handler(*args, **kwargs): return 'test' actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        AAA

                        - ''') -#def test_attachment_is_tiff(): -# filename = 'attachment_is_tiff.docx' -# file_path = path.join( -# path.abspath(path.dirname(__file__)), -# '..', -# 'fixtures', -# 'attachment_is_tiff.docx', -# ) -# # preserve_images must be true in order for the image to not be removed. -# # This is handled in build_import, however here we need to manually set it -# # to True. -# new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) -# -# # First open the file and verify that the image attachment is a tiff. -# try: -# zf = ZipFile(new_file_path) -# # Get the document data. -# _, meta_data = _get_document_data(zf) -# finally: -# zf.close() -# # Find the path to the image. -# image_file = None -# for file_path in meta_data.relationship_dict.values(): -# if file_path.endswith('.gif'): -# image_file = file_path -# assert image_file is not None -# with open(image_file) as f: -# magic_number = f.read()[:4] -# # Make sure the image is actually a gif. -# assert magic_number == 'GIF8' - - def test_headers_with_full_line_styles(): raise SkipTest('This test is not yet passing') # Show that if a natural header is completely bold/italics that @@ -526,12 +456,10 @@ def test_headers_with_full_line_styles(): 'headers_with_full_line_styles.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        AAA

                        BBB

                        CCC

                        - ''') @@ -546,17 +474,16 @@ def test_convert_p_to_h(): 'convert_p_to_h.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        AAA

                        BBB

                        CCC

                        -
                          +
                          1. DDD
                          2. EEE
                          3. FFF
                          -
                        CCC DDD
                        +
                        @@ -566,43 +493,9 @@ def test_convert_p_to_h():
                        GGG HHHJJJ
                        - ''') -#def test_bigger_font_size_to_header(): -# # Show when it is appropriate to convert p tags to h tags based on font -# # size. -# if not DETECT_FONT_SIZE: -# raise SkipTest('Font size detection is disabled.') -# file_path = path.join( -# path.abspath(path.dirname(__file__)), -# '..', -# 'fixtures', -# 'bigger_font_size_to_header.docx', -# ) -# actual_html = convert(file_path) -# assert_html_equal(actual_html, ''' -# -#

                        Paragraphs:

                        -#

                        Header

                        -#

                        paragraph 1

                        -#

                        Lists:

                        -#
                          -#
                        1. bigger
                        2. -#
                        3. smaller
                        4. -#
                        -#

                        Tables:

                        -# -# -# -# -# -#
                        biggersmaller
                        -# -# ''') - - def test_fake_headings_by_length(): raise SkipTest('This test is not yet passing') # Show that converting p tags to h tags has a length limit. If the p tag is @@ -615,14 +508,12 @@ def test_fake_headings_by_length(): 'fake_headings_by_length.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        Heading.

                        Still a heading.

                        This is not a heading because it is too many words.

                        - ''') @@ -637,15 +528,14 @@ def test_shift_enter(): # Test just the convert without clean_html to make sure the first # break tag is present. actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                        AAA
                        BBB

                        CCC

                        -
                          +
                          1. DDD
                            EEE
                          2. FFF
                          - +
                          @@ -655,7 +545,6 @@ def test_shift_enter():
                          GGG
                          HHH
                          III
                          JJJ
                          LLL
                          - ''') @@ -667,17 +556,16 @@ def test_lists_with_styles(): 'lists_with_styles.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
                            + assert_html_equal(actual_html, BASE_HTML % ''' +
                            1. AAA
                            2. BBB -
                                +
                                1. CCC
                                2. DDD -
                                    +
                                    1. EEE -
                                        +
                                        1. FFF
                                        @@ -686,7 +574,6 @@ def test_lists_with_styles():
                                    - ''') @@ -701,25 +588,23 @@ def test_list_to_header(): actual_html = convert(file_path) # It should be noted that list item `GGG` is upper roman in the word # document to show that only top level upper romans get converted. - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                                    AAA

                                    -
                                      +
                                      1. BBB

                                      CCC

                                      -
                                        +
                                        1. DDD

                                        EEE

                                        -
                                          +
                                          1. FFF -
                                              +
                                              1. GGG
                                            - ''') @@ -731,11 +616,10 @@ def test_has_title(): 'has_title.docx', ) actual_html = convert(file_path) - assert_html_equal( - actual_html, - '''

                                            Title

                                            -

                                            Text

                                            ''', - ) + assert_html_equal(actual_html, BASE_HTML % ''' +

                                            Title

                                            +

                                            Text

                                            + ''') def test_upper_alpha_all_bold(): @@ -747,12 +631,10 @@ def test_upper_alpha_all_bold(): 'upper_alpha_all_bold.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                                            AAA

                                            BBB

                                            CCC

                                            - ''') @@ -764,14 +646,12 @@ def test_simple_table(): 'simple_table.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
                                            Cell1
                                            Cell3
                                            Cell2
                                            And I am writing in the table
                                            Cell4
                                            - ''') @@ -783,8 +663,7 @@ def test_justification(): 'justification.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                                            Center Justified

                                            @@ -808,7 +687,6 @@ def test_justification(): Left justified and pushed in from left

                                            - ''') From bf2705bd2f23e700e7e9e7910b295f46994e6cb0 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 13:23:44 -0400 Subject: [PATCH 2/7] refs #29: updated the xml based tests for the new expected html --- pydocx/tests/__init__.py | 17 +++++- pydocx/tests/test_docx.py | 13 +--- pydocx/tests/test_xml.py | 124 ++++++++++++++------------------------ 3 files changed, 60 insertions(+), 94 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index b636109d..74685f1e 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -11,6 +11,17 @@ ) from unittest import TestCase +STYLE = '' # noqa + +BASE_HTML = ''' + + + %s + + %%s + +''' % STYLE + def assert_html_equal(actual_html, expected_html): assert collapse_html( @@ -99,8 +110,8 @@ def _parse_styles(self): '1': 'decimal', }, '2': { - '0': 'none', - '1': 'none', + '0': 'lowerLetter', + '1': 'lowerLetter', }, } @@ -136,4 +147,4 @@ def test_expected_output(self): numbering_dict=self.numbering_dict, ).parsed - assert_html_equal(html, self.expected_output) + assert_html_equal(html, BASE_HTML % self.expected_output) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index a55df801..70b2e09f 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -6,24 +6,13 @@ from nose.plugins.skip import SkipTest #from nose.tools import assert_raises -from pydocx.tests import assert_html_equal +from pydocx.tests import assert_html_equal, BASE_HTML from pydocx.parsers.Docx2Html import Docx2Html def convert(path): return Docx2Html(path).parsed -STYLE = '' # noqa - -BASE_HTML = ''' - - - %s - - %%s - -''' % STYLE - def test_extract_html(): file_path = path.join( diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index c1a5bf8a..6d1464d0 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -15,10 +15,8 @@ class BoldTestCase(_TranslationTestCase): expected_output = """ -

                                            AAA

                                            BBB

                                            - """ def get_xml(self): @@ -40,9 +38,7 @@ class HyperlinkVanillaTestCase(_TranslationTestCase): } expected_output = ''' -

                                            link.

                                            - ''' def get_xml(self): @@ -61,9 +57,7 @@ class HyperlinkWithMultipleRunsTestCase(_TranslationTestCase): } expected_output = ''' -

                                            link.

                                            - ''' def get_xml(self): @@ -81,8 +75,6 @@ class HyperlinkNoTextTestCase(_TranslationTestCase): } expected_output = ''' - - ''' def get_xml(self): @@ -99,9 +91,7 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): } expected_output = ''' -

                                            link.

                                            - ''' def get_xml(self): @@ -120,9 +110,7 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase): } expected_output = ''' -

                                            link

                                            - ''' def get_xml(self): @@ -141,14 +129,12 @@ class ImageTestCase(_TranslationTestCase): 'rId1': 'media/image2.jpeg', } expected_output = ''' -

                                            - ''' def get_xml(self): @@ -217,8 +203,6 @@ class ImageNotInRelsDictTestCase(_TranslationTestCase): # 'rId0': 'media/image1.jpeg', } expected_output = ''' - - ''' def get_xml(self): @@ -271,8 +255,7 @@ def get_xml(self): class TableTag(_TranslationTestCase): expected_output = ''' - - +
                                            @@ -282,7 +265,6 @@ class TableTag(_TranslationTestCase):
                                            AAA BBBDDD
                                            - ''' def get_xml(self): @@ -299,8 +281,7 @@ def get_xml(self): class NestedTableTag(_TranslationTestCase): expected_output = ''' - - +
                                            @@ -308,7 +289,7 @@ class NestedTableTag(_TranslationTestCase): ' + text + '' From cbea7a986a0eeb5d852ba4fce45c00dddc13b8d3 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:10:58 -0400 Subject: [PATCH 5/7] refs #29: updated tests based on merged master --- pydocx/tests/test_xml.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 6cc1ddfc..16014fdd 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -673,14 +673,12 @@ def get_xml(self): class DeleteTagInList(_TranslationTestCase): expected_output = ''' - -
                                              +
                                              1. AAA
                                                BBB
                                              2. CCC
                                              - ''' def get_xml(self): @@ -697,14 +695,12 @@ def get_xml(self): class InsertTagInList(_TranslationTestCase): expected_output = ''' - -
                                                +
                                                1. AAA
                                                  BBB
                                                2. CCC
                                                - ''' def get_xml(self): @@ -722,14 +718,12 @@ def get_xml(self): class SmartTagInList(_TranslationTestCase): expected_output = ''' - -
                                                  +
                                                  1. AAA
                                                    BBB
                                                  2. CCC
                                                  - ''' def get_xml(self): From e605958b8792a9025bb047961b57d46fb4e8d8ef Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:13:27 -0400 Subject: [PATCH 6/7] refs #29: updated tests based on merged master --- pydocx/tests/test_xml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 85cfea1c..17609d1a 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -874,14 +874,12 @@ def get_xml(self): class SDTTestCase(_TranslationTestCase): expected_output = ''' - -
                                                    +
                                                    1. AAA
                                                      BBB
                                                    2. CCC
                                                    - ''' def get_xml(self): From a0de8a97933edf045e8ce9d90dd09906a09639b5 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:52:58 -0400 Subject: [PATCH 7/7] refs #29: namespaced all the css classes --- README.md | 12 ++++++------ pydocx/parsers/Docx2Html.py | 19 ++++++++++--------- pydocx/tests/__init__.py | 13 +++++++------ pydocx/tests/test_docx.py | 15 ++++++++------- pydocx/tests/test_xml.py | 6 +++--- 5 files changed, 34 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 91bd33e3..de86d68f 100644 --- a/README.md +++ b/README.md @@ -166,10 +166,10 @@ OR, let's say FOO is your new favorite markup language. Simply customize your ow The base parser `Docx2Html` relies on certain css class being set for certain behaviour to occur. Currently these include: -* class `insert` -> Turns the text green. -* class `delete` -> Turns the text red and draws a line through the text. -* class `center` -> Aligns the text to the center. -* class `right` -> Aligns the text to the right. -* class `left` -> Aligns the text to the left. -* class `comment` -> Turns the text blue. +* class `pydocx-insert` -> Turns the text green. +* class `pydocx-delete` -> Turns the text red and draws a line through the text. +* class `pydocx-center` -> Aligns the text to the center. +* class `pydocx-right` -> Aligns the text to the right. +* class `pydocx-left` -> Aligns the text to the left. +* class `pydocx-comment` -> Turns the text blue. * class `pydocx-underline` -> Underlines the text. diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 4b6c6154..782be941 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -21,12 +21,13 @@ def head(self): def style(self): result = ( - '' @@ -53,7 +54,7 @@ def heading(self, text, heading_value): def insertion(self, text, author, date): return ( - "%(text)s" ) % { 'author': author, @@ -87,7 +88,7 @@ def image(self, path, x, y): def deletion(self, text, author, date): return ( - "%(text)s" ) % { 'author': author, @@ -150,7 +151,7 @@ def page_break(self): def indent(self, text, just='', firstLine='', left='', right=''): slug = '.insert {color:green;}' - '.delete {color:red;text-decoration:line-through;}' - '.center {text-align:center;}' - '.right {text-align:right;}' - '.left {text-align:left;}' - '.comment {color:blue;}' + '' diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index e3207ec8..72b98f5b 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -166,13 +166,13 @@ def test_table_col_row_span():
                                            AAA BBB
                                            CCC - +
                                            @@ -321,7 +302,6 @@ class NestedTableTag(_TranslationTestCase):
                                            DDD EEE
                                            - ''' def get_xml(self): @@ -344,8 +324,7 @@ def get_xml(self): class TableWithInvalidTag(_TranslationTestCase): expected_output = ''' - - +
                                            @@ -355,7 +334,6 @@ class TableWithInvalidTag(_TranslationTestCase):
                                            AAA BBBDDD
                                            - ''' def get_xml(self): @@ -374,11 +352,10 @@ def get_xml(self): class TableWithListAndParagraph(_TranslationTestCase): expected_output = ''' - - +
                                            -
                                              +
                                              1. AAA
                                              2. BBB
                                              @@ -387,7 +364,6 @@ class TableWithListAndParagraph(_TranslationTestCase):
                                            - ''' def get_xml(self): @@ -416,13 +392,11 @@ def get_xml(self): class SimpleListTestCase(_TranslationTestCase): expected_output = ''' - -
                                              +
                                              1. AAA
                                              2. BBB
                                              3. CCC
                                              - ''' # Ensure its not failing somewhere and falling back to decimal @@ -448,11 +422,9 @@ def get_xml(self): class SingleListItemTestCase(_TranslationTestCase): expected_output = ''' - -
                                                +
                                                1. AAA
                                                - ''' # Ensure its not failing somewhere and falling back to decimal @@ -476,11 +448,10 @@ def get_xml(self): class ListWithContinuationTestCase(_TranslationTestCase): expected_output = ''' - -
                                                  +
                                                  1. AAA
                                                    BBB
                                                  2. CCC - +
                                                    @@ -493,7 +464,6 @@ class ListWithContinuationTestCase(_TranslationTestCase):
                                                  3. HHH
                                                  4. - ''' def get_xml(self): @@ -520,15 +490,14 @@ def get_xml(self): class ListWithMultipleContinuationTestCase(_TranslationTestCase): expected_output = ''' - -
                                                      +
                                                      1. AAA -
                                                    DDD EEE
                                                    +
                                                    BBB
                                                    - +
                                                    @@ -536,7 +505,6 @@ class ListWithMultipleContinuationTestCase(_TranslationTestCase):
                                                  5. DDD
                                                  6. - ''' def get_xml(self): @@ -562,18 +530,16 @@ def get_xml(self): class MangledIlvlTestCase(_TranslationTestCase): expected_output = ''' - -
                                                      +
                                                      1. AAA
                                                      -
                                                        +
                                                        1. BBB -
                                                            +
                                                            1. CCC
                                                          - ''' def get_xml(self): @@ -592,17 +558,15 @@ def get_xml(self): class SeperateListsTestCase(_TranslationTestCase): expected_output = ''' - -
                                                            +
                                                            1. AAA
                                                            -
                                                              +
                                                              1. BBB
                                                              -
                                                                +
                                                                1. CCC
                                                                - ''' def get_xml(self): @@ -624,19 +588,17 @@ def get_xml(self): class InvalidIlvlOrderTestCase(_TranslationTestCase): expected_output = ''' - -
                                                                  +
                                                                  1. AAA -
                                                                      +
                                                                      1. BBB -
                                                                          +
                                                                          1. CCC
                                                                      - ''' def get_xml(self): @@ -686,10 +648,8 @@ def test_performance(self): class NonStandardTextTagsTestCase(_TranslationTestCase): expected_output = ''' -

                                                                      insert smarttag

                                                                      - ''' def get_xml(self): @@ -705,7 +665,7 @@ def get_xml(self): class RTagWithNoText(_TranslationTestCase): - expected_output = '' + expected_output = '' def get_xml(self): p_tag = DXB.p_tag(None) # No text @@ -720,12 +680,10 @@ def get_xml(self): class SingleListItem(_TranslationTestCase): expected_output = ''' - -
                                                                        +
                                                                        1. AAA

                                                                        BBB

                                                                        - ''' numbering_dict = { @@ -748,11 +706,23 @@ def get_xml(self): class SimpleTableTest(_TranslationTestCase): expected_output = ''' - -
                                                    CCC
                                                    - - -
                                                    BlankColumn 1Column 2
                                                    Row 1FirstSecond
                                                    Row 2ThirdFourth
                                                    ''' + + + + + + + + + + + + + + + + +
                                                    BlankColumn 1Column 2
                                                    Row 1FirstSecond
                                                    Row 2ThirdFourth
                                                    ''' def get_xml(self): table = DXB.table(num_rows=3, num_columns=3, text=chain( @@ -773,14 +743,12 @@ def get_xml(self): class MissingIlvl(_TranslationTestCase): expected_output = ''' - -
                                                      +
                                                      1. AAA
                                                        BBB
                                                      2. CCC
                                                      - ''' def get_xml(self): @@ -800,13 +768,12 @@ def get_xml(self): class SameNumIdInTable(_TranslationTestCase): expected_output = ''' - -
                                                        +
                                                        1. AAA - +
                                                          @@ -815,7 +782,6 @@ class SameNumIdInTable(_TranslationTestCase):
                                                        2. CCC
                                                        3. - ''' # Ensure its not failing somewhere and falling back to decimal numbering_dict = { From 061d8d18f2c56dfd359bf2929e9e692353aae1d0 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 13:26:01 -0400 Subject: [PATCH 3/7] refs #29: updated white space --- pydocx/tests/test_xml.py | 151 +++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 79 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 6d1464d0..145f0d39 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -15,8 +15,8 @@ class BoldTestCase(_TranslationTestCase): expected_output = """ -

                                                          AAA

                                                          -

                                                          BBB

                                                          +

                                                          AAA

                                                          +

                                                          BBB

                                                          """ def get_xml(self): @@ -74,8 +74,7 @@ class HyperlinkNoTextTestCase(_TranslationTestCase): 'rId0': 'www.google.com', } - expected_output = ''' - ''' + expected_output = '' def get_xml(self): run_tags = [] @@ -90,9 +89,7 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): # 'rId0': 'www.google.com', missing } - expected_output = ''' -

                                                          link.

                                                          - ''' + expected_output = '

                                                          link.

                                                          ' def get_xml(self): run_tags = [] @@ -109,9 +106,7 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase): 'rId0': 'www.google.com', } - expected_output = ''' -

                                                          link

                                                          - ''' + expected_output = '

                                                          link

                                                          ' def get_xml(self): run_tags = [] @@ -129,12 +124,12 @@ class ImageTestCase(_TranslationTestCase): 'rId1': 'media/image2.jpeg', } expected_output = ''' -

                                                          - -

                                                          -

                                                          - -

                                                          +

                                                          + +

                                                          +

                                                          + +

                                                          ''' def get_xml(self): @@ -202,8 +197,7 @@ class ImageNotInRelsDictTestCase(_TranslationTestCase): relationship_dict = { # 'rId0': 'media/image1.jpeg', } - expected_output = ''' - ''' + expected_output = '' def get_xml(self): drawing = DXB.drawing(height=20, width=40, r_id='rId0') @@ -255,16 +249,16 @@ def get_xml(self): class TableTag(_TranslationTestCase): expected_output = ''' -
                                                          -
                                                            +
                                                            1. BBB
                                                          - - - - - - - - -
                                                          AAABBB
                                                          CCCDDD
                                                          + + + + + + + + + +
                                                          AAABBB
                                                          CCCDDD
                                                          ''' def get_xml(self): @@ -324,16 +318,16 @@ def get_xml(self): class TableWithInvalidTag(_TranslationTestCase): expected_output = ''' - - - - - - - - - -
                                                          AAABBB
                                                          DDD
                                                          + + + + + + + + + +
                                                          AAABBB
                                                          DDD
                                                          ''' def get_xml(self): @@ -392,11 +386,11 @@ def get_xml(self): class SimpleListTestCase(_TranslationTestCase): expected_output = ''' -
                                                            -
                                                          1. AAA
                                                          2. -
                                                          3. BBB
                                                          4. -
                                                          5. CCC
                                                          6. -
                                                          +
                                                            +
                                                          1. AAA
                                                          2. +
                                                          3. BBB
                                                          4. +
                                                          5. CCC
                                                          6. +
                                                          ''' # Ensure its not failing somewhere and falling back to decimal @@ -422,9 +416,9 @@ def get_xml(self): class SingleListItemTestCase(_TranslationTestCase): expected_output = ''' -
                                                            -
                                                          1. AAA
                                                          2. -
                                                          +
                                                            +
                                                          1. AAA
                                                          2. +
                                                          ''' # Ensure its not failing somewhere and falling back to decimal @@ -448,22 +442,22 @@ def get_xml(self): class ListWithContinuationTestCase(_TranslationTestCase): expected_output = ''' -
                                                            -
                                                          1. AAA
                                                            BBB
                                                          2. -
                                                          3. CCC - - - - - - - - - -
                                                            DDDEEE
                                                            FFFGGG
                                                            -
                                                          4. -
                                                          5. HHH
                                                          6. -
                                                          +
                                                            +
                                                          1. AAA
                                                            BBB
                                                          2. +
                                                          3. CCC + + + + + + + + + +
                                                            DDDEEE
                                                            FFFGGG
                                                            +
                                                          4. +
                                                          5. HHH
                                                          6. +
                                                          ''' def get_xml(self): @@ -490,21 +484,21 @@ def get_xml(self): class ListWithMultipleContinuationTestCase(_TranslationTestCase): expected_output = ''' -
                                                            -
                                                          1. AAA - - - - -
                                                            BBB
                                                            - - - - -
                                                            CCC
                                                            -
                                                          2. -
                                                          3. DDD
                                                          4. -
                                                          +
                                                            +
                                                          1. AAA + + + + +
                                                            BBB
                                                            + + + + +
                                                            CCC
                                                            +
                                                          2. +
                                                          3. DDD
                                                          4. +
                                                          ''' def get_xml(self): @@ -616,8 +610,7 @@ def get_xml(self): class DeeplyNestedTableTestCase(_TranslationTestCase): - expected_output = ''' - ''' + expected_output = '' run_expected_output = False def get_xml(self): From 2aa5922d04f45c1ce93277b3506593dedb674326 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 13:27:12 -0400 Subject: [PATCH 4/7] refs #29: updated the parser for valid values --- pydocx/parsers/Docx2Html.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index f97e39be..0a2249a6 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -24,12 +24,7 @@ def head(self): } def style(self): - return textwrap.dedent('''''') % { + return textwrap.dedent('') % { # noqa 'width': (self.page_width * (4 / 3)), } #multiple by (4/3) to get to px @@ -99,8 +94,9 @@ def list_element(self, text): } def ordered_list(self, text, list_style): - return "
                                                            %(text)s
                                                          " % { + return '
                                                            %(text)s
                                                          ' % { 'text': text, + 'list_style': list_style, } def unordered_list(self, text): @@ -123,7 +119,7 @@ def tab(self): return '    ' def table(self, text): - return '' + text + '
                                                          ' + return '' + text + '
                                                          ' def table_row(self, text): return '
                                            -
                                            EEE +
                                            EEE
                                            FFF
                                            -
                                            GGG +
                                            GGG
                                            @@ -611,7 +611,7 @@ def test_has_title(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

                                            Title

                                            -

                                            Text

                                            +

                                            Text

                                            ''') @@ -667,18 +667,19 @@ def test_justification(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

                                            -

                                            Center Justified
                                            +
                                            Center Justified

                                            -

                                            Right justified
                                            +
                                            Right justified

                                            -

                                            +
                                            Right justified and pushed in from right

                                            -

                                            +
                                            Center justified and pushed in from left and it is great and it is the coolest thing of all time and I like it and I think it is cool diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 7538ca1b..4e5cf1a0 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -641,7 +641,7 @@ def test_performance(self): class NonStandardTextTagsTestCase(_TranslationTestCase): expected_output = ''' -

                                            insert +

                                            insert smarttag

                                            ''' @@ -675,7 +675,7 @@ class DeleteTagInList(_TranslationTestCase): expected_output = '''
                                            1. AAA
                                              - BBB + BBB
                                            2. CCC
                                            @@ -697,7 +697,7 @@ class InsertTagInList(_TranslationTestCase): expected_output = '''
                                            1. AAA
                                              - BBB + BBB
                                            2. CCC