From be9804388b64d65430e2cdf6e5c5b91c3869db38 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 27 May 2025 16:12:01 -0400 Subject: [PATCH 1/2] Port opt break test and change --- machine/corpora/usfm_text_base.py | 2 ++ tests/corpora/test_usfm_memory_text.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/machine/corpora/usfm_text_base.py b/machine/corpora/usfm_text_base.py index c286c001..9afc6c16 100644 --- a/machine/corpora/usfm_text_base.py +++ b/machine/corpora/usfm_text_base.py @@ -178,6 +178,8 @@ def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None: def opt_break(self, state: UsfmParserState) -> None: super().opt_break(state) + if len(self._row_texts_stack) == 0: + return if self._text._include_markers: self._row_texts_stack[-1] += "//" elif self._current_text_type != ScriptureTextType.VERSE or state.is_verse_text: diff --git a/tests/corpora/test_usfm_memory_text.py b/tests/corpora/test_usfm_memory_text.py index 37b87563..fdb94de9 100644 --- a/tests/corpora/test_usfm_memory_text.py +++ b/tests/corpora/test_usfm_memory_text.py @@ -136,6 +136,20 @@ def test_get_rows_verse_para_comment_first() -> None: assert len(rows) == 2, str.join(",", [tr.text for tr in rows]) +def test_get_rows_opt_break_outside_of_segment() -> None: + rows: List[TextRow] = get_rows( + r"""\id MAT - Test +\c 1 +// +\v 1 This is the first verse. +""", + include_all_text=True, + include_markers=True, + ) + assert rows[0].text == "This is the first verse." + assert len(rows) == 1, str.join(",", [tr.text for tr in rows]) + + def get_rows(usfm: str, include_markers: bool = False, include_all_text: bool = False) -> List[TextRow]: text = UsfmMemoryText( UsfmStylesheet("usfm.sty"), From 408641dc9c19b043b0cc1cbb5b3d9056a38d5a36 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 28 May 2025 08:43:10 -0400 Subject: [PATCH 2/2] Add paragraph marker back into test --- tests/corpora/test_usfm_memory_text.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/corpora/test_usfm_memory_text.py b/tests/corpora/test_usfm_memory_text.py index fdb94de9..367ec721 100644 --- a/tests/corpora/test_usfm_memory_text.py +++ b/tests/corpora/test_usfm_memory_text.py @@ -141,13 +141,15 @@ def test_get_rows_opt_break_outside_of_segment() -> None: r"""\id MAT - Test \c 1 // +\p \v 1 This is the first verse. """, include_all_text=True, include_markers=True, ) - assert rows[0].text == "This is the first verse." - assert len(rows) == 1, str.join(",", [tr.text for tr in rows]) + assert len(rows) == 2, str.join(",", [tr.text for tr in rows]) + assert rows[0].text == "" + assert rows[1].text == "This is the first verse." def get_rows(usfm: str, include_markers: bool = False, include_all_text: bool = False) -> List[TextRow]: