Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 29 additions & 4 deletions src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,12 @@ protected ScriptureRefUsfmParserHandlerBase()

// Reports whether the marker is non-null and either is contained in EmbedStyles (after trimming '*' characters
// from both ends of the marker) or starts with "z".
private static bool IsEmbedStyle(string marker)
{
return marker != null && (EmbedStyles.Contains(marker.Trim('*')) || marker.StartsWith("z"));
// NOTE(review): this second return is unreachable as written. It is the same check without the "z" prefix
// test and looks like the intended replacement for the line above - confirm which one should remain and
// delete the other.
return marker != null && EmbedStyles.Contains(marker.Trim('*'));
}

/// <summary>
/// Determines whether a marker is a private-use marker, i.e. one whose name begins with a lowercase "z".
/// </summary>
/// <param name="marker">The marker name to test; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> if <paramref name="marker"/> is non-null and starts with "z"; otherwise <c>false</c>.
/// </returns>
private static bool IsPrivateUseMarker(string marker)
{
    // Marker names are identifiers rather than linguistic text, so compare ordinally instead of relying on the
    // current culture's collation rules (the single-argument StartsWith overload is culture-sensitive).
    return marker != null && marker.StartsWith("z", System.StringComparison.Ordinal);
}

public override void EndUsfm(UsfmParserState state)
Expand Down Expand Up @@ -63,9 +68,12 @@ string pubNumber
{
if (state.VerseRef.Equals(_curVerseRef) && !_duplicateVerse)
{
EndVerseText(state, CreateVerseRefs());
// ignore duplicate verses
_duplicateVerse = true;
if (state.VerseRef.VerseNum > 0)
{
EndVerseText(state, CreateVerseRefs());
// ignore duplicate verses
_duplicateVerse = true;
}
}
else if (VerseRef.AreOverlappingVersesRanges(verse1: number, verse2: _curVerseRef.Verse))
{
Expand All @@ -92,6 +100,10 @@ public override void StartPara(
IReadOnlyList<UsfmAttribute> attributes
)
{
// ignore private-use markers
if (IsPrivateUseMarker(marker))
return;

if (_curVerseRef.IsDefault)
UpdateVerseRef(state.VerseRef, marker);

Expand All @@ -104,6 +116,10 @@ IReadOnlyList<UsfmAttribute> attributes

public override void EndPara(UsfmParserState state, string marker)
{
// ignore private-use markers
if (IsPrivateUseMarker(marker))
return;

if (CurrentTextType == ScriptureTextType.NonVerse)
{
EndParentElement();
Expand Down Expand Up @@ -185,6 +201,10 @@ public override void StartChar(
IReadOnlyList<UsfmAttribute> attributes
)
{
// ignore private-use markers
if (IsPrivateUseMarker(markerWithoutPlus))
return;

// if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse
// segment
CheckConvertVerseParaToNonVerse(state);
Expand All @@ -199,6 +219,10 @@ public override void EndChar(
bool closed
)
{
// ignore private-use markers
if (IsPrivateUseMarker(marker))
return;

if (IsEmbedStyle(marker))
EndEmbedText(state);
}
Expand Down Expand Up @@ -332,6 +356,7 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state)
&& paraTag.Marker != "tr"
&& state.IsVersePara
&& _curVerseRef.VerseNum == 0
&& !IsPrivateUseMarker(paraTag.Marker)
)
{
StartParentElement(paraTag.Marker);
Expand Down
57 changes: 57 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,63 @@ public void GetRows_StyleStartingNonVerseParagraphAfterEmptyParagraph()
});
}

// Parses USFM whose first chapter contains an empty "\v 0" followed by a section heading and verse 1, and
// checks that exactly one row is produced: MAT 1:1 with the text "Verse one.".
[Test]
public void GetRows_VerseZero()
{
    const string usfm =
@"\id MAT - Test
\h
\mt
\c 1
\p \v 0
\s
\p \v 1 Verse one.
";

    TextRow[] actualRows = GetRows(usfm);

    // Failure diagnostics: every returned reference / text, comma-separated.
    string allRefs = string.Join(",", actualRows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", actualRows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        Assert.That(actualRows, Has.Length.EqualTo(1));

        Assert.That(actualRows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1")), allRefs);
        Assert.That(actualRows[0].Text, Is.EqualTo("Verse one."), allTexts);
    });
}

// Parses front-matter USFM containing a "\zmt" paragraph and a "\zimagecopyrights" marker with
// includeAllText enabled, and checks that three rows are produced, the second being "FRT 1:0/2:pc" with the
// text "Copyright Statement".
[Test]
public void GetRows_PrivateUseMarker()
{
    const string usfm =
@"\id FRT - Test English Apocrypha
\zmt Ignore this paragraph
\mt1 Test English Apocrypha
\pc Copyright Statement \zimagecopyrights
\pc Further copyright statements
";

    TextRow[] actualRows = GetRows(usfm, includeAllText: true);

    // Failure diagnostics: every returned reference / text, comma-separated.
    string allRefs = string.Join(",", actualRows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", actualRows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        Assert.That(actualRows, Has.Length.EqualTo(3));

        Assert.That(actualRows[1].Ref, Is.EqualTo(ScriptureRef.Parse("FRT 1:0/2:pc")), allRefs);
        Assert.That(actualRows[1].Text, Is.EqualTo("Copyright Statement"), allTexts);
    });
}

private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false)
{
UsfmMemoryText text =
Expand Down
Loading