Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using System.Collections.Generic;
using System.Collections.Generic;
using System.Linq;
using SIL.Extensions;
using SIL.Scripture;

namespace SIL.Machine.Corpora
Expand Down Expand Up @@ -67,31 +66,36 @@ public override void Verse(
string pubNumber
)
{
if (state.VerseRef.Equals(_curVerseRef) && !DuplicateVerse)
if (state.ChapterHasVerseZero && state.VerseRef.VerseNum == 0)
{
// Fall through for the special case of verse 0 being specified in the USFM
}
else if (state.VerseRef.Equals(_curVerseRef) && !DuplicateVerse)
{
if (state.VerseRef.VerseNum > 0)
{
EndVerseText(state, CreateVerseRefs());
// ignore duplicate verses
DuplicateVerse = true;
}

return;
}
else if (VerseRef.AreOverlappingVersesRanges(verse1: number, verse2: _curVerseRef.Verse))
{
// merge overlapping verse ranges into one range
VerseRef verseRef = _curVerseRef.Clone();
verseRef.Verse = CorporaUtils.MergeVerseRanges(number, _curVerseRef.Verse);
UpdateVerseRef(verseRef, marker);
return;
}

if (CurrentTextType == ScriptureTextType.NonVerse)
EndNonVerseText(state);
else
{
if (CurrentTextType == ScriptureTextType.NonVerse)
EndNonVerseText(state);
else
EndVerseText(state);
UpdateVerseRef(state.VerseRef, marker);
StartVerseText(state);
}
EndVerseText(state);
UpdateVerseRef(state.VerseRef, marker);
StartVerseText(state);
}

public override void StartPara(
Expand Down Expand Up @@ -259,9 +263,9 @@ private void StartVerseText(UsfmParserState state)

/// <summary>
/// Ends the verse text that is currently open, if any.
/// </summary>
/// <remarks>
/// Text under verse 0 is only treated as verse text when the USFM explicitly
/// specified a verse 0 marker for the chapter (<c>state.ChapterHasVerseZero</c>);
/// otherwise there is no open verse text to end and nothing is popped.
/// </remarks>
/// <param name="state">The current parser state.</param>
private void EndVerseText(UsfmParserState state)
{
    // A single condition decides both the notification and the pop, so the
    // text-type stack can never get out of step with the verse handling.
    bool hasOpenVerseText = _curVerseRef.VerseNum > 0 || state.ChapterHasVerseZero;
    if (!hasOpenVerseText)
        return;

    // Duplicate verses are ignored, so they are not reported to the
    // verse-refs overload, but the text type pushed for them is still removed.
    if (!DuplicateVerse)
        EndVerseText(state, CreateVerseRefs());
    _curTextType.Pop();
}

Expand All @@ -280,7 +284,14 @@ private void EndNonVerseText(UsfmParserState state)

private void UpdateVerseRef(VerseRef verseRef, string marker)
{
if (!VerseRef.AreOverlappingVersesRanges(verseRef, _curVerseRef))
if (_curVerseRef.VerseNum == 0 && verseRef.VerseNum == 0 && marker == "v")
{
// As the verse 0 marker appears within the middle of verse 0,
// we should not break the position of current element stack by clearing it.
// Instead, we just need to pop the current element off the stack.
_curElements.Pop();
}
else if (!VerseRef.AreOverlappingVersesRanges(verseRef, _curVerseRef))
{
_curElements.Clear();
_curElements.Push(new ScriptureElement(0, marker));
Expand Down Expand Up @@ -357,6 +368,7 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state)
&& paraTag.Marker != "tr"
&& state.IsVersePara
&& _curVerseRef.VerseNum == 0
&& !state.ChapterHasVerseZero
&& !IsPrivateUseMarker(paraTag.Marker)
)
{
Expand Down
10 changes: 8 additions & 2 deletions src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.Linq;
using SIL.Scripture;
Expand Down Expand Up @@ -107,7 +107,7 @@ public UpdateUsfmParserHandler(
preserveParagraphStyles == null
? new HashSet<string> { "r", "rem" }
: new HashSet<string>(preserveParagraphStyles);
_remarks = remarks == null ? new List<string>() : remarks.ToList();
_remarks = remarks?.ToList() ?? new List<string>();
_errorHandler = errorHandler;
if (_errorHandler == null)
_errorHandler = (error) => false;
Expand Down Expand Up @@ -457,6 +457,12 @@ IReadOnlyList<ScriptureRef> segScrRefs
var rowTexts = new List<string>();
Dictionary<string, object> rowMetadata = null;
int sourceIndex = 0;

// Handle the special case of verse 0: although its row sorts first, it may be
// requested after other segments of verse 0 have already advanced the row index,
// so restart the search from the first row.
if (segScrRefs.Count > 0 && segScrRefs[0].VerseNum == 0 && segScrRefs[0].Path.Count == 0)
_verseRowIndex = 0;

// search the sorted rows with updated text, starting from where we left off last.
while (_verseRowIndex < _verseRows.Count && sourceIndex < segScrRefs.Count)
{
Expand Down
3 changes: 3 additions & 0 deletions src/SIL.Machine/Corpora/UsfmParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ public bool ProcessToken()
vref = State.VerseRef;
vref.Chapter = token.Data;
vref.VerseNum = 0;
State.ChapterHasVerseZero = false;
State.VerseRef = vref;
// Verse offset is not zeroed for chapter 1, as it is part of intro
if (State.VerseRef.ChapterNum != 1)
Expand Down Expand Up @@ -391,6 +392,8 @@ public bool ProcessToken()
// Verse
vref = State.VerseRef;
vref.Verse = token.Data;
if (vref.VerseNum == 0)
State.ChapterHasVerseZero = true;
State.VerseRef = vref;
State.VerseOffset = 0;

Expand Down
14 changes: 8 additions & 6 deletions src/SIL.Machine/Corpora/UsfmParserState.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ public UsfmParserState(UsfmStylesheet stylesheet, ScrVers versification, IReadOn
/// </summary>
public int SpecialTokenCount { get; internal set; }

/// <summary>
/// <c>true</c> if the current chapter has verse 0 explicitly specified in the USFM.
/// The parser resets this to <c>false</c> when it starts a new chapter and sets it
/// to <c>true</c> when it processes a verse whose number is 0.
/// </summary>
public bool ChapterHasVerseZero { get; internal set; }

/// <summary>
/// True if the token processed is a figure.
/// </summary>
Expand Down Expand Up @@ -104,10 +109,7 @@ public UsfmTag ParaTag
/// <summary>
/// Innermost character tag or null for none
/// </summary>
/// <remarks>
/// Returns the first entry of <c>CharTags</c>, or <c>null</c> when there is none.
/// </remarks>
public UsfmTag CharTag => CharTags.FirstOrDefault();

/// <summary>
/// Current note tag or null for none
Expand Down Expand Up @@ -157,8 +159,8 @@ public bool IsVerseText
{
get
{
// Anything before verse 1 is not verse text
if (VerseRef.VerseNum == 0)
// Anything before verse 1 is not verse text, unless the USFM specified verse 0
if (VerseRef.VerseNum == 0 && !ChapterHasVerseZero)
return false;

// Sidebars and notes are not verse text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,63 @@ public void UpdateUsfm_StripParagraphsWithHeaders()
AssertUsfmEquals(target, result);
}

[Test]
public void UpdateUsfm_SupportVerseZero()
{
    // Source document: chapter 1 declares an explicit verse 0 ahead of verse 1.
    string sourceUsfm =
@"\id MAT
\mt Old book header
\c 1
\s Old chapter header
\p
\v 0 Old verse 0
\ms Old major section header
\s Old section header 1
\p
\v 1 Old verse 1
\s Old section header 2
\p
\v 2 Old verse 2
\v 3 Old verse 3
";

    // The paragraph marker in front of verse 0 opens before any verse text exists,
    // so it gets its own (empty) row instead of being folded into the verse text
    // the way the paragraphs of the later verses are.
    IReadOnlyList<UpdateUsfmRow> replacementRows =
    [
        new(ScrRef("MAT 1:0"), "New verse 0"),
        new(ScrRef("MAT 1:0/1:mt"), "New book header"),
        new(ScrRef("MAT 1:0/2:s"), "New chapter header"),
        new(ScrRef("MAT 1:0/3:p"), ""),
        new(ScrRef("MAT 1:0/4:ms"), "New major section header"),
        new(ScrRef("MAT 1:0/5:s"), "New section header 1"),
        new(ScrRef("MAT 1:1"), "New verse 1"),
        new(ScrRef("MAT 1:1/1:s"), "New section header 2"),
        new(ScrRef("MAT 1:2"), "New verse 2"),
        new(ScrRef("MAT 1:3"), "New verse 3"),
    ];

    // Every "Old ..." text should be swapped for its "New ..." counterpart with
    // the marker layout left exactly as it was.
    string expectedUsfm =
@"\id MAT
\mt New book header
\c 1
\s New chapter header
\p
\v 0 New verse 0
\ms New major section header
\s New section header 1
\p
\v 1 New verse 1
\s New section header 2
\p
\v 2 New verse 2
\v 3 New verse 3
";

    string actualUsfm = UpdateUsfm(
        replacementRows,
        sourceUsfm,
        usfmUpdateBlockHandlers: [new PlaceMarkersUsfmUpdateBlockHandler()]
    );

    AssertUsfmEquals(actualUsfm, expectedUsfm);
}

private static ScriptureRef[] ScrRef(params string[] refs)
{
return refs.Select(r => ScriptureRef.Parse(r)).ToArray();
Expand Down
45 changes: 43 additions & 2 deletions tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -294,14 +294,55 @@ public void GetRows_VerseZero()

Assert.Multiple(() =>
{
Assert.That(rows, Has.Length.EqualTo(1));
Assert.That(rows, Has.Length.EqualTo(2));

Assert.That(
rows[0].Ref,
Is.EqualTo(ScriptureRef.Parse("MAT 1:0")),
string.Join(",", rows.ToList().Select(tr => tr.Ref.ToString()))
);
Assert.That(rows[0].Text, Is.Empty, string.Join(",", rows.ToList().Select(tr => tr.Text)));

Assert.That(
rows[1].Ref,
Is.EqualTo(ScriptureRef.Parse("MAT 1:1")),
string.Join(",", rows.ToList().Select(tr => tr.Ref.ToString()))
);
Assert.That(rows[1].Text, Is.EqualTo("Verse one."), string.Join(",", rows.ToList().Select(tr => tr.Text)));
});
}

[Test]
public void GetRows_VerseZeroWithText()
{
    // Verse 0 carries explicit text here, so it must be returned as its own row
    // ahead of verse 1, each row holding only its own verse text.
    TextRow[] rows = GetRows(
@"\id MAT - Test
\h
\mt
\c 1
\p \v 0 Verse zero.
\s
\p \v 1 Verse one.
"
    );

    Assert.Multiple(() =>
    {
        // Failure messages list every row so a mismatch shows the whole result.
        string allRefs = string.Join(",", rows.Select(tr => tr.Ref.ToString()));
        string allTexts = string.Join(",", rows.Select(tr => tr.Text));

        Assert.That(rows, Has.Length.EqualTo(2));

        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0")), allRefs);
        Assert.That(rows[0].Text, Is.EqualTo("Verse zero."), allTexts);

        Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1")), allRefs);
        Assert.That(rows[1].Text, Is.EqualTo("Verse one."), allTexts);
    });
}

Expand Down
Loading