Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ public string UpdateUsfm(
UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve,
UpdateUsfmMarkerBehavior embedBehavior = UpdateUsfmMarkerBehavior.Preserve,
UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
IReadOnlyCollection<string> preserveParagraphStyles = null,
IEnumerable<string> preserveParagraphStyles = null,
IEnumerable<IUsfmUpdateBlockHandler> updateBlockHandlers = null,
IEnumerable<string> remarks = null
)
{
Expand All @@ -49,6 +50,7 @@ public string UpdateUsfm(
embedBehavior,
styleBehavior,
preserveParagraphStyles,
updateBlockHandlers,
remarks
);
try
Expand Down
157 changes: 43 additions & 114 deletions src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public enum ScriptureTextType
None,
NonVerse,
Verse,
NoteText
Embed
}

public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
Expand All @@ -19,9 +19,6 @@ public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
private readonly Stack<ScriptureElement> _curElements;
private readonly Stack<ScriptureTextType> _curTextType;
private bool _duplicateVerse = false;
private bool _inEmbed;
protected bool InNoteText { get; private set; }
private bool _inNestedEmbed;

protected ScriptureRefUsfmParserHandlerBase()
{
Expand All @@ -32,8 +29,12 @@ protected ScriptureRefUsfmParserHandlerBase()
/// <summary>
/// Gets the text type on top of the text-type stack, or <see cref="ScriptureTextType.None"/> when the
/// stack is empty.
/// </summary>
protected ScriptureTextType CurrentTextType
{
    get
    {
        if (_curTextType.Count > 0)
            return _curTextType.Peek();
        return ScriptureTextType.None;
    }
}

// Marker names that IsEmbedStyle accepts (it trims '*' from the marker before the lookup).
private static readonly string[] EmbedStyles = new[] { "f", "fe", "fig", "fm", "x" };
// First characters that IsEmbedPartStyle accepts at position 0 of a marker.
private static readonly char[] EmbedPartStartCharStyles = new[] { 'f', 'x', 'z' };
// Marker names that are always treated as embeds; markers starting with "z" are matched separately below.
private static readonly string[] EmbedStyles = new[] { "f", "fe", "x", "fig" };

/// <summary>
/// Determines whether a marker denotes an embed.
/// </summary>
/// <param name="marker">
/// The marker to test. May be <c>null</c>. Leading and trailing '*' characters are ignored for the list lookup.
/// </param>
/// <returns>
/// <c>true</c> when the marker, with '*' trimmed, is one of <c>EmbedStyles</c>, or when the marker starts with
/// "z"; <c>false</c> otherwise, including for <c>null</c>.
/// </returns>
private static bool IsEmbedStyle(string marker)
{
    if (marker == null)
        return false;

    // Markers are identifiers, not linguistic text, so the prefix test is ordinal rather than dependent on the
    // current culture's comparison rules.
    return EmbedStyles.Contains(marker.Trim('*'))
        || marker.StartsWith("z", System.StringComparison.Ordinal);
}

public override void EndUsfm(UsfmParserState state)
{
Expand Down Expand Up @@ -158,36 +159,6 @@ public override void EndSidebar(UsfmParserState state, string marker, bool close
EndParentElement();
}

/// <summary>
/// Handles the start of a note: sets the in-embed flag and starts an embed for the note marker.
/// </summary>
/// <remarks>The <paramref name="caller"/> and <paramref name="category"/> arguments are not used here.</remarks>
public override void StartNote(UsfmParserState state, string marker, string caller, string category)
{
_inEmbed = true;
StartEmbed(state, marker);
}

/// <summary>
/// Handles the end of a note: ends any open note text, ends the embed, then clears the in-embed flag.
/// </summary>
public override void EndNote(UsfmParserState state, string marker, bool closed)
{
// EndNoteTextWrapper only acts when the current text type is NoteText.
EndNoteTextWrapper(state);
// null is passed for the attributes argument of EndEmbed.
EndEmbed(state, marker, null, closed);
_inEmbed = false;
}

/// <summary>
/// Starts an embed for the given marker and forwards to the
/// <c>StartEmbed(UsfmParserState, ScriptureRef)</c> hook with a newly created non-verse reference.
/// </summary>
/// <param name="state">The parser state; its verse reference is used when no current verse reference is set.</param>
/// <param name="marker">The marker that opened the embed.</param>
protected void StartEmbed(UsfmParserState state, string marker)
{
    if (_curVerseRef.IsDefault)
    {
        UpdateVerseRef(state.VerseRef, marker);
    }

    if (!_duplicateVerse)
    {
        // a note inside a verse paragraph, while not in a verse, begins a non-verse segment
        CheckConvertVerseParaToNonVerse(state);
        NextElement(marker);
    }

    // The reference is created only after the element stack has been advanced above.
    var embedRef = CreateNonVerseRef();
    StartEmbed(state, embedRef);
}

/// <summary>
/// Hook called when an embed starts. The default implementation does nothing.
/// </summary>
/// <param name="state">The parser state.</param>
/// <param name="scriptureRef">The reference for the embed, as produced by <c>CreateNonVerseRef</c>.</param>
protected virtual void StartEmbed(UsfmParserState state, ScriptureRef scriptureRef) { }

protected virtual void EndEmbed(
UsfmParserState state,
string marker,
Expand All @@ -214,23 +185,11 @@ public override void StartChar(
IReadOnlyList<UsfmAttribute> attributes
)
{
if (IsEmbedPartStyle(markerWithoutPlus) & InNoteText)
_inNestedEmbed = true;

// if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse
// segment
CheckConvertVerseParaToNonVerse(state);

if (IsEmbedStyle(markerWithoutPlus))
{
_inEmbed = true;
StartEmbed(state, markerWithoutPlus);
}

if (IsNoteText(markerWithoutPlus))
{
StartNoteTextWrapper(state);
}
StartEmbedText(state, markerWithoutPlus);
}

public override void EndChar(
Expand All @@ -240,22 +199,18 @@ public override void EndChar(
bool closed
)
{
if (IsEmbedPartStyle(marker))
{
if (_inNestedEmbed)
{
_inNestedEmbed = false;
}
else
{
EndNoteTextWrapper(state);
}
}
if (IsEmbedStyle(marker))
{
EndEmbed(state, marker, attributes, closed);
_inEmbed = false;
}
EndEmbedText(state);
}

/// <summary>
/// Handles the start of a note by starting embed text for the note marker.
/// </summary>
/// <remarks>The <paramref name="caller"/> and <paramref name="category"/> arguments are not used here.</remarks>
public override void StartNote(UsfmParserState state, string marker, string caller, string category)
{
StartEmbedText(state, marker);
}

/// <summary>
/// Handles the end of a note by ending the current embed text.
/// </summary>
/// <remarks>The <paramref name="marker"/> and <paramref name="closed"/> arguments are not used here.</remarks>
public override void EndNote(UsfmParserState state, string marker, bool closed)
{
EndEmbedText(state);
}

/// <summary>
/// Hook for the start of verse text. The default implementation does nothing.
/// </summary>
/// <param name="state">The parser state.</param>
/// <param name="scriptureRefs">The scripture references supplied for the verse text.</param>
protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList<ScriptureRef> scriptureRefs) { }
Expand All @@ -266,26 +221,9 @@ protected virtual void StartNonVerseText(UsfmParserState state, ScriptureRef scr

/// <summary>
/// Hook for the end of non-verse text. The default implementation does nothing.
/// </summary>
/// <param name="state">The parser state.</param>
/// <param name="scriptureRef">The scripture reference supplied for the non-verse text.</param>
protected virtual void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { }

/// <summary>
/// Marks the handler as being inside note text, pushes <see cref="ScriptureTextType.NoteText"/> onto the
/// text-type stack, and then calls the <c>StartNoteText</c> hook.
/// </summary>
protected virtual void StartNoteTextWrapper(UsfmParserState state)
{
InNoteText = true;
_curTextType.Push(ScriptureTextType.NoteText);
// The hook runs after the state has been updated, so CurrentTextType is already NoteText inside it.
StartNoteText(state);
}

/// <summary>
/// Hook called by <c>StartNoteTextWrapper</c> when note text starts. The default implementation does nothing.
/// </summary>
protected virtual void StartNoteText(UsfmParserState state) { }

/// <summary>
/// Ends note text if it is currently open: calls the <c>EndNoteText</c> hook, pops the text-type stack, and
/// clears <c>InNoteText</c>. Does nothing when the top of the stack is not
/// <see cref="ScriptureTextType.NoteText"/>.
/// </summary>
protected virtual void EndNoteTextWrapper(UsfmParserState state)
{
    bool noteTextIsOpen = _curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.NoteText;
    if (!noteTextIsOpen)
        return;

    // The hook runs before the pop, while NoteText is still the current text type.
    EndNoteText(state, CreateNonVerseRef());
    _curTextType.Pop();
    InNoteText = false;
}
/// <summary>
/// Hook called when embed text starts. The default implementation does nothing.
/// </summary>
/// <param name="state">The parser state.</param>
/// <param name="scriptureRef">The reference for the embed, as produced by <c>CreateNonVerseRef</c>.</param>
protected virtual void StartEmbedText(UsfmParserState state, ScriptureRef scriptureRef) { }

/// <summary>
/// Hook called by <c>EndNoteTextWrapper</c> when note text ends. The default implementation does nothing.
/// </summary>
/// <param name="state">The parser state.</param>
/// <param name="scriptureRef">The reference for the note text, as produced by <c>CreateNonVerseRef</c>.</param>
protected virtual void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef) { }
/// <summary>
/// Hook called when embed text ends. The default implementation does nothing.
/// </summary>
/// <param name="state">The parser state.</param>
/// <param name="scriptureRef">The reference for the embed, as produced by <c>CreateNonVerseRef</c>.</param>
protected virtual void EndEmbedText(UsfmParserState state, ScriptureRef scriptureRef) { }

private void StartVerseText(UsfmParserState state)
{
Expand Down Expand Up @@ -325,6 +263,28 @@ private void UpdateVerseRef(VerseRef verseRef, string marker)
_curVerseRef = verseRef;
}

// Begins embed text for the given marker: advances to the next element, pushes the Embed text type, and
// calls the StartEmbedText(state, scriptureRef) hook. Nothing is started while the duplicate-verse flag is set.
private void StartEmbedText(UsfmParserState state, string marker)
{
    if (_curVerseRef.IsDefault)
    {
        UpdateVerseRef(state.VerseRef, marker);
    }

    if (_duplicateVerse)
    {
        return;
    }

    CheckConvertVerseParaToNonVerse(state);
    NextElement(marker);
    _curTextType.Push(ScriptureTextType.Embed);

    // The reference is created after the element stack and text type have been updated.
    var embedRef = CreateNonVerseRef();
    StartEmbedText(state, embedRef);
}

// Ends the current embed text: calls the EndEmbedText(state, scriptureRef) hook and then pops the text-type
// stack. Does nothing while the duplicate-verse flag is set or when the top of the stack is not Embed.
private void EndEmbedText(UsfmParserState state)
{
    if (_duplicateVerse)
    {
        return;
    }

    if (_curTextType.Count == 0 || _curTextType.Peek() != ScriptureTextType.Embed)
    {
        return;
    }

    // The hook runs before the pop, while Embed is still the current text type.
    EndEmbedText(state, CreateNonVerseRef());
    _curTextType.Pop();
}

private void NextElement(string marker)
{
ScriptureElement prevElem = _curElements.Pop();
Expand Down Expand Up @@ -378,36 +338,5 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state)
StartNonVerseText(state);
}
}

/// <summary>
/// Returns <c>true</c> when the in-embed flag is set or when <paramref name="marker"/> is itself an embed
/// style according to <c>IsEmbedStyle</c>.
/// </summary>
protected bool IsInEmbed(string marker) => _inEmbed || IsEmbedStyle(marker);

/// <summary>
/// Returns <c>true</c> when the nested-embed flag is set, or when <paramref name="marker"/> is a "+"-prefixed
/// marker whose remainder is an embed-part style according to <c>IsEmbedPartStyle</c>.
/// </summary>
protected bool IsInNestedEmbed(string marker)
{
    if (_inNestedEmbed)
        return true;

    // A bare "+" or a marker without the "+" prefix cannot name a nested embed part.
    if (marker is null || !marker.StartsWith("+") || marker.Length <= 1)
        return false;

    string markerWithoutPlus = marker.Substring(1);
    return IsEmbedPartStyle(markerWithoutPlus);
}

/// <summary>
/// Returns <c>true</c> only when <paramref name="marker"/> is exactly "ft".
/// </summary>
protected static bool IsNoteText(string marker) => string.Equals(marker, "ft");

/// <summary>
/// Returns <c>true</c> when <paramref name="marker"/> is non-empty and its first character is one of
/// <c>EmbedPartStartCharStyles</c>; <c>false</c> for <c>null</c> or an empty string.
/// </summary>
protected static bool IsEmbedPartStyle(string marker)
{
    if (string.IsNullOrEmpty(marker))
        return false;

    char firstChar = marker[0];
    return firstChar.IsOneOf(EmbedPartStartCharStyles);
}

/// <summary>
/// Returns <c>true</c> when <paramref name="marker"/>, with leading and trailing '*' characters removed, is
/// one of <c>EmbedStyles</c>; <c>false</c> for <c>null</c>.
/// </summary>
protected static bool IsEmbedStyle(string marker)
{
    if (marker is null)
        return false;

    string markerName = marker.Trim('*');
    return markerName.IsOneOf(EmbedStyles);
}
}
}
Loading
Loading