From 4a9f7659b5ddb5d5a276238dbcccdb03a5c04acd Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 9 Oct 2025 09:56:58 -0400 Subject: [PATCH 01/17] Add detector classes and manual test --- ...extProjectVersificationMismatchDetector.cs | 58 +++++++++ .../UsfmVersificationMismatchDetector.cs | 122 ++++++++++++++++++ ...extProjectVersificationMismatchDetector.cs | 29 +++++ ...extProjectVersificationMismatchDetector.cs | 23 ++++ .../Corpora/UsfmManualTests.cs | 15 +++ 5 files changed, 247 insertions(+) create mode 100644 src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs create mode 100644 src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs create mode 100644 src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs create mode 100644 tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs diff --git a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs new file mode 100644 index 00000000..5e6c2b88 --- /dev/null +++ b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs @@ -0,0 +1,58 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace SIL.Machine.Corpora +{ + public abstract class ParatextProjectVersificationMismatchDetector + { + private readonly ParatextProjectSettings _settings; + + protected ParatextProjectVersificationMismatchDetector(ParatextProjectSettings settings) + { + _settings = settings; + } + + protected ParatextProjectVersificationMismatchDetector(ParatextProjectSettingsParserBase settingsParser) + { + _settings = settingsParser.Parse(); + } + + public IReadOnlyList GetUsfmVersificationMismatches( + UsfmVersificationMismatchDetector handler = null + ) + { + handler = handler ?? new UsfmVersificationMismatchDetector(_settings.Versification); + foreach (string fileName in _settings.GetAllScriptureBookFileNames()) + { + if (!Exists(fileName)) + continue; + + string usfm; + using (var reader = new StreamReader(Open(fileName))) + { + usfm = reader.ReadToEnd(); + } + + try + { + UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification); + } + catch (Exception ex) + { + var sb = new StringBuilder(); + sb.Append($"An error occurred while parsing the usfm for '{fileName}`"); + if (!string.IsNullOrEmpty(_settings.Name)) + sb.Append($" in project '{_settings.Name}'"); + sb.Append($". Error: '{ex.Message}'"); + throw new InvalidOperationException(sb.ToString(), ex); + } + } + return handler.Errors; + } + + protected abstract bool Exists(string fileName); + protected abstract Stream Open(string fileName); + } +} diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs new file mode 100644 index 00000000..540866be --- /dev/null +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -0,0 +1,122 @@ +using System.Collections.Generic; +using System.Linq; +using SIL.Scripture; + +namespace SIL.Machine.Corpora +{ + public enum UsfmVersificationMismatchType + { + MissingChapter, + ExtraChapter, + MissingVerse, + ExtraVerse, + MissingSegment, + ExtraSegment + } + + public class UsfmVersificationMismatch //TODO Better name + { + public int BookNum { get; set; } + public int ExpectedChapter { get; set; } + public int ExpectedVerse { get; set; } + public int ActualChapter { get; set; } + public int ActualVerse { get; set; } + + public string ExpectedSegment { get; set; } + public string ActualSegment { get; set; } + public UsfmVersificationMismatchType Type { get; set; } + + public bool IsMismatch() + { + if (ExpectedChapter != ActualChapter) //TODO set type + return true; + if (ExpectedVerse != ActualVerse) + return true; + if (ExpectedSegment != ActualSegment && ExpectedSegment != null) + return true; + return false; + } + + public string ExpectedVerseRef => new VerseRef(BookNum, ExpectedChapter, ExpectedVerse).ToString(); + public string ActualVerseRef => new VerseRef(BookNum, ActualChapter, ActualVerse).ToString(); + } + + public class UsfmVersificationMismatchDetector : UsfmParserHandlerBase + { + private readonly ScrVers _versification; + private int _currentBook; + private int _currentChapter; + private VerseRef _currentVerse; + private readonly List _errors; + + public UsfmVersificationMismatchDetector(ScrVers versification) + { + _versification = versification; + _currentBook = 0; + _currentChapter = 0; + _currentVerse = new VerseRef(); + _errors = new List(); + } + + public bool HasError => _errors.Count > 0; + public IReadOnlyList Errors => _errors; + + public override void StartBook(UsfmParserState state, string marker, string code) + { + if (_currentBook > 0 && Canon.IsCanonical(_currentBook)) + { + var versificationMismatch = new UsfmVersificationMismatch() + { + BookNum = _currentBook, + ExpectedChapter = _versification.GetLastChapter(_currentBook), + ExpectedVerse = _versification.GetLastVerse(_currentBook, _currentChapter), + ActualChapter = _currentChapter, + ActualVerse = _currentVerse.AllVerses().Last().VerseNum, + }; + if (versificationMismatch.IsMismatch()) + _errors.Add(versificationMismatch); + } + + _currentBook = state.VerseRef.BookNum; + _currentChapter = 0; + _currentVerse = new VerseRef(); + } + + public override void Verse( + UsfmParserState state, + string number, + string marker, + string altNumber, + string pubNumber + ) + { + _currentVerse = state.VerseRef; + } + + public override void Chapter( + UsfmParserState state, + string number, + string marker, + string altNumber, + string pubNumber + ) + { + if (_currentChapter != 0) + { + var versificationMismatch = new UsfmVersificationMismatch() + { + BookNum = _currentBook, + ExpectedChapter = _currentChapter, + ExpectedVerse = _versification.GetLastVerse(_currentBook, _currentChapter), + ActualChapter = _currentChapter, + ActualVerse = _currentVerse.AllVerses().Last().VerseNum, + }; + if (versificationMismatch.IsMismatch()) + _errors.Add(versificationMismatch); + } + + _currentChapter = state.VerseRef.ChapterNum; + _currentVerse = new VerseRef(); + } + } +} diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs new file mode 100644 index 00000000..aeb6f0a4 --- /dev/null +++ b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs @@ -0,0 +1,29 @@ +using System.IO; +using System.IO.Compression; + +namespace SIL.Machine.Corpora +{ + public class ZipParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetector + { + private readonly ZipArchive _archive; + + public ZipParatextProjectVersificationMismatchDetector(ZipArchive archive) + : base(new ZipParatextProjectSettingsParser(archive)) + { + _archive = archive; + } + + protected override bool Exists(string fileName) + { + return _archive.GetEntry(fileName) != null; + } + + protected override Stream Open(string fileName) + { + ZipArchiveEntry entry = _archive.GetEntry(fileName); + if (entry == null) + return null; + return entry.Open(); + } + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs new file mode 100644 index 00000000..62ce3fb6 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs @@ -0,0 +1,23 @@ +using System.Text; + +namespace SIL.Machine.Corpora; + +public class MemoryParatextProjectVersificationMismatchDetector( + ParatextProjectSettings settings, + IDictionary files +) : ParatextProjectVersificationMismatchDetector(settings) +{ + public IDictionary Files { get; } = files; + + protected override bool Exists(string fileName) + { + return Files.ContainsKey(fileName); + } + + protected override Stream? Open(string fileName) + { + if (!Files.TryGetValue(fileName, out string? contents)) + return null; + return new MemoryStream(Encoding.UTF8.GetBytes(contents)); + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index e2eefd4f..10800efe 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -78,4 +78,19 @@ public void AnalyzeCorporaQuoteConventions() Assert.NotNull(targetAnalysis); }); } + + [Test] + [Ignore("This is for manual testing only. Remove this tag to run the test.")] + public void ValidateUsfmVersification() + { + using ZipArchive zipArchive = ZipFile.OpenRead(CorporaTestHelpers.UsfmSourceProjectZipPath); + var quoteConventionDetector = new ZipParatextProjectVersificationMismatchDetector(zipArchive); + IReadOnlyList mismatches = quoteConventionDetector.GetUsfmVersificationMismatches(); + + Assert.That( + mismatches, + Has.Count.EqualTo(0), + JsonSerializer.Serialize(mismatches, new JsonSerializerOptions { WriteIndented = true }) + ); + } } From 8bd1add7751b1fd36d4c9380d26aa02da68e0b6f Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 10 Oct 2025 12:31:55 -0400 Subject: [PATCH 02/17] Add rest of detectable versification mismatches --- ...extProjectVersificationMismatchDetector.cs | 25 +++ .../UsfmVersificationMismatchDetector.cs | 191 ++++++++++++++---- 2 files changed, 179 insertions(+), 37 deletions(-) create mode 100644 src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs new file mode 100644 index 00000000..6e3d71ca --- /dev/null +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs @@ -0,0 +1,25 @@ +using System.IO; + +namespace SIL.Machine.Corpora +{ + public class FileParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetector + { + private readonly string _projectDir; + + public FileParatextProjectVersificationMismatchDetector(string projectDir) + : base(new FileParatextProjectSettingsParser(projectDir)) + { + _projectDir = projectDir; + } + + protected override bool Exists(string fileName) + { + return File.Exists(Path.Combine(_projectDir, fileName)); + } + + protected override Stream Open(string fileName) + { + return File.OpenRead(Path.Combine(_projectDir, fileName)); + } + } +} diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs index 540866be..504b196e 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -1,4 +1,5 @@ using System.Collections.Generic; +using System.ComponentModel; using System.Linq; using SIL.Scripture; @@ -7,38 +8,146 @@ namespace SIL.Machine.Corpora public enum UsfmVersificationMismatchType { MissingChapter, - ExtraChapter, MissingVerse, + MissingVerseSegment, + ExtraVerseSegment, ExtraVerse, - MissingSegment, - ExtraSegment + UnknownVersification, + InvalidVerseRange } - public class UsfmVersificationMismatch //TODO Better name + public class UsfmVersificationMismatch { - public int BookNum { get; set; } - public int ExpectedChapter { get; set; } - public int ExpectedVerse { get; set; } - public int ActualChapter { get; set; } - public int ActualVerse { get; set; } + private readonly int _bookNum; + private readonly int _expectedChapter; + private readonly int _expectedVerse; + private readonly int _actualChapter; + private readonly int _actualVerse; + private VerseRef? _verseRef = null; - public string ExpectedSegment { get; set; } - public string ActualSegment { get; set; } - public UsfmVersificationMismatchType Type { get; set; } + public UsfmVersificationMismatch( + int bookNum, + int expectedChapter, + int expectedVerse, + int actualChapter, + int actualVerse, + VerseRef? verseRef = null + ) + { + _bookNum = bookNum; + _expectedChapter = expectedChapter; + _expectedVerse = expectedVerse; + _actualChapter = actualChapter; + _actualVerse = actualVerse; + _verseRef = verseRef; + } + + public UsfmVersificationMismatchType Type { get; private set; } - public bool IsMismatch() + // Returns true if there is a mismatch + public bool CheckMismatch() { - if (ExpectedChapter != ActualChapter) //TODO set type - return true; - if (ExpectedVerse != ActualVerse) + if (_expectedChapter > _actualChapter) + { + Type = UsfmVersificationMismatchType.MissingChapter; return true; - if (ExpectedSegment != ActualSegment && ExpectedSegment != null) + } + if (_expectedVerse > _actualVerse) + { + Type = UsfmVersificationMismatchType.MissingVerse; return true; + } + // VerseRef's internal validation covers the other cases as well, + // but in order to provide a user-friendly taxonomy, the others + // have been checked separately + if (_verseRef != null) + { + if (string.IsNullOrEmpty(_verseRef.Value.Segment()) && _verseRef.Value.HasSegmentsDefined) + { + Type = UsfmVersificationMismatchType.MissingVerseSegment; + return true; + } + if (!string.IsNullOrEmpty(_verseRef.Value.Segment()) && !_verseRef.Value.HasSegmentsDefined) + { + Type = UsfmVersificationMismatchType.ExtraVerseSegment; + return true; + } + if (!_verseRef.Value.Valid) + { + Type = Map(_verseRef.Value.ValidStatus); + return true; + } + } return false; } - public string ExpectedVerseRef => new VerseRef(BookNum, ExpectedChapter, ExpectedVerse).ToString(); - public string ActualVerseRef => new VerseRef(BookNum, ActualChapter, ActualVerse).ToString(); + private static UsfmVersificationMismatchType Map(VerseRef.ValidStatusType validStatus) + { + switch (validStatus) + { + case VerseRef.ValidStatusType.UnknownVersification: + return UsfmVersificationMismatchType.UnknownVersification; + case VerseRef.ValidStatusType.OutOfRange: + return UsfmVersificationMismatchType.ExtraVerse; + case VerseRef.ValidStatusType.VerseRepeated: + case VerseRef.ValidStatusType.VerseOutOfOrder: + return UsfmVersificationMismatchType.InvalidVerseRange; + default: + throw new InvalidEnumArgumentException( + nameof(validStatus), + (int)validStatus, + typeof(VerseRef.ValidStatusType) + ); + } + } + + public string ExpectedVerseRef + { + get + { + VerseRef defaultVerseRef = new VerseRef(_bookNum, _expectedChapter, _expectedVerse); + if (Type == UsfmVersificationMismatchType.ExtraVerse) + return ""; + if ( + Type == UsfmVersificationMismatchType.MissingVerseSegment + && VerseRef.TryParse( + $"{defaultVerseRef.Book} {defaultVerseRef.Chapter}:{defaultVerseRef.Verse}a", + out VerseRef verseWithSegment + ) + ) + { + return verseWithSegment.ToString(); + } + if (Type == UsfmVersificationMismatchType.InvalidVerseRange) + { + List sortedAllUniqueVerses = _verseRef + .Value.AllVerses() + .Distinct() + .OrderBy(v => v) + .ToList(); + VerseRef firstVerse = sortedAllUniqueVerses[0]; + VerseRef lastVerse = sortedAllUniqueVerses[sortedAllUniqueVerses.Count - 1]; + if (firstVerse.Equals(lastVerse)) + { + return firstVerse.ToString(); + } + else if ( + VerseRef.TryParse( + $"{firstVerse.Book} {firstVerse.Chapter}:{firstVerse.Verse}-{lastVerse.Verse}", + out VerseRef correctedVerseRangeRef + ) + ) + { + return correctedVerseRangeRef.ToString(); + } + } + return defaultVerseRef.ToString(); + } + } + public string ActualVerseRef => + _verseRef != null + ? _verseRef.Value.ToString() + : new VerseRef(_bookNum, _actualChapter, _actualVerse).ToString(); } public class UsfmVersificationMismatchDetector : UsfmParserHandlerBase @@ -65,15 +174,14 @@ public override void StartBook(UsfmParserState state, string marker, string code { if (_currentBook > 0 && Canon.IsCanonical(_currentBook)) { - var versificationMismatch = new UsfmVersificationMismatch() - { - BookNum = _currentBook, - ExpectedChapter = _versification.GetLastChapter(_currentBook), - ExpectedVerse = _versification.GetLastVerse(_currentBook, _currentChapter), - ActualChapter = _currentChapter, - ActualVerse = _currentVerse.AllVerses().Last().VerseNum, - }; - if (versificationMismatch.IsMismatch()) + var versificationMismatch = new UsfmVersificationMismatch( + _currentBook, + _versification.GetLastChapter(_currentBook), + _versification.GetLastVerse(_currentBook, _currentChapter), + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum + ); + if (versificationMismatch.CheckMismatch()) _errors.Add(versificationMismatch); } @@ -91,6 +199,16 @@ string pubNumber ) { _currentVerse = state.VerseRef; + var versificationMismatch = new UsfmVersificationMismatch( + _currentBook, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentVerse + ); + if (versificationMismatch.CheckMismatch()) + _errors.Add(versificationMismatch); } public override void Chapter( @@ -103,15 +221,14 @@ string pubNumber { if (_currentChapter != 0) { - var versificationMismatch = new UsfmVersificationMismatch() - { - BookNum = _currentBook, - ExpectedChapter = _currentChapter, - ExpectedVerse = _versification.GetLastVerse(_currentBook, _currentChapter), - ActualChapter = _currentChapter, - ActualVerse = _currentVerse.AllVerses().Last().VerseNum, - }; - if (versificationMismatch.IsMismatch()) + var versificationMismatch = new UsfmVersificationMismatch( + _currentBook, + _currentChapter, + _versification.GetLastVerse(_currentBook, _currentChapter), + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum + ); + if (versificationMismatch.CheckMismatch()) _errors.Add(versificationMismatch); } From 193c9643deab7f16dd9b8c2e55d28fc2ad65abb3 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 10 Oct 2025 12:44:37 -0400 Subject: [PATCH 03/17] Only flag missing non-empty chapters --- .../UsfmVersificationMismatchDetector.cs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs index 504b196e..e2107f08 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -9,11 +9,10 @@ public enum UsfmVersificationMismatchType { MissingChapter, MissingVerse, - MissingVerseSegment, - ExtraVerseSegment, ExtraVerse, - UnknownVersification, - InvalidVerseRange + InvalidVerseRange, + MissingVerseSegment, + ExtraVerseSegment } public class UsfmVersificationMismatch @@ -47,7 +46,8 @@ public UsfmVersificationMismatch( // Returns true if there is a mismatch public bool CheckMismatch() { - if (_expectedChapter > _actualChapter) + //A non-empty chapter is expected + if (_expectedChapter > _actualChapter && _expectedVerse != 0) { Type = UsfmVersificationMismatchType.MissingChapter; return true; @@ -57,9 +57,6 @@ public bool CheckMismatch() Type = UsfmVersificationMismatchType.MissingVerse; return true; } - // VerseRef's internal validation covers the other cases as well, - // but in order to provide a user-friendly taxonomy, the others - // have been checked separately if (_verseRef != null) { if (string.IsNullOrEmpty(_verseRef.Value.Segment()) && _verseRef.Value.HasSegmentsDefined) @@ -85,8 +82,6 @@ private static UsfmVersificationMismatchType Map(VerseRef.ValidStatusType validS { switch (validStatus) { - case VerseRef.ValidStatusType.UnknownVersification: - return UsfmVersificationMismatchType.UnknownVersification; case VerseRef.ValidStatusType.OutOfRange: return UsfmVersificationMismatchType.ExtraVerse; case VerseRef.ValidStatusType.VerseRepeated: @@ -177,7 +172,7 @@ public override void StartBook(UsfmParserState state, string marker, string code var versificationMismatch = new UsfmVersificationMismatch( _currentBook, _versification.GetLastChapter(_currentBook), - _versification.GetLastVerse(_currentBook, _currentChapter), + _versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)), _currentChapter, _currentVerse.AllVerses().Last().VerseNum ); From b8a908943250b0ec613837b04cbbe158d8ea2198 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 10 Oct 2025 15:41:55 -0400 Subject: [PATCH 04/17] Create interface for paratext project file handling --- .../Corpora/FileParatextProjectFileHandler.cs | 31 ++++++++++ .../Corpora/FileParatextProjectTextUpdater.cs | 21 +------ ...extProjectVersificationMismatchDetector.cs | 19 +----- .../Corpora/IParatextProjectFileHandler.cs | 11 ++++ .../Corpora/ParatextBackupTermsCorpus.cs | 8 +-- .../Corpora/ParatextProjectTermsParserBase.cs | 15 ++--- .../Corpora/ParatextProjectTextUpdaterBase.cs | 16 +++-- ...extProjectVersificationMismatchDetector.cs | 16 +++-- .../Corpora/ZipParatextProjectFileHandler.cs | 35 +++++++++++ .../Corpora/ZipParatextProjectTermsParser.cs | 23 +------- .../Corpora/ZipParatextProjectTextUpdater.cs | 21 +------ ...extProjectVersificationMismatchDetector.cs | 21 +------ .../ParatextProjectQuoteConventionDetector.cs | 16 +++-- ...pParatextProjectQuoteConventionDetector.cs | 21 +------ .../MemoryParatextProjectFileHandler.cs | 59 +++++++++++++++++++ .../MemoryParatextProjectTermsParser.cs | 21 +------ ...extProjectVersificationMismatchDetector.cs | 23 +------- ...atextProjectQuoteConvetionDetectorTests.cs | 35 +---------- .../ParatextProjectTermsParserTests.cs | 44 ++------------ ...ryParatextProjectQuoteConvetionDetector.cs | 22 +------ 20 files changed, 189 insertions(+), 289 deletions(-) create mode 100644 src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs create mode 100644 src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs create mode 100644 src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs create mode 100644 tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs diff --git a/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs new file mode 100644 index 00000000..99601f8e --- /dev/null +++ b/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs @@ -0,0 +1,31 @@ +using System.IO; + +namespace SIL.Machine.Corpora +{ + public class FileParatextProjectFileHandler : IParatextProjectFileHandler + { + private readonly string _projectDir; + private readonly ParatextProjectSettings _settings; + + public FileParatextProjectFileHandler(string projectDir) + { + _projectDir = projectDir; + _settings = new FileParatextProjectSettingsParser(projectDir).Parse(); + } + + public bool Exists(string fileName) + { + return File.Exists(Path.Combine(_projectDir, fileName)); + } + + public Stream Open(string fileName) + { + return File.OpenRead(Path.Combine(_projectDir, fileName)); + } + + public ParatextProjectSettings GetSettings() + { + return _settings; + } + } +} diff --git a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs index c9c9dd95..be24baa0 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs @@ -1,25 +1,8 @@ -using System.IO; - -namespace SIL.Machine.Corpora +namespace SIL.Machine.Corpora { public class FileParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { - private readonly string _projectDir; - public FileParatextProjectTextUpdater(string projectDir) - : base(new FileParatextProjectSettingsParser(projectDir)) - { - _projectDir = projectDir; - } - - protected override bool Exists(string fileName) - { - return File.Exists(Path.Combine(_projectDir, fileName)); - } - - protected override Stream Open(string fileName) - { - return File.OpenRead(Path.Combine(_projectDir, fileName)); - } + : base(new FileParatextProjectFileHandler(projectDir)) { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs index 6e3d71ca..4c64d838 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs @@ -1,25 +1,8 @@ -using System.IO; - namespace SIL.Machine.Corpora { public class FileParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetector { - private readonly string _projectDir; - public FileParatextProjectVersificationMismatchDetector(string projectDir) - : base(new FileParatextProjectSettingsParser(projectDir)) - { - _projectDir = projectDir; - } - - protected override bool Exists(string fileName) - { - return File.Exists(Path.Combine(_projectDir, fileName)); - } - - protected override Stream Open(string fileName) - { - return File.OpenRead(Path.Combine(_projectDir, fileName)); - } + : base(new FileParatextProjectFileHandler(projectDir)) { } } } diff --git a/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs new file mode 100644 index 00000000..610c554d --- /dev/null +++ b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs @@ -0,0 +1,11 @@ +using System.IO; + +namespace SIL.Machine.Corpora +{ + public interface IParatextProjectFileHandler + { + bool Exists(string fileName); + Stream Open(string fileName); + ParatextProjectSettings GetSettings(); + } +} diff --git a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs index 111cbc5e..d09b3409 100644 --- a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs +++ b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs @@ -15,14 +15,12 @@ public ParatextBackupTermsCorpus( { using (var archive = ZipFile.OpenRead(fileName)) { - ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse(); - IEnumerable<(string, IReadOnlyList)> glosses = new ZipParatextProjectTermsParser( - archive, - settings - ) + IEnumerable<(string, IReadOnlyList)> glosses = new ZipParatextProjectTermsParser(archive) .Parse(termCategories, useTermGlosses, chapters) .OrderBy(g => g.TermId); + ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse(); + string textId = $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; diff --git a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs index 38e3904e..1a1ee524 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs @@ -37,15 +37,12 @@ public abstract class ParatextProjectTermsParserBase private static readonly Regex NumericalInformationRegex = new Regex(@"\s+\d+(\.\d+)*$", RegexOptions.Compiled); private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectTermsParserBase(ParatextProjectSettings settings) + protected ParatextProjectTermsParserBase(IParatextProjectFileHandler paratextProjectFileHandler) { - _settings = settings; - } - - protected ParatextProjectTermsParserBase(ParatextProjectSettingsParserBase settingsParser) - { - _settings = settingsParser.Parse(); + _settings = paratextProjectFileHandler.GetSettings(); + _paratextProjectFileHandler = paratextProjectFileHandler; } public IEnumerable<(string TermId, IReadOnlyList Glosses)> Parse( @@ -299,8 +296,8 @@ private static IDictionary> GetReferences(XDo ); } - protected abstract Stream Open(string fileName); + private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); - protected abstract bool Exists(string fileName); + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 85dc470a..5b77d244 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -8,15 +8,12 @@ namespace SIL.Machine.Corpora public abstract class ParatextProjectTextUpdaterBase { private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectTextUpdaterBase(ParatextProjectSettings settings) + protected ParatextProjectTextUpdaterBase(IParatextProjectFileHandler paratextProjectFileHandler) { - _settings = settings; - } - - protected ParatextProjectTextUpdaterBase(ParatextProjectSettingsParserBase settingsParser) - { - _settings = settingsParser.Parse(); + _settings = paratextProjectFileHandler.GetSettings(); + _paratextProjectFileHandler = paratextProjectFileHandler; } public string UpdateUsfm( @@ -73,7 +70,8 @@ public string UpdateUsfm( } } - protected abstract bool Exists(string fileName); - protected abstract Stream Open(string fileName); + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); + + private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs index 5e6c2b88..c197835a 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs @@ -8,15 +8,12 @@ namespace SIL.Machine.Corpora public abstract class ParatextProjectVersificationMismatchDetector { private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectVersificationMismatchDetector(ParatextProjectSettings settings) + protected ParatextProjectVersificationMismatchDetector(IParatextProjectFileHandler paratextProjectFileHandler) { - _settings = settings; - } - - protected ParatextProjectVersificationMismatchDetector(ParatextProjectSettingsParserBase settingsParser) - { - _settings = settingsParser.Parse(); + _settings = _paratextProjectFileHandler.GetSettings(); + _paratextProjectFileHandler = paratextProjectFileHandler; } public IReadOnlyList GetUsfmVersificationMismatches( @@ -52,7 +49,8 @@ public IReadOnlyList GetUsfmVersificationMismatches( return handler.Errors; } - protected abstract bool Exists(string fileName); - protected abstract Stream Open(string fileName); + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); + + private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs new file mode 100644 index 00000000..de0129c7 --- /dev/null +++ b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs @@ -0,0 +1,35 @@ +using System.IO; +using System.IO.Compression; + +namespace SIL.Machine.Corpora +{ + public class ZipParatextProjectFileHandler : IParatextProjectFileHandler + { + private readonly ZipArchive _archive; + private readonly ParatextProjectSettings _settings; + + public ZipParatextProjectFileHandler(ZipArchive archive) + { + _archive = archive; + _settings = new ZipParatextProjectSettingsParser(archive).Parse(); + } + + public bool Exists(string fileName) + { + return _archive.GetEntry(fileName) != null; + } + + public Stream Open(string fileName) + { + ZipArchiveEntry entry = _archive.GetEntry(fileName); + if (entry == null) + return null; + return entry.Open(); + } + + public ParatextProjectSettings GetSettings() + { + return _settings; + } + } +} diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs index 863cb563..5da62eb5 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs @@ -1,29 +1,10 @@ -using System.IO; using System.IO.Compression; namespace SIL.Machine.Corpora { public class ZipParatextProjectTermsParser : ParatextProjectTermsParserBase { - private readonly ZipArchive _archive; - - public ZipParatextProjectTermsParser(ZipArchive archive, ParatextProjectSettings settings = null) - : base(settings ?? new ZipParatextProjectSettingsParser(archive).Parse()) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + public ZipParatextProjectTermsParser(ZipArchive archive) + : base(new ZipParatextProjectFileHandler(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs index 0eb30f56..3a941dac 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs @@ -1,29 +1,10 @@ -using System.IO; using System.IO.Compression; namespace SIL.Machine.Corpora { public class ZipParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { - private readonly ZipArchive _archive; - public ZipParatextProjectTextUpdater(ZipArchive archive) - : base(new ZipParatextProjectSettingsParser(archive)) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + : base(new ZipParatextProjectFileHandler(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs index aeb6f0a4..c2e7526a 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs @@ -1,29 +1,10 @@ -using System.IO; using System.IO.Compression; namespace SIL.Machine.Corpora { public class ZipParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetector { - private readonly ZipArchive _archive; - public ZipParatextProjectVersificationMismatchDetector(ZipArchive archive) - : base(new ZipParatextProjectSettingsParser(archive)) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + : base(new ZipParatextProjectFileHandler(archive)) { } } } diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index 1812fad4..b1b9f648 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -11,15 +11,12 @@ namespace SIL.Machine.PunctuationAnalysis public abstract class ParatextProjectQuoteConventionDetector { private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectQuoteConventionDetector(ParatextProjectSettings settings) + protected ParatextProjectQuoteConventionDetector(IParatextProjectFileHandler paratextProjectFileHandler) { - _settings = settings; - } - - protected ParatextProjectQuoteConventionDetector(ParatextProjectSettingsParserBase settingsParser) - { - _settings = settingsParser.Parse(); + _settings = paratextProjectFileHandler.GetSettings(); + _paratextProjectFileHandler = paratextProjectFileHandler; } public QuoteConventionAnalysis GetQuoteConventionAnalysis(QuoteConventionDetector handler = null) @@ -81,7 +78,8 @@ string bookId in Canon return handler.DetectQuoteConvention(includeChapters); } - protected abstract bool Exists(string fileName); - protected abstract Stream Open(string fileName); + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); + + private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); } } diff --git a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs index fa8af932..980e7d3a 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs @@ -1,4 +1,3 @@ -using System.IO; using System.IO.Compression; using SIL.Machine.Corpora; @@ -6,25 +5,7 @@ namespace SIL.Machine.PunctuationAnalysis { public class ZipParatextProjectQuoteConventionDetector : ParatextProjectQuoteConventionDetector { - private readonly ZipArchive _archive; - public ZipParatextProjectQuoteConventionDetector(ZipArchive archive) - : base(new ZipParatextProjectSettingsParser(archive)) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + : base(new ZipParatextProjectFileHandler(archive)) { } } } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs new file mode 100644 index 00000000..f5719a5e --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs @@ -0,0 +1,59 @@ +using System.Text; +using SIL.Scripture; + +namespace SIL.Machine.Corpora; + +public class MemoryParatextProjectFileHandler( + IDictionary? files = null, + ParatextProjectSettings? settings = null +) : IParatextProjectFileHandler +{ + public IDictionary Files { get; } = files ?? new Dictionary(); + private readonly ParatextProjectSettings _settings = settings ?? new DefaultParatextProjectSettings(); + + public bool Exists(string fileName) + { + return Files.ContainsKey(fileName); + } + + public Stream? Open(string fileName) + { + if (!Files.TryGetValue(fileName, out string? contents)) + return null; + return new MemoryStream(Encoding.UTF8.GetBytes(contents)); + } + + public ParatextProjectSettings GetSettings() + { + return _settings; + } + + public class DefaultParatextProjectSettings( + string name = "Test", + string fullName = "TestProject", + Encoding? encoding = null, + ScrVers? versification = null, + UsfmStylesheet? stylesheet = null, + string fileNamePrefix = "", + string fileNameForm = "41MAT", + string fileNameSuffix = "Test.SFM", + string biblicalTermsListType = "Project", + string biblicalTermsProjectName = "Test", + string biblicalTermsFileName = "ProjectBiblicalTerms.xml", + string languageCode = "en" + ) + : ParatextProjectSettings( + name, + fullName, + encoding ?? Encoding.UTF8, + versification ?? ScrVers.English, + stylesheet ?? new UsfmStylesheet("usfm.sty"), + fileNamePrefix, + fileNameForm, + fileNameSuffix, + biblicalTermsListType, + biblicalTermsProjectName, + biblicalTermsFileName, + languageCode + ) { } +} diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs index 7fb93798..b694eba6 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs @@ -1,21 +1,4 @@ -using System.Text; - namespace SIL.Machine.Corpora; -public class MemoryParatextProjectTermsParser(ParatextProjectSettings settings, IDictionary files) - : ParatextProjectTermsParserBase(settings) -{ - public IDictionary Files { get; } = files; - - protected override bool Exists(string fileName) - { - return Files.ContainsKey(fileName); - } - - protected override Stream? Open(string fileName) - { - if (!Files.TryGetValue(fileName, out string? contents)) - return null; - return new MemoryStream(Encoding.UTF8.GetBytes(contents)); - } -} +public class MemoryParatextProjectTermsParser(IDictionary? files, ParatextProjectSettings? settings) + : ParatextProjectTermsParserBase(new MemoryParatextProjectFileHandler(files, settings)) { } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs index 62ce3fb6..d5e11c3b 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs @@ -1,23 +1,6 @@ -using System.Text; - namespace SIL.Machine.Corpora; public class MemoryParatextProjectVersificationMismatchDetector( - ParatextProjectSettings settings, - IDictionary files -) : ParatextProjectVersificationMismatchDetector(settings) -{ - public IDictionary Files { get; } = files; - - protected override bool Exists(string fileName) - { - return Files.ContainsKey(fileName); - } - - protected override Stream? Open(string fileName) - { - if (!Files.TryGetValue(fileName, out string? contents)) - return null; - return new MemoryStream(Encoding.UTF8.GetBytes(contents)); - } -} + IDictionary files, + ParatextProjectSettings settings +) : ParatextProjectVersificationMismatchDetector(new MemoryParatextProjectFileHandler(files, settings)) { } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs index 7338d611..555e7b36 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs @@ -1,4 +1,3 @@ -using System.Text; using NUnit.Framework; using SIL.Machine.PunctuationAnalysis; using SIL.Scripture; @@ -130,10 +129,7 @@ public void TestGetQuotationConventionInvalidBookCode() private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { public ParatextProjectQuoteConventionDetector Detector { get; } = - new MemoryParatextProjectQuoteConventionDetector( - settings ?? new DefaultParatextProjectSettings(), - files ?? new() - ); + new MemoryParatextProjectQuoteConventionDetector(files, settings); public QuoteConventionAnalysis GetQuoteConvention(string? scriptureRange = null) { @@ -165,33 +161,4 @@ private static string GetTestChapter(int number, QuoteConvention? quoteConventio \v 5 Then someone said, {leftQuote}More things someone said.{rightQuote} "; } - - private class DefaultParatextProjectSettings( - string name = "Test", - string fullName = "TestProject", - Encoding? encoding = null, - ScrVers? versification = null, - UsfmStylesheet? stylesheet = null, - string fileNamePrefix = "", - string fileNameForm = "41MAT", - string fileNameSuffix = "Test.SFM", - string biblicalTermsListType = "Project", - string biblicalTermsProjectName = "Test", - string biblicalTermsFileName = "ProjectBiblicalTerms.xml", - string languageCode = "en" - ) - : ParatextProjectSettings( - name, - fullName, - encoding ?? Encoding.UTF8, - versification ?? ScrVers.English, - stylesheet ?? new UsfmStylesheet("usfm.sty"), - fileNamePrefix, - fileNameForm, - fileNameSuffix, - biblicalTermsListType, - biblicalTermsProjectName, - biblicalTermsFileName, - languageCode - ) { } } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs index 1e3fb736..00329912 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs @@ -1,6 +1,4 @@ -using System.Text; using NUnit.Framework; -using SIL.Scripture; namespace SIL.Machine.Corpora; @@ -47,7 +45,7 @@ public void TestGetKeyTermsFromTermsRenderings() public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -62,7 +60,7 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermGlosses() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -76,7 +74,7 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermG public void TestGetKeyTermsFromTermsLocalizations() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml", languageCode: "fr" @@ -92,7 +90,7 @@ public void TestGetKeyTermsFromTermsLocalizations() public void TestGetKeyTermsFromTermsLocalizations_FilterByChapters() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml", languageCode: "fr" @@ -115,7 +113,7 @@ public void TestGetKeyTermsFromTermsLocalizations_FilterByChapters() public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLocalization() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -187,41 +185,11 @@ private class TestEnvironment( private readonly bool _useTermGlosses = useTermGlosses; private readonly IDictionary>? _chapters = chapters; - public ParatextProjectTermsParserBase Parser { get; } = - new MemoryParatextProjectTermsParser(settings ?? new DefaultParatextProjectSettings(), files ?? new()); + public ParatextProjectTermsParserBase Parser { get; } = new MemoryParatextProjectTermsParser(files, settings); public IEnumerable<(string TermId, IReadOnlyList Glosses)> GetGlosses() { return Parser.Parse(new string[] { "PN" }, _useTermGlosses, _chapters); } } - - private class DefaultParatextProjectSettings( - string name = "Test", - string fullName = "TestProject", - Encoding? encoding = null, - ScrVers? versification = null, - UsfmStylesheet? stylesheet = null, - string fileNamePrefix = "", - string fileNameForm = "41MAT", - string fileNameSuffix = "Test.SFM", - string biblicalTermsListType = "Project", - string biblicalTermsProjectName = "Test", - string biblicalTermsFileName = "ProjectBiblicalTerms.xml", - string languageCode = "en" - ) - : ParatextProjectSettings( - name, - fullName, - encoding ?? Encoding.UTF8, - versification ?? ScrVers.English, - stylesheet ?? new UsfmStylesheet("usfm.sty"), - fileNamePrefix, - fileNameForm, - fileNameSuffix, - biblicalTermsListType, - biblicalTermsProjectName, - biblicalTermsFileName, - languageCode - ) { } } diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs index d74f7483..e97becbf 100644 --- a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs @@ -1,24 +1,8 @@ -using System.Text; using SIL.Machine.Corpora; namespace SIL.Machine.PunctuationAnalysis; public class MemoryParatextProjectQuoteConventionDetector( - ParatextProjectSettings settings, - IDictionary files -) : ParatextProjectQuoteConventionDetector(settings) -{ - public IDictionary Files { get; } = files; - - protected override bool Exists(string fileName) - { - return Files.ContainsKey(fileName); - } - - protected override Stream? Open(string fileName) - { - if (!Files.TryGetValue(fileName, out string? contents)) - return null; - return new MemoryStream(Encoding.UTF8.GetBytes(contents)); - } -} + IDictionary? files, + ParatextProjectSettings? settings +) : ParatextProjectQuoteConventionDetector(new MemoryParatextProjectFileHandler(files, settings)) { } From a3b9d1f39e7a8c097e19816293abb08a7db47969 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 15 Oct 2025 09:16:20 -0400 Subject: [PATCH 05/17] Add missing import --- tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index 10800efe..29afbbf9 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -1,4 +1,5 @@ using System.IO.Compression; +using System.Text.Json; using NUnit.Framework; using SIL.Machine.PunctuationAnalysis; From bb91859b4a0f8d1e01dddb672d926569e8f57e99 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 15 Oct 2025 10:13:45 -0400 Subject: [PATCH 06/17] Move QD tests to punctuation analysis --- .../FallbackQuotationMarkResolverTests.cs | 3 +-- .../ParatextProjectQuoteConvetionDetectorTests.cs | 4 ++-- .../QuotationDenormalizationTests.cs | 4 ++-- .../QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs | 4 ++-- .../QuotationMarkUpdateFirstPassTests.cs | 4 ++-- .../QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs | 4 ++-- 6 files changed, 11 insertions(+), 12 deletions(-) rename tests/SIL.Machine.Tests/{Corpora => PunctuationAnalysis}/FallbackQuotationMarkResolverTests.cs (99%) rename tests/SIL.Machine.Tests/{Corpora => PunctuationAnalysis}/ParatextProjectQuoteConvetionDetectorTests.cs (98%) rename tests/SIL.Machine.Tests/{Corpora => PunctuationAnalysis}/QuotationDenormalizationTests.cs (97%) rename tests/SIL.Machine.Tests/{Corpora => PunctuationAnalysis}/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs (99%) rename tests/SIL.Machine.Tests/{Corpora => PunctuationAnalysis}/QuotationMarkUpdateFirstPassTests.cs (99%) rename tests/SIL.Machine.Tests/{Corpora => PunctuationAnalysis}/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs (99%) diff --git a/tests/SIL.Machine.Tests/Corpora/FallbackQuotationMarkResolverTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/FallbackQuotationMarkResolverTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/FallbackQuotationMarkResolverTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/FallbackQuotationMarkResolverTests.cs index 09462018..5a6ea8ae 100644 --- a/tests/SIL.Machine.Tests/Corpora/FallbackQuotationMarkResolverTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/FallbackQuotationMarkResolverTests.cs @@ -1,7 +1,6 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class FallbackQuotationMarkResolverTests diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConvetionDetectorTests.cs similarity index 98% rename from tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConvetionDetectorTests.cs index 555e7b36..e083961b 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConvetionDetectorTests.cs @@ -1,8 +1,8 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; using SIL.Scripture; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class ParatextProjectQuoteConventionDetectorTests diff --git a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationTests.cs similarity index 97% rename from tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationTests.cs index 81750a7a..117e2ac7 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuotationDenormalizationTests diff --git a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs index fc709fac..02cd3b80 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuotationMarkDenormalizationUsfmUpdateBlockHandlerTests diff --git a/tests/SIL.Machine.Tests/Corpora/QuotationMarkUpdateFirstPassTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationMarkUpdateFirstPassTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/QuotationMarkUpdateFirstPassTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationMarkUpdateFirstPassTests.cs index 2f4ba189..df37f803 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuotationMarkUpdateFirstPassTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationMarkUpdateFirstPassTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuotationMarkUpdateFirstPassTests diff --git a/tests/SIL.Machine.Tests/Corpora/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs index af5a264e..083b9f50 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuoteConventionChangingUsfmUpdateBlockHandlerTests From bc76526c8d7b5c872fb65df8e5a28c2033b91ad8 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 15 Oct 2025 10:49:07 -0400 Subject: [PATCH 07/17] Begin adding tests --- ...extProjectVersificationMismatchDetector.cs | 2 +- .../UsfmVersificationMismatchDetector.cs | 61 +++-- ...extProjectVersificationMismatchDetector.cs | 4 +- ...extProjectVersificationMismatchDetector.cs | 209 ++++++++++++++++++ 4 files changed, 250 insertions(+), 26 deletions(-) create mode 100644 tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs diff --git a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs index c197835a..25496886 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs @@ -12,7 +12,7 @@ public abstract class ParatextProjectVersificationMismatchDetector protected ParatextProjectVersificationMismatchDetector(IParatextProjectFileHandler paratextProjectFileHandler) { - _settings = _paratextProjectFileHandler.GetSettings(); + _settings = paratextProjectFileHandler.GetSettings(); _paratextProjectFileHandler = paratextProjectFileHandler; } diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs index e2107f08..47e6fce7 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -10,7 +10,7 @@ public enum UsfmVersificationMismatchType MissingChapter, MissingVerse, ExtraVerse, - InvalidVerseRange, + InvalidVerseRange, //TODO This would be a nice thing to detect, but does it really fit into the UsfmVersificationMismatch category? MissingVerseSegment, ExtraVerseSegment } @@ -165,7 +165,7 @@ public UsfmVersificationMismatchDetector(ScrVers versification) public bool HasError => _errors.Count > 0; public IReadOnlyList Errors => _errors; - public override void StartBook(UsfmParserState state, string marker, string code) + public override void EndUsfm(UsfmParserState state) { if (_currentBook > 0 && Canon.IsCanonical(_currentBook)) { @@ -179,33 +179,27 @@ public override void StartBook(UsfmParserState state, string marker, string code if (versificationMismatch.CheckMismatch()) _errors.Add(versificationMismatch); } + } + public override void StartBook(UsfmParserState state, string marker, string code) + { + if (_currentBook > 0 && Canon.IsCanonical(_currentBook)) + { + var versificationMismatch = new UsfmVersificationMismatch( + _currentBook, + _versification.GetLastChapter(_currentBook), + _versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)), + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum + ); + if (versificationMismatch.CheckMismatch()) + _errors.Add(versificationMismatch); + } _currentBook = state.VerseRef.BookNum; _currentChapter = 0; _currentVerse = new VerseRef(); } - public override void Verse( - UsfmParserState state, - string number, - string marker, - string altNumber, - string pubNumber - ) - { - _currentVerse = state.VerseRef; - var versificationMismatch = new UsfmVersificationMismatch( - _currentBook, - _currentChapter, - _currentVerse.AllVerses().Last().VerseNum, - _currentChapter, - _currentVerse.AllVerses().Last().VerseNum, - _currentVerse - ); - if (versificationMismatch.CheckMismatch()) - _errors.Add(versificationMismatch); - } - public override void Chapter( UsfmParserState state, string number, @@ -230,5 +224,26 @@ string pubNumber _currentChapter = state.VerseRef.ChapterNum; _currentVerse = new VerseRef(); } + + public override void Verse( + UsfmParserState state, + string number, + string marker, + string altNumber, + string pubNumber + ) + { + _currentVerse = state.VerseRef; + var versificationMismatch = new UsfmVersificationMismatch( + _currentBook, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentVerse + ); + if (versificationMismatch.CheckMismatch()) + _errors.Add(versificationMismatch); + } } } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs index d5e11c3b..a09009fe 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs @@ -1,6 +1,6 @@ namespace SIL.Machine.Corpora; public class MemoryParatextProjectVersificationMismatchDetector( - IDictionary files, - ParatextProjectSettings settings + IDictionary? files = null, + ParatextProjectSettings? settings = null ) : ParatextProjectVersificationMismatchDetector(new MemoryParatextProjectFileHandler(files, settings)) { } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs new file mode 100644 index 00000000..a1c53998 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs @@ -0,0 +1,209 @@ +using System.Text.Json; +using NUnit.Framework; + +namespace SIL.Machine.Corpora; + +[TestFixture] +public class ParatextProjectQuoteConventionDetectorTests +{ + [Test] + public void GetUsfmVersificationMismatches_NoMismatches() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + Assert.That( + env.GetUsfmVersificationMismatches(), + Has.Count.EqualTo(0), + JsonSerializer.Serialize(env.GetUsfmVersificationMismatches()) + ); + } + + [Test] + public void GetUsfmVersificationMismatches_MissingVerse() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerse)); + } + + [Test] + public void GetUsfmVersificationMismatches_MissingChapter() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingChapter)); + } + + [Test] + public void GetUsfmVersificationMismatches_ExtraVerse() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + \v 16 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerse)); + } + + [Test] + public void GetUsfmVersificationMismatches_InvalidVerse() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 13-12 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.InvalidVerseRange)); + } + + [Test] + public void GetUsfmVersificationMismatches_ExtraVerseSegment() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14a + \v 14b + \v 15 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(2), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerseSegment)); + } + + private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) + { + public ParatextProjectVersificationMismatchDetector Detector { get; } = + new MemoryParatextProjectVersificationMismatchDetector(files, settings); + + public IReadOnlyList GetUsfmVersificationMismatches() + { + return Detector.GetUsfmVersificationMismatches(); + } + } +} From b0dca36dc212684108fea03ee4c55d330c543bd7 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 15 Oct 2025 12:29:08 -0400 Subject: [PATCH 08/17] Add additional tests --- .../UsfmVersificationMismatchDetector.cs | 25 ++-- ...extProjectVersificationMismatchDetector.cs | 112 ++++++++++++++++++ 2 files changed, 126 insertions(+), 11 deletions(-) diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs index 47e6fce7..f50ad4e4 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -208,7 +208,7 @@ public override void Chapter( string pubNumber ) { - if (_currentChapter != 0) + if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0) { var versificationMismatch = new UsfmVersificationMismatch( _currentBook, @@ -234,16 +234,19 @@ string pubNumber ) { _currentVerse = state.VerseRef; - var versificationMismatch = new UsfmVersificationMismatch( - _currentBook, - _currentChapter, - _currentVerse.AllVerses().Last().VerseNum, - _currentChapter, - _currentVerse.AllVerses().Last().VerseNum, - _currentVerse - ); - if (versificationMismatch.CheckMismatch()) - _errors.Add(versificationMismatch); + if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0) + { + var versificationMismatch = new UsfmVersificationMismatch( + _currentBook, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentVerse + ); + if (versificationMismatch.CheckMismatch()) + _errors.Add(versificationMismatch); + } } } } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs index a1c53998..b24e22d6 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs @@ -1,5 +1,7 @@ +using System.Text; using System.Text.Json; using NUnit.Framework; +using SIL.Scripture; namespace SIL.Machine.Corpora; @@ -196,6 +198,99 @@ public void GetUsfmVersificationMismatches_ExtraVerseSegment() Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerseSegment)); } + [Test] + public void GetUsfmVersificationMismatches_MissingVerseSegment() + { + var env = new TestEnvironment( + settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + versification: GetCustomVersification(@"*3JN 1:13,a,b") + ), + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerseSegment)); + } + + [Test] + public void GetUsfmVersificationMismatches_IgnoreNonCanonicals() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "98XXETest.SFM", + @"\id XXE + \c 1 + \v 3-2 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(0), JsonSerializer.Serialize(mismatches)); + } + + [Test] + public void GetUsfmVersificationMismatches_ExtraVerse_ExcludedInCustomVrs() + { + var env = new TestEnvironment( + settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + versification: GetCustomVersification(@"-3JN 1:13") + ), + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerse)); + } + private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { public ParatextProjectVersificationMismatchDetector Detector { get; } = @@ -206,4 +301,21 @@ public IReadOnlyList GetUsfmVersificationMismatches() return Detector.GetUsfmVersificationMismatches(); } } + + private static ScrVers GetCustomVersification(string customVrsContents, ScrVers? baseVersification = null) + { + baseVersification ??= ScrVers.English; + ScrVers customVersification = baseVersification; + using (var reader = new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(customVrsContents)))) + { + customVersification = Versification.Table.Implementation.Load( + reader, + "custom.vrs", + baseVersification, + baseVersification.ToString() + "-" + customVrsContents.GetHashCode() + ); + } + Versification.Table.Implementation.RemoveAllUnknownVersifications(); + return customVersification; + } } From 1ebfe4cd42714bb9d06dee46dbe3e1940303048b Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 15 Oct 2025 15:27:34 -0400 Subject: [PATCH 09/17] Add additional tests; correct spelling of class names --- .../UsfmVersificationMismatchDetector.cs | 15 +--- ...extProjectVersificationMismatchDetector.cs | 86 +++++++++++++++++++ ...ParatextProjectQuoteConventionDetector.cs} | 0 ...extProjectQuoteConventionDetectorTests.cs} | 0 4 files changed, 87 insertions(+), 14 deletions(-) rename tests/SIL.Machine.Tests/PunctuationAnalysis/{MemoryParatextProjectQuoteConvetionDetector.cs => MemoryParatextProjectQuoteConventionDetector.cs} (100%) rename tests/SIL.Machine.Tests/PunctuationAnalysis/{ParatextProjectQuoteConvetionDetectorTests.cs => ParatextProjectQuoteConventionDetectorTests.cs} (100%) diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs index f50ad4e4..fd87a95d 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -52,7 +52,7 @@ public bool CheckMismatch() Type = UsfmVersificationMismatchType.MissingChapter; return true; } - if (_expectedVerse > _actualVerse) + if (_expectedVerse > _actualVerse && _expectedChapter == _actualChapter) { Type = UsfmVersificationMismatchType.MissingVerse; return true; @@ -162,7 +162,6 @@ public UsfmVersificationMismatchDetector(ScrVers versification) _errors = new List(); } - public bool HasError => _errors.Count > 0; public IReadOnlyList Errors => _errors; public override void EndUsfm(UsfmParserState state) @@ -183,18 +182,6 @@ public override void EndUsfm(UsfmParserState state) public override void StartBook(UsfmParserState state, string marker, string code) { - if (_currentBook > 0 && Canon.IsCanonical(_currentBook)) - { - var versificationMismatch = new UsfmVersificationMismatch( - _currentBook, - _versification.GetLastChapter(_currentBook), - _versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)), - _currentChapter, - _currentVerse.AllVerses().Last().VerseNum - ); - if (versificationMismatch.CheckMismatch()) - _errors.Add(versificationMismatch); - } _currentBook = state.VerseRef.BookNum; _currentChapter = 0; _currentVerse = new VerseRef(); diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs index b24e22d6..79d06f4d 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs @@ -291,6 +291,92 @@ public void GetUsfmVersificationMismatches_ExtraVerse_ExcludedInCustomVrs() Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerse)); } + [Test] + public void GetUsfmVersificationMismatches_MultipleBooks() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "642JNTest.SFM", + @"\id 2JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + " + }, + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerse)); + } + + [Test] + public void GetUsfmVersificationMismatches_MultipleChapters() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "642JNTest.SFM", + @"\id 2JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \c 2 + \v 1 + " + } + } + ); + IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); + Assert.That(mismatches, Has.Count.EqualTo(2), JsonSerializer.Serialize(mismatches)); + Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerse)); + Assert.That(mismatches[1].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerse)); + } + private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { public ParatextProjectVersificationMismatchDetector Detector { get; } = diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs similarity index 100% rename from tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConvetionDetectorTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConventionDetectorTests.cs similarity index 100% rename from tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConvetionDetectorTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConventionDetectorTests.cs From 3ff1c87ffedb7d314be780d106eb063144e0756c Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 16 Oct 2025 16:26:58 -0400 Subject: [PATCH 10/17] Rename tests file with suffix "Tests" --- ...or.cs => ParatextProjectVersificationMismatchDetectorTests.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/SIL.Machine.Tests/Corpora/{ParatextProjectVersificationMismatchDetector.cs => ParatextProjectVersificationMismatchDetectorTests.cs} (100%) diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs similarity index 100% rename from tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetector.cs rename to tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs From 0165d14c4b2b5eb854203d81382e737db84f4888 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 29 Oct 2025 11:54:20 -0400 Subject: [PATCH 11/17] Use interface for settings parser --- .../Corpora/FileParatextProjectFileHandler.cs | 16 +++++-- .../FileParatextProjectSettingsParser.cs | 35 +------------- .../Corpora/FileParatextProjectTextUpdater.cs | 5 +- ...extProjectVersificationMismatchDetector.cs | 7 ++- .../Corpora/IParatextProjectFileHandler.cs | 5 +- .../ParatextProjectSettingsParserBase.cs | 26 +++++----- .../Corpora/ParatextProjectTermsParserBase.cs | 7 ++- .../Corpora/ParatextProjectTextUpdaterBase.cs | 7 ++- ...ojectVersificationMismatchDetectorBase.cs} | 17 ++++--- .../UsfmVersificationMismatchDetector.cs | 7 ++- .../Corpora/ZipParatextProjectFileHandler.cs | 48 +++++++++++++++++++ .../ZipParatextProjectSettingsParser.cs | 33 ++----------- .../ZipParatextProjectSettingsParserBase.cs | 46 ------------------ .../Corpora/ZipParatextProjectTermsParser.cs | 3 +- .../Corpora/ZipParatextProjectTextUpdater.cs | 3 +- ...extProjectVersificationMismatchDetector.cs | 5 +- .../ParatextProjectQuoteConventionDetector.cs | 7 ++- ...pParatextProjectQuoteConventionDetector.cs | 3 +- .../MemoryParatextProjectFileHandler.cs | 21 ++++---- .../MemoryParatextProjectTermsParser.cs | 5 +- ...extProjectVersificationMismatchDetector.cs | 6 ++- ...ojectVersificationMismatchDetectorTests.cs | 2 +- ...yParatextProjectQuoteConventionDetector.cs | 6 ++- 23 files changed, 155 insertions(+), 165 deletions(-) rename src/SIL.Machine/Corpora/{ParatextProjectVersificationMismatchDetector.cs => ParatextProjectVersificationMismatchDetectorBase.cs} (76%) delete mode 100644 src/SIL.Machine/Corpora/ZipParatextProjectSettingsParserBase.cs diff --git a/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs index 99601f8e..3d336e3f 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs @@ -1,16 +1,15 @@ using System.IO; +using System.Linq; namespace SIL.Machine.Corpora { public class FileParatextProjectFileHandler : IParatextProjectFileHandler { private readonly string _projectDir; - private readonly ParatextProjectSettings _settings; public FileParatextProjectFileHandler(string projectDir) { _projectDir = projectDir; - _settings = new FileParatextProjectSettingsParser(projectDir).Parse(); } public bool Exists(string fileName) @@ -23,9 +22,18 @@ public Stream Open(string fileName) return File.OpenRead(Path.Combine(_projectDir, fileName)); } - public ParatextProjectSettings GetSettings() + public UsfmStylesheet CreateStylesheet(string fileName) { - return _settings; + string customStylesheetFileName = Path.Combine(_projectDir, "custom.sty"); + return new UsfmStylesheet( + fileName, + File.Exists(customStylesheetFileName) ? customStylesheetFileName : null + ); + } + + public string Find(string extension) + { + return Directory.EnumerateFiles(_projectDir, "*" + extension).FirstOrDefault(); } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs b/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs index 996400a4..34860f2b 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs @@ -1,39 +1,8 @@ -using System.IO; -using System.Linq; - -namespace SIL.Machine.Corpora +namespace SIL.Machine.Corpora { public class FileParatextProjectSettingsParser : ParatextProjectSettingsParserBase { - private readonly string _projectDir; - public FileParatextProjectSettingsParser(string projectDir) - { - _projectDir = projectDir; - } - - protected override UsfmStylesheet CreateStylesheet(string fileName) - { - string customStylesheetFileName = Path.Combine(_projectDir, "custom.sty"); - return new UsfmStylesheet( - fileName, - File.Exists(customStylesheetFileName) ? customStylesheetFileName : null - ); - } - - protected override bool Exists(string fileName) - { - return File.Exists(Path.Combine(_projectDir, fileName)); - } - - protected override string Find(string extension) - { - return Directory.EnumerateFiles(_projectDir, "*" + extension).FirstOrDefault(); - } - - protected override Stream Open(string fileName) - { - return File.OpenRead(Path.Combine(_projectDir, fileName)); - } + : base(new FileParatextProjectFileHandler(projectDir)) { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs index be24baa0..a6e19b34 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs @@ -3,6 +3,9 @@ public class FileParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { public FileParatextProjectTextUpdater(string projectDir) - : base(new FileParatextProjectFileHandler(projectDir)) { } + : base( + new FileParatextProjectFileHandler(projectDir), + new FileParatextProjectSettingsParser(projectDir).Parse() + ) { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs index 4c64d838..b0c269c6 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs @@ -1,8 +1,11 @@ namespace SIL.Machine.Corpora { - public class FileParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetector + public class FileParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase { public FileParatextProjectVersificationMismatchDetector(string projectDir) - : base(new FileParatextProjectFileHandler(projectDir)) { } + : base( + new FileParatextProjectFileHandler(projectDir), + new FileParatextProjectSettingsParser(projectDir).Parse() + ) { } } } diff --git a/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs index 610c554d..e0a5a335 100644 --- a/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs +++ b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs @@ -6,6 +6,9 @@ public interface IParatextProjectFileHandler { bool Exists(string fileName); Stream Open(string fileName); - ParatextProjectSettings GetSettings(); + string Find(string extension); + UsfmStylesheet CreateStylesheet(string fileName); + + // ParatextProjectSettings GetSettings(); } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs b/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs index b3431843..52b54cba 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs @@ -8,16 +8,23 @@ namespace SIL.Machine.Corpora { public abstract class ParatextProjectSettingsParserBase { + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; + + public ParatextProjectSettingsParserBase(IParatextProjectFileHandler paratextProjectFileHandler) + { + _paratextProjectFileHandler = paratextProjectFileHandler; + } + public ParatextProjectSettings Parse() { Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); string settingsFileName = "Settings.xml"; - if (!Exists(settingsFileName)) - settingsFileName = Find(".ssf"); + if (!_paratextProjectFileHandler.Exists(settingsFileName)) + settingsFileName = _paratextProjectFileHandler.Find(".ssf"); if (string.IsNullOrEmpty(settingsFileName)) throw new InvalidOperationException("The project does not contain a settings file."); XDocument settingsDoc; - using (Stream stream = Open(settingsFileName)) + using (Stream stream = _paratextProjectFileHandler.Open(settingsFileName)) { settingsDoc = XDocument.Load(stream); } @@ -36,7 +43,7 @@ public ParatextProjectSettings Parse() var scrVersType = (int?)settingsDoc.Root.Element("Versification") ?? (int)ScrVersType.English; var versification = new ScrVers((ScrVersType)scrVersType); - if (Exists("custom.vrs")) + if (_paratextProjectFileHandler.Exists("custom.vrs")) { var guid = (string)settingsDoc.Root.Element("Guid"); string versName = ((ScrVersType)scrVersType).ToString() + "-" + guid; @@ -46,7 +53,7 @@ public ParatextProjectSettings Parse() } else { - using (var reader = new StreamReader(Open("custom.vrs"))) + using (var reader = new StreamReader(_paratextProjectFileHandler.Open("custom.vrs"))) { versification = Versification.Table.Implementation.Load( reader, @@ -60,9 +67,9 @@ public ParatextProjectSettings Parse() } var stylesheetFileName = (string)settingsDoc.Root.Element("StyleSheet") ?? "usfm.sty"; - if (!Exists(stylesheetFileName) && stylesheetFileName != "usfm_sb.sty") + if (!_paratextProjectFileHandler.Exists(stylesheetFileName) && stylesheetFileName != "usfm_sb.sty") stylesheetFileName = "usfm.sty"; - UsfmStylesheet stylesheet = CreateStylesheet(stylesheetFileName); + UsfmStylesheet stylesheet = _paratextProjectFileHandler.CreateStylesheet(stylesheetFileName); string prefix = ""; string form = "41MAT"; @@ -122,10 +129,5 @@ public ParatextProjectSettings Parse() languageCode ); } - - protected abstract bool Exists(string fileName); - protected abstract string Find(string extension); - protected abstract Stream Open(string fileName); - protected abstract UsfmStylesheet CreateStylesheet(string fileName); } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs index 1a1ee524..6cae0da6 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs @@ -39,9 +39,12 @@ public abstract class ParatextProjectTermsParserBase private readonly ParatextProjectSettings _settings; private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectTermsParserBase(IParatextProjectFileHandler paratextProjectFileHandler) + protected ParatextProjectTermsParserBase( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) { - _settings = paratextProjectFileHandler.GetSettings(); + _settings = settings; _paratextProjectFileHandler = paratextProjectFileHandler; } diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 5b77d244..5b0731c4 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -10,9 +10,12 @@ public abstract class ParatextProjectTextUpdaterBase private readonly ParatextProjectSettings _settings; private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectTextUpdaterBase(IParatextProjectFileHandler paratextProjectFileHandler) + protected ParatextProjectTextUpdaterBase( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) { - _settings = paratextProjectFileHandler.GetSettings(); + _settings = settings; _paratextProjectFileHandler = paratextProjectFileHandler; } diff --git a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetectorBase.cs similarity index 76% rename from src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs rename to src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetectorBase.cs index 25496886..9a51dcae 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetectorBase.cs @@ -5,14 +5,17 @@ namespace SIL.Machine.Corpora { - public abstract class ParatextProjectVersificationMismatchDetector + public abstract class ParatextProjectVersificationMismatchDetectorBase { private readonly ParatextProjectSettings _settings; private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectVersificationMismatchDetector(IParatextProjectFileHandler paratextProjectFileHandler) + protected ParatextProjectVersificationMismatchDetectorBase( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) { - _settings = paratextProjectFileHandler.GetSettings(); + _settings = settings; _paratextProjectFileHandler = paratextProjectFileHandler; } @@ -23,11 +26,11 @@ public IReadOnlyList GetUsfmVersificationMismatches( handler = handler ?? new UsfmVersificationMismatchDetector(_settings.Versification); foreach (string fileName in _settings.GetAllScriptureBookFileNames()) { - if (!Exists(fileName)) + if (!_paratextProjectFileHandler.Exists(fileName)) continue; string usfm; - using (var reader = new StreamReader(Open(fileName))) + using (var reader = new StreamReader(_paratextProjectFileHandler.Open(fileName))) { usfm = reader.ReadToEnd(); } @@ -48,9 +51,5 @@ public IReadOnlyList GetUsfmVersificationMismatches( } return handler.Errors; } - - private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); - - private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); } } diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs index fd87a95d..11ac3339 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -10,7 +10,7 @@ public enum UsfmVersificationMismatchType MissingChapter, MissingVerse, ExtraVerse, - InvalidVerseRange, //TODO This would be a nice thing to detect, but does it really fit into the UsfmVersificationMismatch category? + InvalidVerseRange, MissingVerseSegment, ExtraVerseSegment } @@ -100,7 +100,10 @@ public string ExpectedVerseRef { get { - VerseRef defaultVerseRef = new VerseRef(_bookNum, _expectedChapter, _expectedVerse); + if (!VerseRef.TryParse($"{_bookNum} {_expectedChapter}:{_expectedVerse}", out VerseRef defaultVerseRef)) + { + return ""; + } if (Type == UsfmVersificationMismatchType.ExtraVerse) return ""; if ( diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs index de0129c7..1d970f60 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs @@ -1,5 +1,7 @@ using System.IO; using System.IO.Compression; +using System.Linq; +using SIL.IO; namespace SIL.Machine.Corpora { @@ -31,5 +33,51 @@ public ParatextProjectSettings GetSettings() { return _settings; } + + public string Find(string extension) + { + ZipArchiveEntry entry = _archive.Entries.FirstOrDefault(e => e.FullName.EndsWith(extension)); + if (entry == null) + return null; + return entry.FullName; + } + + public UsfmStylesheet CreateStylesheet(string fileName) + { + TempFile stylesheetTempFile = null; + TempFile customStylesheetTempFile = null; + try + { + string stylesheetPath = fileName; + if (Exists(fileName)) + { + stylesheetTempFile = TempFile.CreateAndGetPathButDontMakeTheFile(); + using (Stream source = Open(fileName)) + using (Stream target = File.OpenWrite(stylesheetTempFile.Path)) + { + source.CopyTo(target); + } + stylesheetPath = stylesheetTempFile.Path; + } + + string customStylesheetPath = null; + if (Exists("custom.sty")) + { + customStylesheetTempFile = TempFile.CreateAndGetPathButDontMakeTheFile(); + using (Stream source = Open("custom.sty")) + using (Stream target = File.OpenWrite(customStylesheetTempFile.Path)) + { + source.CopyTo(target); + } + customStylesheetPath = customStylesheetTempFile.Path; + } + return new UsfmStylesheet(stylesheetPath, customStylesheetPath); + } + finally + { + stylesheetTempFile?.Dispose(); + customStylesheetTempFile?.Dispose(); + } + } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs index bb2d6c1a..2521840f 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs @@ -1,37 +1,10 @@ -using System.IO; -using System.IO.Compression; -using System.Linq; +using System.IO.Compression; namespace SIL.Machine.Corpora { - public class ZipParatextProjectSettingsParser : ZipParatextProjectSettingsParserBase + public class ZipParatextProjectSettingsParser : ParatextProjectSettingsParserBase { - private readonly ZipArchive _archive; - public ZipParatextProjectSettingsParser(ZipArchive archive) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override string Find(string extension) - { - ZipArchiveEntry entry = _archive.Entries.FirstOrDefault(e => e.FullName.EndsWith(extension)); - if (entry == null) - return null; - return entry.FullName; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + : base(new ZipParatextProjectFileHandler(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParserBase.cs b/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParserBase.cs deleted file mode 100644 index bb0c593f..00000000 --- a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParserBase.cs +++ /dev/null @@ -1,46 +0,0 @@ -using System.IO; -using SIL.IO; - -namespace SIL.Machine.Corpora -{ - public abstract class ZipParatextProjectSettingsParserBase : ParatextProjectSettingsParserBase - { - protected override UsfmStylesheet CreateStylesheet(string fileName) - { - TempFile stylesheetTempFile = null; - TempFile customStylesheetTempFile = null; - try - { - string stylesheetPath = fileName; - if (Exists(fileName)) - { - stylesheetTempFile = TempFile.CreateAndGetPathButDontMakeTheFile(); - using (Stream source = Open(fileName)) - using (Stream target = File.OpenWrite(stylesheetTempFile.Path)) - { - source.CopyTo(target); - } - stylesheetPath = stylesheetTempFile.Path; - } - - string customStylesheetPath = null; - if (Exists("custom.sty")) - { - customStylesheetTempFile = TempFile.CreateAndGetPathButDontMakeTheFile(); - using (Stream source = Open("custom.sty")) - using (Stream target = File.OpenWrite(customStylesheetTempFile.Path)) - { - source.CopyTo(target); - } - customStylesheetPath = customStylesheetTempFile.Path; - } - return new UsfmStylesheet(stylesheetPath, customStylesheetPath); - } - finally - { - stylesheetTempFile?.Dispose(); - customStylesheetTempFile?.Dispose(); - } - } - } -} diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs index 5da62eb5..27406c3f 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs @@ -5,6 +5,7 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectTermsParser : ParatextProjectTermsParserBase { public ZipParatextProjectTermsParser(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive)) { } + : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs index 3a941dac..ebb2e017 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs @@ -5,6 +5,7 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { public ZipParatextProjectTextUpdater(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive)) { } + : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs index c2e7526a..efbb6413 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs @@ -2,9 +2,10 @@ namespace SIL.Machine.Corpora { - public class ZipParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetector + public class ZipParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase { public ZipParatextProjectVersificationMismatchDetector(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive)) { } + : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + { } } } diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index b1b9f648..a7821049 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -13,9 +13,12 @@ public abstract class ParatextProjectQuoteConventionDetector private readonly ParatextProjectSettings _settings; private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectQuoteConventionDetector(IParatextProjectFileHandler paratextProjectFileHandler) + protected ParatextProjectQuoteConventionDetector( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) { - _settings = paratextProjectFileHandler.GetSettings(); + _settings = settings; _paratextProjectFileHandler = paratextProjectFileHandler; } diff --git a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs index 980e7d3a..6d06d7c9 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs @@ -6,6 +6,7 @@ namespace SIL.Machine.PunctuationAnalysis public class ZipParatextProjectQuoteConventionDetector : ParatextProjectQuoteConventionDetector { public ZipParatextProjectQuoteConventionDetector(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive)) { } + : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + { } } } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs index f5719a5e..df869d1e 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs @@ -3,19 +3,25 @@ namespace SIL.Machine.Corpora; -public class MemoryParatextProjectFileHandler( - IDictionary? files = null, - ParatextProjectSettings? settings = null -) : IParatextProjectFileHandler +public class MemoryParatextProjectFileHandler(IDictionary? files = null) : IParatextProjectFileHandler { public IDictionary Files { get; } = files ?? new Dictionary(); - private readonly ParatextProjectSettings _settings = settings ?? new DefaultParatextProjectSettings(); + + public UsfmStylesheet CreateStylesheet(string fileName) + { + throw new NotImplementedException(); + } public bool Exists(string fileName) { return Files.ContainsKey(fileName); } + public string Find(string extension) + { + throw new NotImplementedException(); + } + public Stream? Open(string fileName) { if (!Files.TryGetValue(fileName, out string? contents)) @@ -23,11 +29,6 @@ public bool Exists(string fileName) return new MemoryStream(Encoding.UTF8.GetBytes(contents)); } - public ParatextProjectSettings GetSettings() - { - return _settings; - } - public class DefaultParatextProjectSettings( string name = "Test", string fullName = "TestProject", diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs index b694eba6..a8c4c7c8 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs @@ -1,4 +1,7 @@ namespace SIL.Machine.Corpora; public class MemoryParatextProjectTermsParser(IDictionary? files, ParatextProjectSettings? settings) - : ParatextProjectTermsParserBase(new MemoryParatextProjectFileHandler(files, settings)) { } + : ParatextProjectTermsParserBase( + new MemoryParatextProjectFileHandler(files), + settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() + ) { } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs index a09009fe..1aaca781 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs @@ -3,4 +3,8 @@ namespace SIL.Machine.Corpora; public class MemoryParatextProjectVersificationMismatchDetector( IDictionary? files = null, ParatextProjectSettings? settings = null -) : ParatextProjectVersificationMismatchDetector(new MemoryParatextProjectFileHandler(files, settings)) { } +) + : ParatextProjectVersificationMismatchDetectorBase( + new MemoryParatextProjectFileHandler(files), + settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() + ) { } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs index 79d06f4d..b98c5095 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs @@ -379,7 +379,7 @@ public void GetUsfmVersificationMismatches_MultipleChapters() private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { - public ParatextProjectVersificationMismatchDetector Detector { get; } = + public ParatextProjectVersificationMismatchDetectorBase Detector { get; } = new MemoryParatextProjectVersificationMismatchDetector(files, settings); public IReadOnlyList GetUsfmVersificationMismatches() diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs index e97becbf..6116b8f8 100644 --- a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs @@ -5,4 +5,8 @@ namespace SIL.Machine.PunctuationAnalysis; public class MemoryParatextProjectQuoteConventionDetector( IDictionary? files, ParatextProjectSettings? settings -) : ParatextProjectQuoteConventionDetector(new MemoryParatextProjectFileHandler(files, settings)) { } +) + : ParatextProjectQuoteConventionDetector( + new MemoryParatextProjectFileHandler(files), + settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() + ) { } From 38311283807215729af39ec017811c7217df125e Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 29 Oct 2025 12:06:13 -0400 Subject: [PATCH 12/17] Remove GetSettings() --- src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs index 1d970f60..00338dda 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs @@ -8,12 +8,10 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectFileHandler : IParatextProjectFileHandler { private readonly ZipArchive _archive; - private readonly ParatextProjectSettings _settings; public ZipParatextProjectFileHandler(ZipArchive archive) { _archive = archive; - _settings = new ZipParatextProjectSettingsParser(archive).Parse(); } public bool Exists(string fileName) @@ -29,11 +27,6 @@ public Stream Open(string fileName) return entry.Open(); } - public ParatextProjectSettings GetSettings() - { - return _settings; - } - public string Find(string extension) { ZipArchiveEntry entry = _archive.Entries.FirstOrDefault(e => e.FullName.EndsWith(extension)); From eb8121300bdaf6d9c531abe5c390c45160343753 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 4 Nov 2025 09:23:52 -0500 Subject: [PATCH 13/17] Address reviewer comments --- .../Corpora/FileParatextProjectSettingsParser.cs | 5 +++++ .../Corpora/FileParatextProjectTextUpdater.cs | 2 +- ...leParatextProjectVersificationMismatchDetector.cs | 2 +- .../Corpora/IParatextProjectFileHandler.cs | 2 -- src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs | 2 +- .../Corpora/ParatextProjectTermsParserBase.cs | 12 ++++-------- .../Corpora/UsfmVersificationMismatchDetector.cs | 2 ++ .../Corpora/ZipParatextProjectSettingsParser.cs | 5 +++++ .../Corpora/ZipParatextProjectTermsParser.cs | 2 +- .../Corpora/ZipParatextProjectTextUpdater.cs | 2 +- ...ipParatextProjectVersificationMismatchDetector.cs | 2 +- .../ZipParatextProjectQuoteConventionDetector.cs | 2 +- 12 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs b/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs index 34860f2b..27a51b42 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs @@ -4,5 +4,10 @@ public class FileParatextProjectSettingsParser : ParatextProjectSettingsParserBa { public FileParatextProjectSettingsParser(string projectDir) : base(new FileParatextProjectFileHandler(projectDir)) { } + + public static ParatextProjectSettings Parse(string projectDir) + { + return new FileParatextProjectSettingsParser(projectDir).Parse(); + } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs index a6e19b34..ad55ad9d 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs @@ -5,7 +5,7 @@ public class FileParatextProjectTextUpdater : ParatextProjectTextUpdaterBase public FileParatextProjectTextUpdater(string projectDir) : base( new FileParatextProjectFileHandler(projectDir), - new FileParatextProjectSettingsParser(projectDir).Parse() + FileParatextProjectSettingsParser.Parse(projectDir) ) { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs index b0c269c6..82b72cf8 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs @@ -5,7 +5,7 @@ public class FileParatextProjectVersificationMismatchDetector : ParatextProjectV public FileParatextProjectVersificationMismatchDetector(string projectDir) : base( new FileParatextProjectFileHandler(projectDir), - new FileParatextProjectSettingsParser(projectDir).Parse() + FileParatextProjectSettingsParser.Parse(projectDir) ) { } } } diff --git a/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs index e0a5a335..ac4b1a9c 100644 --- a/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs +++ b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs @@ -8,7 +8,5 @@ public interface IParatextProjectFileHandler Stream Open(string fileName); string Find(string extension); UsfmStylesheet CreateStylesheet(string fileName); - - // ParatextProjectSettings GetSettings(); } } diff --git a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs index d09b3409..60ce8800 100644 --- a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs +++ b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs @@ -19,7 +19,7 @@ public ParatextBackupTermsCorpus( .Parse(termCategories, useTermGlosses, chapters) .OrderBy(g => g.TermId); - ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse(); + ParatextProjectSettings settings = ZipParatextProjectSettingsParser.Parse(archive); string textId = $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; diff --git a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs index 6cae0da6..15e76175 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs @@ -59,9 +59,9 @@ ParatextProjectSettings settings IDictionary> termIdToReferences; if (_settings.BiblicalTermsListType == "Project") { - if (Exists(_settings.BiblicalTermsFileName)) + if (_paratextProjectFileHandler.Exists(_settings.BiblicalTermsFileName)) { - using (Stream keyTermsFile = Open(_settings.BiblicalTermsFileName)) + using (Stream keyTermsFile = _paratextProjectFileHandler.Open(_settings.BiblicalTermsFileName)) { biblicalTermsDoc = XDocument.Load(keyTermsFile); termIdToCategoryDictionary = GetCategoryPerId(biblicalTermsDoc); @@ -115,9 +115,9 @@ ParatextProjectSettings settings } XDocument termRenderingsDoc = null; - if (Exists("TermRenderings.xml")) + if (_paratextProjectFileHandler.Exists("TermRenderings.xml")) { - using (Stream keyTermsFile = Open("TermRenderings.xml")) + using (Stream keyTermsFile = _paratextProjectFileHandler.Open("TermRenderings.xml")) { termRenderingsDoc = XDocument.Load(keyTermsFile); } @@ -298,9 +298,5 @@ private static IDictionary> GetReferences(XDo .ToImmutableHashSet() ); } - - private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); - - private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); } } diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs index 11ac3339..c9a00188 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs @@ -100,6 +100,8 @@ public string ExpectedVerseRef { get { + // We do not want to throw an exception here, and the VerseRef constructor can throw + // an exception with certain invalid verse data; use TryParse instead. if (!VerseRef.TryParse($"{_bookNum} {_expectedChapter}:{_expectedVerse}", out VerseRef defaultVerseRef)) { return ""; diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs index 2521840f..aed8cfb3 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs @@ -6,5 +6,10 @@ public class ZipParatextProjectSettingsParser : ParatextProjectSettingsParserBas { public ZipParatextProjectSettingsParser(ZipArchive archive) : base(new ZipParatextProjectFileHandler(archive)) { } + + public static ParatextProjectSettings Parse(ZipArchive archive) + { + return new ZipParatextProjectSettingsParser(archive).Parse(); + } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs index 27406c3f..f581a6ec 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs @@ -5,7 +5,7 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectTermsParser : ParatextProjectTermsParserBase { public ZipParatextProjectTermsParser(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs index ebb2e017..77c5246f 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs @@ -5,7 +5,7 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { public ZipParatextProjectTextUpdater(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs index efbb6413..5c0b6378 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs @@ -5,7 +5,7 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase { public ZipParatextProjectVersificationMismatchDetector(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs index 6d06d7c9..a476b9d6 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs @@ -6,7 +6,7 @@ namespace SIL.Machine.PunctuationAnalysis public class ZipParatextProjectQuoteConventionDetector : ParatextProjectQuoteConventionDetector { public ZipParatextProjectQuoteConventionDetector(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), new ZipParatextProjectSettingsParser(archive).Parse()) + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } From 04d630018c9fbafdc3c5cb4e92d1163ebfc12938 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 4 Nov 2025 09:36:59 -0500 Subject: [PATCH 14/17] Fix formatting --- src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs | 4 +--- .../FileParatextProjectVersificationMismatchDetector.cs | 4 +--- src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs | 3 +-- src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs | 3 +-- .../ZipParatextProjectVersificationMismatchDetector.cs | 3 +-- .../ZipParatextProjectQuoteConventionDetector.cs | 3 +-- 6 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs index ad55ad9d..467d8fb4 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs @@ -3,9 +3,7 @@ public class FileParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { public FileParatextProjectTextUpdater(string projectDir) - : base( - new FileParatextProjectFileHandler(projectDir), - FileParatextProjectSettingsParser.Parse(projectDir) + : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir) ) { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs index 82b72cf8..692d4ce8 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs @@ -3,9 +3,7 @@ namespace SIL.Machine.Corpora public class FileParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase { public FileParatextProjectVersificationMismatchDetector(string projectDir) - : base( - new FileParatextProjectFileHandler(projectDir), - FileParatextProjectSettingsParser.Parse(projectDir) + : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir) ) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs index f581a6ec..55a9e6f2 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs @@ -5,7 +5,6 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectTermsParser : ParatextProjectTermsParserBase { public ZipParatextProjectTermsParser(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) - { } + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs index 77c5246f..4903a4db 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs @@ -5,7 +5,6 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { public ZipParatextProjectTextUpdater(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) - { } + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs index 5c0b6378..4f9e4c39 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs @@ -5,7 +5,6 @@ namespace SIL.Machine.Corpora public class ZipParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase { public ZipParatextProjectVersificationMismatchDetector(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) - { } + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs index a476b9d6..1df1db84 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs @@ -6,7 +6,6 @@ namespace SIL.Machine.PunctuationAnalysis public class ZipParatextProjectQuoteConventionDetector : ParatextProjectQuoteConventionDetector { public ZipParatextProjectQuoteConventionDetector(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) - { } + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } From a5788a0f5504ee18008d4efc48f9b01303a911fa Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 4 Nov 2025 09:42:09 -0500 Subject: [PATCH 15/17] More formatting fixes --- src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs | 4 ++-- .../FileParatextProjectVersificationMismatchDetector.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs index 467d8fb4..3ba134d8 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs @@ -3,7 +3,7 @@ public class FileParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { public FileParatextProjectTextUpdater(string projectDir) - : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir) - ) { } + : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) + { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs index 692d4ce8..204760ab 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs @@ -3,7 +3,7 @@ namespace SIL.Machine.Corpora public class FileParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase { public FileParatextProjectVersificationMismatchDetector(string projectDir) - : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir) - ) { } + : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) + { } } } From 98da0d592f3f9495dc9dc4dc78ed0c09e823f533 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 4 Nov 2025 09:52:21 -0500 Subject: [PATCH 16/17] Fix formatting --- src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs | 2 +- .../Corpora/FileParatextProjectVersificationMismatchDetector.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs index 3ba134d8..6389b76a 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs @@ -4,6 +4,6 @@ public class FileParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { public FileParatextProjectTextUpdater(string projectDir) : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) - { } + { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs index 204760ab..5abd8e15 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs @@ -4,6 +4,6 @@ public class FileParatextProjectVersificationMismatchDetector : ParatextProjectV { public FileParatextProjectVersificationMismatchDetector(string projectDir) : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) - { } + { } } } From 90b5707807eaf65127c02cffe9f88915b094dae3 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 4 Nov 2025 17:16:51 -0500 Subject: [PATCH 17/17] Make name more generic: mismatch -> error --- ...ratextProjectVersificationErrorDetector.cs | 9 ++ ...extProjectVersificationMismatchDetector.cs | 9 -- ...tProjectVersificationErrorDetectorBase.cs} | 10 +- ...r.cs => UsfmVersificationErrorDetector.cs} | 60 ++++++------ ...ratextProjectVersificationErrorDetector.cs | 10 ++ ...extProjectVersificationMismatchDetector.cs | 10 -- ...atextProjectVersificationErrorDetector.cs} | 4 +- ...ParatextProjectVersificationErrorTests.cs} | 94 +++++++++---------- .../Corpora/UsfmManualTests.cs | 8 +- 9 files changed, 107 insertions(+), 107 deletions(-) create mode 100644 src/SIL.Machine/Corpora/FileParatextProjectVersificationErrorDetector.cs delete mode 100644 src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs rename src/SIL.Machine/Corpora/{ParatextProjectVersificationMismatchDetectorBase.cs => ParatextProjectVersificationErrorDetectorBase.cs} (80%) rename src/SIL.Machine/Corpora/{UsfmVersificationMismatchDetector.cs => UsfmVersificationErrorDetector.cs} (78%) create mode 100644 src/SIL.Machine/Corpora/ZipParatextProjectVersificationErrorDetector.cs delete mode 100644 src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs rename tests/SIL.Machine.Tests/Corpora/{MemoryParatextProjectVersificationMismatchDetector.cs => MemoryParatextProjectVersificationErrorDetector.cs} (69%) rename tests/SIL.Machine.Tests/Corpora/{ParatextProjectVersificationMismatchDetectorTests.cs => ParatextProjectVersificationErrorTests.cs} (62%) diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationErrorDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationErrorDetector.cs new file mode 100644 index 00000000..17e1e8aa --- /dev/null +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationErrorDetector.cs @@ -0,0 +1,9 @@ +namespace SIL.Machine.Corpora +{ + public class FileParatextProjectVersificationErrorDetector : ParatextProjectVersificationErrorDetectorBase + { + public FileParatextProjectVersificationErrorDetector(string projectDir) + : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) + { } + } +} diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs deleted file mode 100644 index 5abd8e15..00000000 --- a/src/SIL.Machine/Corpora/FileParatextProjectVersificationMismatchDetector.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace SIL.Machine.Corpora -{ - public class FileParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase - { - public FileParatextProjectVersificationMismatchDetector(string projectDir) - : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) - { } - } -} diff --git a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetectorBase.cs b/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs similarity index 80% rename from src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetectorBase.cs rename to src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs index 9a51dcae..4faa7861 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectVersificationMismatchDetectorBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs @@ -5,12 +5,12 @@ namespace SIL.Machine.Corpora { - public abstract class ParatextProjectVersificationMismatchDetectorBase + public abstract class ParatextProjectVersificationErrorDetectorBase { private readonly ParatextProjectSettings _settings; private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectVersificationMismatchDetectorBase( + protected ParatextProjectVersificationErrorDetectorBase( IParatextProjectFileHandler paratextProjectFileHandler, ParatextProjectSettings settings ) @@ -19,11 +19,11 @@ ParatextProjectSettings settings _paratextProjectFileHandler = paratextProjectFileHandler; } - public IReadOnlyList GetUsfmVersificationMismatches( - UsfmVersificationMismatchDetector handler = null + public IReadOnlyList GetUsfmVersificationErrors( + UsfmVersificationErrorDetector handler = null ) { - handler = handler ?? new UsfmVersificationMismatchDetector(_settings.Versification); + handler = handler ?? new UsfmVersificationErrorDetector(_settings.Versification); foreach (string fileName in _settings.GetAllScriptureBookFileNames()) { if (!_paratextProjectFileHandler.Exists(fileName)) diff --git a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs similarity index 78% rename from src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs rename to src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs index c9a00188..4349b91f 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationMismatchDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs @@ -5,7 +5,7 @@ namespace SIL.Machine.Corpora { - public enum UsfmVersificationMismatchType + public enum UsfmVersificationErrorType { MissingChapter, MissingVerse, @@ -15,7 +15,7 @@ public enum UsfmVersificationMismatchType ExtraVerseSegment } - public class UsfmVersificationMismatch + public class UsfmVersificationError { private readonly int _bookNum; private readonly int _expectedChapter; @@ -24,7 +24,7 @@ public class UsfmVersificationMismatch private readonly int _actualVerse; private VerseRef? _verseRef = null; - public UsfmVersificationMismatch( + public UsfmVersificationError( int bookNum, int expectedChapter, int expectedVerse, @@ -41,32 +41,32 @@ public UsfmVersificationMismatch( _verseRef = verseRef; } - public UsfmVersificationMismatchType Type { get; private set; } + public UsfmVersificationErrorType Type { get; private set; } - // Returns true if there is a mismatch - public bool CheckMismatch() + // Returns true if there is an error + public bool CheckError() { //A non-empty chapter is expected if (_expectedChapter > _actualChapter && _expectedVerse != 0) { - Type = UsfmVersificationMismatchType.MissingChapter; + Type = UsfmVersificationErrorType.MissingChapter; return true; } if (_expectedVerse > _actualVerse && _expectedChapter == _actualChapter) { - Type = UsfmVersificationMismatchType.MissingVerse; + Type = UsfmVersificationErrorType.MissingVerse; return true; } if (_verseRef != null) { if (string.IsNullOrEmpty(_verseRef.Value.Segment()) && _verseRef.Value.HasSegmentsDefined) { - Type = UsfmVersificationMismatchType.MissingVerseSegment; + Type = UsfmVersificationErrorType.MissingVerseSegment; return true; } if (!string.IsNullOrEmpty(_verseRef.Value.Segment()) && !_verseRef.Value.HasSegmentsDefined) { - Type = UsfmVersificationMismatchType.ExtraVerseSegment; + Type = UsfmVersificationErrorType.ExtraVerseSegment; return true; } if (!_verseRef.Value.Valid) @@ -78,15 +78,15 @@ public bool CheckMismatch() return false; } - private static UsfmVersificationMismatchType Map(VerseRef.ValidStatusType validStatus) + private static UsfmVersificationErrorType Map(VerseRef.ValidStatusType validStatus) { switch (validStatus) { case VerseRef.ValidStatusType.OutOfRange: - return UsfmVersificationMismatchType.ExtraVerse; + return UsfmVersificationErrorType.ExtraVerse; case VerseRef.ValidStatusType.VerseRepeated: case VerseRef.ValidStatusType.VerseOutOfOrder: - return UsfmVersificationMismatchType.InvalidVerseRange; + return UsfmVersificationErrorType.InvalidVerseRange; default: throw new InvalidEnumArgumentException( nameof(validStatus), @@ -106,10 +106,10 @@ public string ExpectedVerseRef { return ""; } - if (Type == UsfmVersificationMismatchType.ExtraVerse) + if (Type == UsfmVersificationErrorType.ExtraVerse) return ""; if ( - Type == UsfmVersificationMismatchType.MissingVerseSegment + Type == UsfmVersificationErrorType.MissingVerseSegment && VerseRef.TryParse( $"{defaultVerseRef.Book} {defaultVerseRef.Chapter}:{defaultVerseRef.Verse}a", out VerseRef verseWithSegment @@ -118,7 +118,7 @@ out VerseRef verseWithSegment { return verseWithSegment.ToString(); } - if (Type == UsfmVersificationMismatchType.InvalidVerseRange) + if (Type == UsfmVersificationErrorType.InvalidVerseRange) { List sortedAllUniqueVerses = _verseRef .Value.AllVerses() @@ -150,38 +150,38 @@ out VerseRef correctedVerseRangeRef : new VerseRef(_bookNum, _actualChapter, _actualVerse).ToString(); } - public class UsfmVersificationMismatchDetector : UsfmParserHandlerBase + public class UsfmVersificationErrorDetector : UsfmParserHandlerBase { private readonly ScrVers _versification; private int _currentBook; private int _currentChapter; private VerseRef _currentVerse; - private readonly List _errors; + private readonly List _errors; - public UsfmVersificationMismatchDetector(ScrVers versification) + public UsfmVersificationErrorDetector(ScrVers versification) { _versification = versification; _currentBook = 0; _currentChapter = 0; _currentVerse = new VerseRef(); - _errors = new List(); + _errors = new List(); } - public IReadOnlyList Errors => _errors; + public IReadOnlyList Errors => _errors; public override void EndUsfm(UsfmParserState state) { if (_currentBook > 0 && Canon.IsCanonical(_currentBook)) { - var versificationMismatch = new UsfmVersificationMismatch( + var versificationError = new UsfmVersificationError( _currentBook, _versification.GetLastChapter(_currentBook), _versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)), _currentChapter, _currentVerse.AllVerses().Last().VerseNum ); - if (versificationMismatch.CheckMismatch()) - _errors.Add(versificationMismatch); + if (versificationError.CheckError()) + _errors.Add(versificationError); } } @@ -202,15 +202,15 @@ string pubNumber { if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0) { - var versificationMismatch = new UsfmVersificationMismatch( + var versificationError = new UsfmVersificationError( _currentBook, _currentChapter, _versification.GetLastVerse(_currentBook, _currentChapter), _currentChapter, _currentVerse.AllVerses().Last().VerseNum ); - if (versificationMismatch.CheckMismatch()) - _errors.Add(versificationMismatch); + if (versificationError.CheckError()) + _errors.Add(versificationError); } _currentChapter = state.VerseRef.ChapterNum; @@ -228,7 +228,7 @@ string pubNumber _currentVerse = state.VerseRef; if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0) { - var versificationMismatch = new UsfmVersificationMismatch( + var versificationError = new UsfmVersificationError( _currentBook, _currentChapter, _currentVerse.AllVerses().Last().VerseNum, @@ -236,8 +236,8 @@ string pubNumber _currentVerse.AllVerses().Last().VerseNum, _currentVerse ); - if (versificationMismatch.CheckMismatch()) - _errors.Add(versificationMismatch); + if (versificationError.CheckError()) + _errors.Add(versificationError); } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationErrorDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationErrorDetector.cs new file mode 100644 index 00000000..71139836 --- /dev/null +++ b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationErrorDetector.cs @@ -0,0 +1,10 @@ +using System.IO.Compression; + +namespace SIL.Machine.Corpora +{ + public class ZipParatextProjectVersificationErrorDetector : ParatextProjectVersificationErrorDetectorBase + { + public ZipParatextProjectVersificationErrorDetector(ZipArchive archive) + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } + } +} diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs deleted file mode 100644 index 4f9e4c39..00000000 --- a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationMismatchDetector.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System.IO.Compression; - -namespace SIL.Machine.Corpora -{ - public class ZipParatextProjectVersificationMismatchDetector : ParatextProjectVersificationMismatchDetectorBase - { - public ZipParatextProjectVersificationMismatchDetector(ZipArchive archive) - : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } - } -} diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs similarity index 69% rename from tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs rename to tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs index 1aaca781..d8f00008 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationMismatchDetector.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs @@ -1,10 +1,10 @@ namespace SIL.Machine.Corpora; -public class MemoryParatextProjectVersificationMismatchDetector( +public class MemoryParatextProjectVersificationErrorDetector( IDictionary? files = null, ParatextProjectSettings? settings = null ) - : ParatextProjectVersificationMismatchDetectorBase( + : ParatextProjectVersificationErrorDetectorBase( new MemoryParatextProjectFileHandler(files), settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() ) { } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs similarity index 62% rename from tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs rename to tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs index b98c5095..5b4d581f 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationMismatchDetectorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs @@ -9,7 +9,7 @@ namespace SIL.Machine.Corpora; public class ParatextProjectQuoteConventionDetectorTests { [Test] - public void GetUsfmVersificationMismatches_NoMismatches() + public void GetUsfmVersificationErrors_Noerrors() { var env = new TestEnvironment( files: new Dictionary() @@ -38,14 +38,14 @@ public void GetUsfmVersificationMismatches_NoMismatches() } ); Assert.That( - env.GetUsfmVersificationMismatches(), + env.GetUsfmVersificationErrors(), Has.Count.EqualTo(0), - JsonSerializer.Serialize(env.GetUsfmVersificationMismatches()) + JsonSerializer.Serialize(env.GetUsfmVersificationErrors()) ); } [Test] - public void GetUsfmVersificationMismatches_MissingVerse() + public void GetUsfmVersificationErrors_MissingVerse() { var env = new TestEnvironment( files: new Dictionary() @@ -72,13 +72,13 @@ public void GetUsfmVersificationMismatches_MissingVerse() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerse)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); } [Test] - public void GetUsfmVersificationMismatches_MissingChapter() + public void GetUsfmVersificationErrors_MissingChapter() { var env = new TestEnvironment( files: new Dictionary() @@ -90,13 +90,13 @@ public void GetUsfmVersificationMismatches_MissingChapter() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingChapter)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingChapter)); } [Test] - public void GetUsfmVersificationMismatches_ExtraVerse() + public void GetUsfmVersificationErrors_ExtraVerse() { var env = new TestEnvironment( files: new Dictionary() @@ -125,13 +125,13 @@ public void GetUsfmVersificationMismatches_ExtraVerse() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerse)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); } [Test] - public void GetUsfmVersificationMismatches_InvalidVerse() + public void GetUsfmVersificationErrors_InvalidVerse() { var env = new TestEnvironment( files: new Dictionary() @@ -158,13 +158,13 @@ public void GetUsfmVersificationMismatches_InvalidVerse() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.InvalidVerseRange)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidVerseRange)); } [Test] - public void GetUsfmVersificationMismatches_ExtraVerseSegment() + public void GetUsfmVersificationErrors_ExtraVerseSegment() { var env = new TestEnvironment( files: new Dictionary() @@ -193,13 +193,13 @@ public void GetUsfmVersificationMismatches_ExtraVerseSegment() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(2), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerseSegment)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerseSegment)); } [Test] - public void GetUsfmVersificationMismatches_MissingVerseSegment() + public void GetUsfmVersificationErrors_MissingVerseSegment() { var env = new TestEnvironment( settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( @@ -230,13 +230,13 @@ public void GetUsfmVersificationMismatches_MissingVerseSegment() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerseSegment)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerseSegment)); } [Test] - public void GetUsfmVersificationMismatches_IgnoreNonCanonicals() + public void GetUsfmVersificationErrors_IgnoreNonCanonicals() { var env = new TestEnvironment( files: new Dictionary() @@ -250,12 +250,12 @@ public void GetUsfmVersificationMismatches_IgnoreNonCanonicals() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(0), JsonSerializer.Serialize(mismatches)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(0), JsonSerializer.Serialize(errors)); } [Test] - public void GetUsfmVersificationMismatches_ExtraVerse_ExcludedInCustomVrs() + public void GetUsfmVersificationErrors_ExtraVerse_ExcludedInCustomVrs() { var env = new TestEnvironment( settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( @@ -286,13 +286,13 @@ public void GetUsfmVersificationMismatches_ExtraVerse_ExcludedInCustomVrs() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerse)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); } [Test] - public void GetUsfmVersificationMismatches_MultipleBooks() + public void GetUsfmVersificationErrors_MultipleBooks() { var env = new TestEnvironment( files: new Dictionary() @@ -338,13 +338,13 @@ public void GetUsfmVersificationMismatches_MultipleBooks() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(1), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerse)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); } [Test] - public void GetUsfmVersificationMismatches_MultipleChapters() + public void GetUsfmVersificationErrors_MultipleChapters() { var env = new TestEnvironment( files: new Dictionary() @@ -371,20 +371,20 @@ public void GetUsfmVersificationMismatches_MultipleChapters() } } ); - IReadOnlyList mismatches = env.GetUsfmVersificationMismatches(); - Assert.That(mismatches, Has.Count.EqualTo(2), JsonSerializer.Serialize(mismatches)); - Assert.That(mismatches[0].Type, Is.EqualTo(UsfmVersificationMismatchType.MissingVerse)); - Assert.That(mismatches[1].Type, Is.EqualTo(UsfmVersificationMismatchType.ExtraVerse)); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); + Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); } private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { - public ParatextProjectVersificationMismatchDetectorBase Detector { get; } = - new MemoryParatextProjectVersificationMismatchDetector(files, settings); + public ParatextProjectVersificationErrorDetectorBase Detector { get; } = + new MemoryParatextProjectVersificationErrorDetector(files, settings); - public IReadOnlyList GetUsfmVersificationMismatches() + public IReadOnlyList GetUsfmVersificationErrors() { - return Detector.GetUsfmVersificationMismatches(); + return Detector.GetUsfmVersificationErrors(); } } diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index 29afbbf9..96966bfd 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -85,13 +85,13 @@ public void AnalyzeCorporaQuoteConventions() public void ValidateUsfmVersification() { using ZipArchive zipArchive = ZipFile.OpenRead(CorporaTestHelpers.UsfmSourceProjectZipPath); - var quoteConventionDetector = new ZipParatextProjectVersificationMismatchDetector(zipArchive); - IReadOnlyList mismatches = quoteConventionDetector.GetUsfmVersificationMismatches(); + var versificationErrorDetector = new ZipParatextProjectVersificationErrorDetector(zipArchive); + IReadOnlyList errors = versificationErrorDetector.GetUsfmVersificationErrors(); Assert.That( - mismatches, + errors, Has.Count.EqualTo(0), - JsonSerializer.Serialize(mismatches, new JsonSerializerOptions { WriteIndented = true }) + JsonSerializer.Serialize(errors, new JsonSerializerOptions { WriteIndented = true }) ); } }