From d97a62354310dc957239239ac0fc66ec7921e630 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 7 Oct 2025 15:08:06 -0400 Subject: [PATCH 1/2] Remove 'pretranslation' usfm manual test; remove language of pretranslations --- .../Corpora/UsfmManualTests.cs | 123 +----------------- 1 file changed, 3 insertions(+), 120 deletions(-) diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index e02ba432..e2eefd4f 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -1,5 +1,4 @@ using System.IO.Compression; -using System.Text.Json; using NUnit.Framework; using SIL.Machine.PunctuationAnalysis; @@ -28,8 +27,8 @@ public void ParseParallelCorpusAsync() List rows = pCorpus.GetRows().ToList(); Assert.That(rows, Has.Count.GreaterThan(0)); - // insert the source into the target as pretranslations to make sure that USFM generation works - IReadOnlyList pretranslations = rows.Select(r => new UpdateUsfmRow( + // insert the source into the target as update rows to make sure that USFM generation works + IReadOnlyList updateRows = rows.Select(r => new UpdateUsfmRow( (IReadOnlyList)r.SourceRefs.Select(s => (ScriptureRef)s).ToList(), r.SourceText )) @@ -51,127 +50,11 @@ string sfmFileName in Directory string bookId; if (!targetSettings.IsBookFileName(sfmFileName, out bookId)) continue; - string newUsfm = updater.UpdateUsfm( - bookId, - pretranslations, - textBehavior: UpdateUsfmTextBehavior.StripExisting - ); + string newUsfm = updater.UpdateUsfm(bookId, updateRows, textBehavior: UpdateUsfmTextBehavior.StripExisting); Assert.That(newUsfm, Is.Not.Null); } } - public record PretranslationDto - { - public required string TextId { get; init; } - public required IReadOnlyList Refs { get; init; } - public required string Translation { get; init; } - } - - public static readonly string PretranslationPath = Path.Combine( - CorporaTestHelpers.TestDataPath, - "pretranslations.json" - ); - public static readonly string ParatextProjectPath = Path.Combine(CorporaTestHelpers.TestDataPath, "project"); - - [Test] - [Ignore("This is for manual testing only. Remove this tag to run the test.")] - /* - In order to run this test on specific projects, place the Paratext projects or Paratext project zips in the Corpora/TestData/project/ folder. - If only testing one project, you can instead place the project in the Corpora/TestData/ folder and rename it to "project" - */ - public async Task CreateUsfmFile() - { - async Task GetUsfmAsync(string projectPath) - { - ParatextProjectSettingsParserBase parser; - ZipArchive? projectArchive = null; - try - { - projectArchive = ZipFile.Open(projectPath, ZipArchiveMode.Read); - parser = new ZipParatextProjectSettingsParser(projectArchive); - } - catch (UnauthorizedAccessException) - { - parser = new FileParatextProjectSettingsParser(projectPath); - } - ParatextProjectSettings settings = parser.Parse(); - - // Read text from pretranslations file - using Stream pretranslationStream = File.OpenRead(PretranslationPath); - UpdateUsfmRow[] pretranslations = await JsonSerializer - .DeserializeAsyncEnumerable( - pretranslationStream, - new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase } - ) - .Select(p => new UpdateUsfmRow( - (IReadOnlyList)( - p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray() ?? [] - ), - p?.Translation ?? "" - )) - .ToArrayAsync(); - List bookIds = []; - ParatextProjectTextUpdaterBase updater; - if (projectArchive == null) - { - bookIds = ( - Directory - .EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}") - .Select(path => new DirectoryInfo(path).Name) - .Select(filename => - { - string bookId; - if (settings.IsBookFileName(filename, out bookId)) - return bookId; - else - return ""; - }) - .Where(id => id != "") - ).ToList(); - updater = new FileParatextProjectTextUpdater(projectPath); - } - else - { - bookIds = projectArchive - .Entries.Where(e => - e.Name.StartsWith(settings.FileNamePrefix) && e.Name.EndsWith(settings.FileNameSuffix) - ) - .Select(e => - { - string bookId; - if (settings.IsBookFileName(e.Name, out bookId)) - return bookId; - else - return ""; - }) - .Where(id => id != "") - .ToList(); - updater = new ZipParatextProjectTextUpdater(projectArchive); - } - foreach (string bookId in bookIds) - { - string newUsfm = updater.UpdateUsfm( - bookId, - pretranslations, - textBehavior: UpdateUsfmTextBehavior.StripExisting - ); - Assert.That(newUsfm, Is.Not.Null); - } - } - if (!File.Exists(Path.Combine(ParatextProjectPath, "Settings.xml"))) - { - Assert.Multiple(() => - { - foreach (string subdir in Directory.EnumerateFiles(ParatextProjectPath)) - Assert.DoesNotThrowAsync(async () => await GetUsfmAsync(subdir), $"Failed to parse {subdir}"); - }); - } - else - { - await GetUsfmAsync(ParatextProjectPath); - } - } - [Test] [Ignore("This is for manual testing only. Remove this tag to run the test.")] public void AnalyzeCorporaQuoteConventions() From 2d09e642095c3a2c831420fe81aa864c0082255a Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 7 Oct 2025 15:10:26 -0400 Subject: [PATCH 2/2] Remove references to pretranslations --- .../Corpora/CorporaTestHelpers.cs | 1 - .../PlaceMarkersUsfmUpdateBlockHandlerTests.cs | 16 ++++++++-------- .../Corpora/UpdateUsfmParserHandlerTests.cs | 12 ++++++------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs b/tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs index 2677683b..eb4e37af 100644 --- a/tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs +++ b/tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs @@ -17,7 +17,6 @@ internal static class CorporaTestHelpers public static readonly string UsfmTestProjectPath = Path.Combine(TestDataPath, "usfm", "Tes"); public static readonly string UsfmTargetProjectPath = Path.Combine(TestDataPath, "usfm", "target"); public static readonly string UsfmTargetProjectZipPath = Path.Combine(TestDataPath, "project", "target"); - public static readonly string UsfmTargetCustomVrsPath = Path.Combine(TestDataPath, "usfm", "target", "custom.vrs"); public static readonly string UsfmSourceProjectPath = Path.Combine(TestDataPath, "usfm", "source"); public static readonly string UsfmSourceProjectZipPath = Path.Combine(TestDataPath, "project", "source"); public static readonly string UsxTestProjectPath = Path.Combine(TestDataPath, "usx", "Tes"); diff --git a/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs index 0fa163e7..c9130a8f 100644 --- a/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs @@ -13,11 +13,11 @@ public class PlaceMarkersUsfmUpdateBlockHandlerTests public void UpdateUsfm_ParagraphMarkers() { string source = "This is the first paragraph. This text is in English, and this test is for paragraph markers."; - string pretranslation = + string updateRows = "Este es el primer párrafo. Este texto está en inglés y esta prueba es para marcadores de párrafo."; PlaceMarkersAlignmentInfo alignInfo = new PlaceMarkersAlignmentInfo( sourceTokens: Tokenizer.Tokenize(source).ToList(), - translationTokens: Tokenizer.Tokenize(pretranslation).ToList(), + translationTokens: Tokenizer.Tokenize(updateRows).ToList(), alignment: ToWordAlignmentMatrix( "0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19" ), @@ -28,7 +28,7 @@ public void UpdateUsfm_ParagraphMarkers() [ new UpdateUsfmRow( ScrRef("MAT 1:1"), - pretranslation, + updateRows, new Dictionary { { "alignment_info", alignInfo } } ) ]; @@ -62,11 +62,11 @@ public void UpdateUsfm_ParagraphMarkers() public void UpdateUsfm_StyleMarkers() { string source = "This is the first sentence. This text is in English, and this test is for style markers."; - string pretranslation = + string updateRows = "Esta es la primera oración. Este texto está en inglés y esta prueba es para marcadores de estilo."; PlaceMarkersAlignmentInfo alignInfo = new PlaceMarkersAlignmentInfo( sourceTokens: Tokenizer.Tokenize(source).ToList(), - translationTokens: Tokenizer.Tokenize(pretranslation).ToList(), + translationTokens: Tokenizer.Tokenize(updateRows).ToList(), alignment: ToWordAlignmentMatrix( "0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19" ), @@ -77,7 +77,7 @@ public void UpdateUsfm_StyleMarkers() [ new UpdateUsfmRow( ScrRef("MAT 1:1"), - pretranslation, + updateRows, new Dictionary { { "alignment_info", alignInfo } } ) ]; @@ -104,7 +104,7 @@ public void UpdateUsfm_StyleMarkers() alignInfo = new PlaceMarkersAlignmentInfo( sourceTokens: Tokenizer.Tokenize(source).ToList(), - translationTokens: Tokenizer.Tokenize(pretranslation).ToList(), + translationTokens: Tokenizer.Tokenize(updateRows).ToList(), alignment: ToWordAlignmentMatrix( "0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19" ), @@ -115,7 +115,7 @@ public void UpdateUsfm_StyleMarkers() [ new UpdateUsfmRow( ScrRef("MAT 1:1"), - pretranslation, + updateRows, new Dictionary { { "alignment_info", alignInfo } } ) ]; diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 6255fd22..98d67f4e 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -642,15 +642,15 @@ public void GetUsfm_Verse_LastSegment() } [Test] - public void GetUsfm_Verse_PretranslationsBeforeText() + public void GetUsfm_Verse_UpdateRowsBeforeText() { var rows = new List { - new UpdateUsfmRow(ScrRef("GEN 1:1"), "Pretranslations before the start"), - new UpdateUsfmRow(ScrRef("GEN 1:2"), "Pretranslations before the start"), - new UpdateUsfmRow(ScrRef("GEN 1:3"), "Pretranslations before the start"), - new UpdateUsfmRow(ScrRef("GEN 1:4"), "Pretranslations before the start"), - new UpdateUsfmRow(ScrRef("GEN 1:5"), "Pretranslations before the start"), + new UpdateUsfmRow(ScrRef("GEN 1:1"), "Update rows before the start"), + new UpdateUsfmRow(ScrRef("GEN 1:2"), "Update rows before the start"), + new UpdateUsfmRow(ScrRef("GEN 1:3"), "Update rows before the start"), + new UpdateUsfmRow(ScrRef("GEN 1:4"), "Update rows before the start"), + new UpdateUsfmRow(ScrRef("GEN 1:5"), "Update rows before the start"), new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.") };