Skip to content

Commit 0f18e01

Browse files
authored
Use new usfm-updater and quotation denormalization features (#777)
1 parent db2ac0a commit 0f18e01

File tree

8 files changed

+36
-33
lines changed

8 files changed

+36
-33
lines changed

src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,9 @@
3838
<PackageReference Include="Hangfire.Mongo" Version="1.11.6" />
3939
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="8.0.8" />
4040
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.8" />
41-
<PackageReference Include="SIL.Machine" Version="3.7.5" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
42-
<PackageReference Include="SIL.Machine.Morphology.HermitCrab" Version="3.7.5" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj')" />
43-
<PackageReference Include="SIL.Machine.Translation.Thot" Version="3.7.5" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
41+
<PackageReference Include="SIL.Machine" Version="3.7.6" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
42+
<PackageReference Include="SIL.Machine.Morphology.HermitCrab" Version="3.7.6" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj')" />
43+
<PackageReference Include="SIL.Machine.Translation.Thot" Version="3.7.6" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
4444
<PackageReference Include="SIL.WritingSystems" Version="14.1.1" />
4545
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
4646
<PackageReference Include="YamlDotNet" Version="11.2.1" />

src/Machine/src/Serval.Machine.Shared/Services/NmtPreprocessBuildJob.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ CancellationToken cancellationToken
4343
string targetQuotationConventionName = targetQuotationConvention?.BestQuoteConvention.Name ?? string.Empty;
4444
if (
4545
!string.IsNullOrWhiteSpace(sourceQuotationConventionName)
46-
|| !string.IsNullOrWhiteSpace(sourceQuotationConventionName)
46+
|| !string.IsNullOrWhiteSpace(targetQuotationConventionName)
4747
)
4848
{
4949
parallelCorpusAnalysis.Add(

src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,7 @@ public async Task RunAsync_EnableKeyTerms()
136136
Assert.That(src1Count, Is.EqualTo(14));
137137
Assert.That(src2Count, Is.EqualTo(0));
138138
Assert.That(trgCount, Is.EqualTo(1));
139-
Assert.That(termCount, Is.EqualTo(3642));
139+
Assert.That(termCount, Is.EqualTo(3652));
140140
});
141141
}
142142

src/Serval/src/Serval.Client/Client.g.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2409,7 +2409,7 @@ public partial interface ITranslationEnginesClient
24092409
/// <br/>
24102410
/// <br/>Preserving or stripping different types of USFM markers can be controlled by the `paragraph-marker-behavior`, `embed-behavior`, and `style-marker-behavior` parameters.
24112411
/// <br/>* `PushToEnd`: The USFM markers (or the entire embed) are preserved and placed at the end of the verse. **This is the default for paragraph markers and embeds**.
2412-
/// <br/>* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**.
2412+
/// <br/>* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language. Only works when `template` is set to `Source`**.
24132413
/// <br/>* `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**.
24142414
/// <br/>
24152415
/// <br/>Quote normalization behavior is controlled by the `quote-normalization-behavior` parameter options:
@@ -4695,7 +4695,7 @@ public string BaseUrl
46954695
/// <br/>
46964696
/// <br/>Preserving or stripping different types of USFM markers can be controlled by the `paragraph-marker-behavior`, `embed-behavior`, and `style-marker-behavior` parameters.
46974697
/// <br/>* `PushToEnd`: The USFM markers (or the entire embed) are preserved and placed at the end of the verse. **This is the default for paragraph markers and embeds**.
4698-
/// <br/>* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**.
4698+
/// <br/>* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language. Only works when `template` is set to `Source`**.
46994699
/// <br/>* `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**.
47004700
/// <br/>
47014701
/// <br/>Quote normalization behavior is controlled by the `quote-normalization-behavior` parameter options:

src/Serval/src/Serval.Shared/Serval.Shared.csproj

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
<PackageReference Include="Grpc.Core.Api" Version="2.65.0" />
2020
<PackageReference Include="Grpc.HealthCheck" Version="2.65.0" />
2121
<PackageReference Include="Grpc.Net.ClientFactory" Version="2.65.0" />
22-
<PackageReference Include="SIL.Machine" Version="3.7.5" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
22+
<PackageReference Include="SIL.Machine" Version="3.7.6" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
2323
<PackageReference Include="Microsoft.FeatureManagement.AspNetCore" Version="3.5.0" />
2424
</ItemGroup>
2525

src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -736,7 +736,7 @@ CancellationToken cancellationToken
736736
///
737737
/// Preserving or stripping different types of USFM markers can be controlled by the `paragraph-marker-behavior`, `embed-behavior`, and `style-marker-behavior` parameters.
738738
/// * `PushToEnd`: The USFM markers (or the entire embed) are preserved and placed at the end of the verse. **This is the default for paragraph markers and embeds**.
739-
/// * `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**.
739+
/// * `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language. Only works when `template` is set to `Source`**.
740740
/// * `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**.
741741
///
742742
/// Quote normalization behavior is controlled by the `quote-normalization-behavior` parameter options:

src/Serval/src/Serval.Translation/Services/PretranslationService.cs

Lines changed: 26 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -148,11 +148,13 @@ PretranslationUsfmMarkerBehavior StyleBehavior
148148
.OrderBy(p => p.ScriptureRefs[0]);
149149

150150
List<IUsfmUpdateBlockHandler> updateBlockHandlers = [];
151-
if (paragraphMarkerBehavior == PretranslationUsfmMarkerBehavior.PreservePosition)
152-
updateBlockHandlers.Add(new PlaceMarkersUsfmUpdateBlockHandler());
153-
154-
if (paragraphMarkerBehavior == PretranslationUsfmMarkerBehavior.PreservePosition)
151+
if (
152+
paragraphMarkerBehavior == PretranslationUsfmMarkerBehavior.PreservePosition
153+
&& template == PretranslationUsfmTemplate.Source
154+
)
155+
{
155156
updateBlockHandlers.Add(new PlaceMarkersUsfmUpdateBlockHandler());
157+
}
156158

157159
string usfm = "";
158160
// Update the target book if it exists
@@ -183,7 +185,9 @@ PretranslationUsfmMarkerBehavior StyleBehavior
183185
embedBehavior: Map(embedBehavior),
184186
styleBehavior: Map(styleMarkerBehavior),
185187
updateBlockHandlers: updateBlockHandlers,
186-
remarks: remarks
188+
remarks: remarks,
189+
errorHandler: (_) => true,
190+
compareSegments: false
187191
) ?? "";
188192
break;
189193
case PretranslationUsfmTextOrigin.PreferPretranslated:
@@ -197,7 +201,9 @@ PretranslationUsfmMarkerBehavior StyleBehavior
197201
embedBehavior: Map(embedBehavior),
198202
styleBehavior: Map(styleMarkerBehavior),
199203
updateBlockHandlers: updateBlockHandlers,
200-
remarks: remarks
204+
remarks: remarks,
205+
errorHandler: (_) => true,
206+
compareSegments: false
201207
) ?? "";
202208
break;
203209
case PretranslationUsfmTextOrigin.OnlyExisting:
@@ -211,7 +217,9 @@ PretranslationUsfmMarkerBehavior StyleBehavior
211217
embedBehavior: Map(embedBehavior),
212218
styleBehavior: Map(styleMarkerBehavior),
213219
updateBlockHandlers: updateBlockHandlers,
214-
remarks: remarks
220+
remarks: remarks,
221+
errorHandler: (_) => true,
222+
compareSegments: false
215223
) ?? "";
216224
break;
217225
case PretranslationUsfmTextOrigin.OnlyPretranslated:
@@ -225,7 +233,9 @@ PretranslationUsfmMarkerBehavior StyleBehavior
225233
embedBehavior: Map(embedBehavior),
226234
styleBehavior: Map(styleMarkerBehavior),
227235
updateBlockHandlers: updateBlockHandlers,
228-
remarks: remarks
236+
remarks: remarks,
237+
errorHandler: (_) => true,
238+
compareSegments: false
229239
) ?? "";
230240
break;
231241
}
@@ -255,7 +265,9 @@ PretranslationUsfmMarkerBehavior StyleBehavior
255265
embedBehavior: Map(embedBehavior),
256266
styleBehavior: Map(styleMarkerBehavior),
257267
updateBlockHandlers: updateBlockHandlers,
258-
remarks: remarks
268+
remarks: remarks,
269+
errorHandler: (_) => true,
270+
compareSegments: true
259271
) ?? "";
260272
break;
261273
case PretranslationUsfmTextOrigin.OnlyExisting:
@@ -269,7 +281,9 @@ PretranslationUsfmMarkerBehavior StyleBehavior
269281
embedBehavior: Map(embedBehavior),
270282
styleBehavior: Map(styleMarkerBehavior),
271283
updateBlockHandlers: updateBlockHandlers,
272-
remarks: remarks
284+
remarks: remarks,
285+
errorHandler: (_) => true,
286+
compareSegments: true
273287
) ?? "";
274288
break;
275289
}
@@ -289,31 +303,20 @@ PretranslationUsfmMarkerBehavior StyleBehavior
289303

290304
private static string DenormalizeQuotationMarks(string usfm, ParallelCorpusAnalysis analysis)
291305
{
292-
QuoteConvention sourceQuoteConvention = QuoteConventions.Standard.GetQuoteConventionByName(
293-
analysis.SourceQuoteConvention
294-
);
295-
if (sourceQuoteConvention is null)
296-
return usfm;
297-
298306
QuoteConvention targetQuoteConvention = QuoteConventions.Standard.GetQuoteConventionByName(
299307
analysis.TargetQuoteConvention
300308
);
301309
if (targetQuoteConvention is null)
302310
return usfm;
303311

304-
QuotationMarkDenormalizationFirstPass quotationMarkDenormalizationFirstPass =
305-
new(sourceQuoteConvention, targetQuoteConvention);
312+
QuotationMarkDenormalizationFirstPass quotationMarkDenormalizationFirstPass = new(targetQuoteConvention);
306313

307314
UsfmParser.Parse(usfm, quotationMarkDenormalizationFirstPass);
308315
List<QuotationMarkUpdateStrategy> bestChapterStrategies =
309316
quotationMarkDenormalizationFirstPass.FindBestChapterStrategies();
310317

311318
QuotationMarkDenormalizationUsfmUpdateBlockHandler quotationMarkDenormalizer =
312-
new(
313-
sourceQuoteConvention,
314-
targetQuoteConvention,
315-
new QuotationMarkUpdateSettings(chapterStrategies: bestChapterStrategies)
316-
);
319+
new(targetQuoteConvention, new QuotationMarkUpdateSettings(chapterStrategies: bestChapterStrategies));
317320
List<string> remarks = [];
318321
if (bestChapterStrategies.Any(s => s != QuotationMarkUpdateStrategy.Skip))
319322
{

src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
<PackageReference Include="SIL.WritingSystems" Version="14.1.1" />
1919
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
2020
<PackageReference Include="SIL.Scripture" Version="12.0.1" />
21-
<PackageReference Include="SIL.Machine" Version="3.7.5" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
21+
<PackageReference Include="SIL.Machine" Version="3.7.6" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
2222
</ItemGroup>
2323

2424
<ItemGroup>

0 commit comments

Comments
 (0)